In [None]:
#TASK2--------

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load datasets
customers_df = pd.read_csv("Customers.csv")
products_df = pd.read_csv("Products.csv")
transactions_df = pd.read_csv("Transactions.csv")


def _most_frequent(values):
    """Return the most frequent non-null value in *values*, or NaN if none exists.

    ``Series.mode()`` ignores missing values, so a group whose values are all
    missing (possible after the left merges below) yields an empty result;
    indexing it directly would raise instead of producing a profile row.
    On ties the first value in ``mode()``'s sorted output is used.
    """
    modes = values.mode()
    return modes.iloc[0] if not modes.empty else float("nan")


# Preprocess and merge data
# Merge transactions with products to include product details.
# Left joins keep every transaction even when the lookup table has no match.
transactions_merged = pd.merge(transactions_df, products_df, on="ProductID", how="left")

# Merge transactions with customers to include customer details
full_data = pd.merge(transactions_merged, customers_df, on="CustomerID", how="left")

# Aggregate data to create one profile row per customer that has transactions
customer_profile = full_data.groupby("CustomerID").agg({
    "TotalValue": "sum",  # Total spending
    "ProductID": "nunique",  # Number of unique products purchased
    "Category": _most_frequent,  # Most purchased category
    "Region": _most_frequent,  # Most frequent region
}).reset_index()

# One-hot encode the categorical variables (Category and Region).
# drop_first=True drops one indicator column per variable; a missing
# Category/Region produces all-zero indicators for that variable.
customer_profile = pd.get_dummies(customer_profile, columns=["Category", "Region"], drop_first=True)

# Standardise every feature column (numeric totals and one-hot indicators
# alike) so no single feature dominates the similarity computation.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(customer_profile.drop("CustomerID", axis=1))

# Compute the customer-by-customer similarity matrix using cosine similarity.
# Row/column order matches the row order of customer_profile.
similarity_matrix = cosine_similarity(scaled_features)

# Create a function to get the top-N lookalikes for one customer
def get_top_lookalikes(customer_id, similarity_matrix, customer_ids, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customer_id: ID of the customer to find lookalikes for.
        similarity_matrix: Indexable matrix where row ``i`` holds the
            similarity of ``customer_ids[i]`` to every customer, in the
            same order as ``customer_ids``.
        customer_ids: List of customer IDs aligned with the matrix rows.
        top_n: Maximum number of lookalikes to return (negative values
            are treated as 0).

    Returns:
        A list of ``(customer_id, score)`` tuples sorted by descending
        score, with scores rounded to 3 decimals as built-in floats.
        Customers with equal scores keep their ``customer_ids`` order.

    Raises:
        ValueError: If ``customer_id`` is not in ``customer_ids``.
    """
    customer_idx = customer_ids.index(customer_id)

    # Exclude the customer by position instead of assuming it sorts first:
    # another customer with an identical profile ties at the top score and
    # could otherwise push the customer itself into its own lookalike list.
    candidates = [
        (idx, score)
        for idx, score in enumerate(similarity_matrix[customer_idx])
        if idx != customer_idx
    ]
    # list.sort is stable, so ties stay in customer_ids order.
    candidates.sort(key=lambda pair: pair[1], reverse=True)

    # float() normalises NumPy scalars so the result prints the same way
    # regardless of the NumPy version's scalar repr.
    return [
        (customer_ids[idx], round(float(score), 3))
        for idx, score in candidates[:max(top_n, 0)]
    ]

# Generate lookalikes for customers C0001 to C0020
customer_ids = list(customer_profile["CustomerID"])
known_ids = set(customer_ids)

# Select the targets by ID rather than by position: customer_profile only
# contains customers that have transactions, so the first 20 rows are not
# guaranteed to be C0001..C0020.
target_ids = [f"C{number:04d}" for number in range(1, 21)]

# A target without a profile row has nothing to compare against, so it is
# reported with an empty lookalike list instead of being replaced silently.
lookalike_map = {
    customer_id: (
        get_top_lookalikes(customer_id, similarity_matrix, customer_ids)
        if customer_id in known_ids
        else []
    )
    for customer_id in target_ids
}

# Convert the lookalike map to a DataFrame for output.
# Each lookalike list is stored as its string representation.
lookalike_df = pd.DataFrame({
    "CustomerID": list(lookalike_map),
    "Lookalikes": [str(matches) for matches in lookalike_map.values()],
})

# Save the Lookalike.csv
lookalike_df.to_csv("Lookalike.csv", index=False)

# --- Sample Output ---
print("Sample Lookalike Recommendations:")
print(lookalike_df.head())


Sample Lookalike Recommendations:
  CustomerID                                         Lookalikes
0      C0001  [('C0091', 0.999), ('C0190', 0.997), ('C0048',...
1      C0002  [('C0088', 0.979), ('C0134', 0.964), ('C0106',...
2      C0003  [('C0031', 0.991), ('C0052', 0.986), ('C0076',...
3      C0004  [('C0155', 0.987), ('C0087', 0.955), ('C0153',...
4      C0005  [('C0186', 0.999), ('C0007', 0.993), ('C0140',...
