### Import required libraries

In [30]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

### Load datasets

In [31]:

# Read the three input tables from CSV files in the current working directory.
# The names are unpacked in the same order as the file names listed below.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)


### Merge datasets

In [32]:
# Attach customer attributes and product attributes to every transaction row.
# Left joins keep every transaction even when a key has no match, in which
# case the joined columns are missing for that row.
# Non-key columns present on both sides of a join receive pandas' default
# `_x` / `_y` suffixes (the aggregation further down reads `Price_x`).
# NOTE(review): this rebinds `transactions`, so running the cell a second time
# merges the already-merged frame again; re-run the load cell first.
transactions = (
    transactions
    .merge(customers, on="CustomerID", how="left")
    .merge(products, on="ProductID", how="left")
)

### Feature Engineering

In [33]:
def _most_frequent_category(categories):
    """Return the most common non-missing value in ``categories``.

    Parameters
    ----------
    categories : pandas.Series
        The ``Category`` values of one customer's transaction rows.

    Returns
    -------
    The value with the highest count, or ``None`` when the group holds no
    non-missing value at all (possible after the left merge when a
    transaction's product has no match). Calling ``idxmax`` on the empty
    counts would raise ``ValueError`` and abort the whole aggregation.
    """
    counts = categories.value_counts()  # missing values are not counted
    if counts.empty:
        return None
    return counts.idxmax()


# Build one row per customer:
#   TotalValue -> mean over the customer's transaction rows
#   Quantity   -> sum over the customer's transaction rows
#   Category   -> most frequent category among the customer's rows
#   Price_x    -> mean of the `Price_x` column produced by the merge
#                 (presumably the transaction-side price -- TODO confirm)
customer_profiles = (
    transactions.groupby("CustomerID")
    .agg({
        "TotalValue": "mean",
        "Quantity": "sum",
        "Category": _most_frequent_category,
        "Price_x": "mean",
    })
    .reset_index()
)


### Encode categorical features

In [34]:
# One-hot encode the per-customer `Category` column; the first category level
# is dropped, so customers in that level have all dummy columns equal to zero.
# NOTE(review): `drop_first=True` is usually chosen to avoid collinearity in
# regression models; confirm it is intended for similarity features.
customer_profiles = pd.get_dummies(
    data=customer_profiles,
    columns=["Category"],
    drop_first=True,
)

### Normalize features

In [35]:
# Everything except the identifier column is used as a model feature.
features = customer_profiles.drop(columns=["CustomerID"])

# Standardise each feature column; fitting and transforming use the same
# frame, so this matches a single fit_transform call.
scaler = StandardScaler().fit(features)
features_scaled = scaler.transform(features)

### Compute cosine similarity

In [36]:
# Pairwise cosine similarity between every pair of rows of the scaled feature
# matrix; entry [i][j] compares customer row i with customer row j.
similarity_matrix = cosine_similarity(X=features_scaled)

### Create Lookalike dictionary

In [37]:
def build_lookalike_map(customer_ids, similarity_matrix, n_customers=20, top_n=3):
    """Map each of the first ``n_customers`` customers to its closest peers.

    Parameters
    ----------
    customer_ids : iterable
        Customer identifiers, in the same order as the rows (and columns) of
        ``similarity_matrix``.
    similarity_matrix : 2-D indexable
        ``similarity_matrix[i][j]`` is the similarity between customer ``i``
        and customer ``j``.
    n_customers : int, default 20
        How many customers, counted from the start, receive an entry.
    top_n : int, default 3
        How many lookalikes to keep per customer.

    Returns
    -------
    dict
        ``{customer_id: [(other_id, score), ...]}`` ordered by descending
        score, with scores rounded to 4 decimals. A list is shorter than
        ``top_n`` only when fewer other customers exist.
    """
    ids = list(customer_ids)
    result = {}
    for idx, customer_id in enumerate(ids[:n_customers]):
        # Exclude the customer itself by position. Skipping "the first sorted
        # entry" is not safe: when another customer ties for the top score,
        # the customer's own entry may not sort first and would then be
        # reported as its own lookalike while a real neighbour is dropped.
        candidates = [
            (other_idx, score)
            for other_idx, score in enumerate(similarity_matrix[idx])
            if other_idx != idx
        ]
        # Stable sort: equal scores keep their original row order.
        candidates.sort(key=lambda pair: pair[1], reverse=True)
        # float(...) stores a plain Python float, so the tuples written out
        # later do not embed a NumPy scalar repr (e.g. `np.float64(...)`
        # under NumPy 2).
        result[customer_id] = [
            (ids[other_idx], round(float(score), 4))
            for other_idx, score in candidates[:top_n]
        ]
    return result


# Top 3 lookalikes for the first 20 customers.
lookalike_map = build_lookalike_map(customer_profiles["CustomerID"], similarity_matrix)

### Export Lookalike map to CSV

In [38]:
# One row per customer, indexed by the customer ID; each of the three columns
# holds one (lookalike_id, score) tuple from `lookalike_map`.
lookalike_columns = ["Customer1", "Customer2", "Customer3"]
lookalike_df = pd.DataFrame.from_dict(
    lookalike_map,
    orient="index",
    columns=lookalike_columns,
)

# The index is written as the first CSV column under the header "CustomerID".
output_path = "Lookalike.csv"
lookalike_df.to_csv(output_path, index_label="CustomerID")

print("Lookalike.csv generated successfully.")


Lookalike.csv generated successfully.
