In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Read the customer, product and transaction tables from CSV files in the
# current working directory (read in that order).
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ("Customers.csv", "Products.csv", "Transactions.csv")
)

In [None]:
# Attach customer attributes and product attributes to every transaction row.
# Both lookups are inner joins (the pandas default, spelled out here), so
# transactions whose CustomerID or ProductID has no match are dropped.
# `validate` makes pandas raise a MergeError if a key is duplicated in a
# lookup table, instead of silently duplicating transaction rows and
# inflating the per-customer sums computed from this frame.
merged_df = (
    transactions_df
    .merge(customers_df, on="CustomerID", how="inner", validate="many_to_one")
    .merge(products_df, on="ProductID", how="inner", validate="many_to_one")
)

In [None]:
def _most_frequent(values):
    """Return the first entry of ``values.mode()`` (the most frequent value)."""
    return values.mode()[0]


# Collapse the transaction-level table to one row per customer: keep the
# first Region / SignupDate seen, the most frequent product Category, and
# the summed TotalValue and Quantity.
customer_features = (
    merged_df
    .groupby("CustomerID")
    .agg(
        Region=("Region", "first"),
        SignupDate=("SignupDate", "first"),
        Category=("Category", _most_frequent),
        TotalValue=("TotalValue", "sum"),
        Quantity=("Quantity", "sum"),
    )
    .reset_index()
)


In [None]:
# Replace the raw signup date with "days since the earliest signup in this
# table", a plain number that can be scaled with the other numeric features.
# The column is overwritten in place, so guard on its dtype: re-running this
# cell on the already-converted day counts would otherwise make
# pd.to_datetime read them as nanosecond offsets from the Unix epoch and
# collapse every value to 0 days.
if not pd.api.types.is_numeric_dtype(customer_features["SignupDate"]):
    signup_dates = pd.to_datetime(customer_features["SignupDate"])
    customer_features["SignupDate"] = (signup_dates - signup_dates.min()).dt.days

In [None]:
# One-hot encode the two categorical customer attributes and convert the
# encoder's output to a dense array via .toarray().
encoder = OneHotEncoder()
region_and_category = customer_features[["Region", "Category"]]
categorical_features = encoder.fit_transform(region_and_category).toarray()

In [None]:
# Standardise the three numeric attributes column by column so they are on a
# comparable scale before similarity is computed.
scaler = StandardScaler()
numeric_columns = ["SignupDate", "TotalValue", "Quantity"]
numerical_features = scaler.fit_transform(customer_features[numeric_columns])


In [None]:
# Assemble the per-customer feature matrix: scaled numeric columns first,
# followed by the one-hot indicator columns.
feature_blocks = [numerical_features, categorical_features]
final_features = np.hstack(feature_blocks)


In [None]:
# Cosine similarity between every pair of rows of the feature matrix.  No
# second matrix is supplied (Y=None), so the rows are compared with each
# other and entry [i, j] is the similarity of row i and row j.
similarity_matrix = cosine_similarity(X=final_features, Y=None)


In [None]:
# Build {customer_id: [(similar_customer_id, score), ...]} for the leading
# rows of `customer_features`, keeping the best-scoring matches for each.
N_TARGET_CUSTOMERS = 20  # how many leading rows get a lookalike list
TOP_K = 3                # lookalikes kept per customer

# Native Python values, so IDs print cleanly when the lists are stringified.
customer_ids = customer_features["CustomerID"].tolist()

lookalike_data = {}
# Clamp to the table size so a dataset with fewer rows does not raise
# IndexError.
for idx in range(min(N_TARGET_CUSTOMERS, len(customer_ids))):
    scores = similarity_matrix[idx]
    ranked = np.argsort(scores)[::-1]  # highest similarity first
    # Remove the customer itself by position instead of assuming it sorts to
    # rank 0: a row with identical features (or floating-point rounding) can
    # rank at or above the self-similarity, which would otherwise list the
    # customer as its own lookalike and drop a genuine neighbour.
    ranked = ranked[ranked != idx][:TOP_K]
    # Cast to built-in float so the stringified list written to CSV contains
    # plain numbers rather than NumPy scalar reprs such as `np.float64(0.9)`.
    lookalike_data[customer_ids[idx]] = [
        (customer_ids[i], float(round(scores[i], 4))) for i in ranked
    ]

In [None]:
# Flatten the mapping into a two-column table (one row per customer, with the
# whole list of matches in a single cell) and write it to disk without the
# index column.
lookalike_rows = [
    (customer_id, matches) for customer_id, matches in lookalike_data.items()
]
lookalike_df = pd.DataFrame(
    lookalike_rows,
    columns=["CustomerID", "SimilarCustomers"],
)
lookalike_df.to_csv("Lookalike.csv", index=False)

In [None]:
# Tell the reader where the results were written.
completion_message = "Lookalike model completed. Check Lookalike.csv for results."
print(completion_message)


Lookalike model completed. Check Lookalike.csv for results.
