In [39]:
# Lookalike Model: for each of the first 20 customers, find the 3 most similar customers by cosine similarity
# Import necessary libraries
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

# Input locations: the three Excel workbooks read by this cell.
# NOTE(review): these are absolute, machine-specific paths — consider a configurable data directory.
customers_path, products_path, transactions_path = (
    r"C:\Users\gaura\Downloads\Customers.xlsx",
    r"C:\Users\gaura\Downloads\Products.xlsx",
    r"C:\Users\gaura\Downloads\Transactions.xlsx",
)

# Read each workbook into its own DataFrame (customers, products, transactions).
customers_df, products_df, transactions_df = (
    pd.read_excel(workbook)
    for workbook in (customers_path, products_path, transactions_path)
)

# Build one wide transaction table: every transaction row is kept (left joins)
# and enriched first with its product columns, then with its customer columns.
transactions_products = transactions_df.merge(products_df, how="left", on="ProductID")
customer_transactions = transactions_products.merge(customers_df, how="left", on="CustomerID")

# Feature engineering: collapse the transaction table to one row per customer.
# Each entry maps an output feature name to (source column, aggregation).
aggregation_spec = {
    "total_spend": ("TotalValue", "sum"),           # total value across the customer's transactions
    "purchase_frequency": ("TransactionID", "count"),  # number of transactions
    "avg_quantity": ("Quantity", "mean"),           # mean quantity per transaction
    "unique_categories": ("Category", "nunique"),   # number of distinct categories bought
}
transactions_by_customer = customer_transactions.groupby("CustomerID")
customer_features = transactions_by_customer.agg(**aggregation_spec).reset_index()

# Attach customer metadata (region and signup date) to the behavioural features,
# then one-hot encode Region into indicator columns prefixed with "region".
customer_profiles = customers_df[["CustomerID", "Region", "SignupDate"]]
customer_features = pd.get_dummies(
    customer_features.merge(customer_profiles, how="left", on="CustomerID"),
    columns=["Region"],
    prefix="region",
)

# Rescale the behavioural features with MinMaxScaler so they share a common range.
numerical_cols = ["total_spend", "purchase_frequency", "avg_quantity", "unique_categories"]
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(customer_features[numerical_cols])
# Overwrite the raw columns with their scaled counterparts.
customer_features[numerical_cols] = scaled_values

# Compute Cosine Similarity
# Every column except the identifier and the raw signup date is used as a feature.
non_feature_cols = ["CustomerID", "SignupDate"]
feature_cols = [col for col in customer_features.columns if col not in non_feature_cols]

# Cast explicitly to float. The frame mixes float features with one-hot indicator
# columns; when those indicators are boolean (the pd.get_dummies default on recent
# pandas), `.values` would hand back an object-dtype array instead of a numeric one.
feature_matrix = customer_features[feature_cols].to_numpy(dtype=float)

# Square matrix: entry [i, j] is the cosine similarity between rows i and j
# of customer_features.
similarity_matrix = cosine_similarity(feature_matrix)

# Generate Lookalike Recommendations
# For each of the first N_TARGET_CUSTOMERS rows of customer_features, collect the
# TOP_N *other* customers with the highest cosine similarity.
N_TARGET_CUSTOMERS = 20  # how many customers (by row order) get recommendations
TOP_N = 3                # lookalikes reported per customer

# Row i of similarity_matrix describes row i of customer_features, so a single
# positional array of IDs serves both the targets and the lookups below.
all_customer_ids = customer_features["CustomerID"].values
first_20_customers = all_customer_ids[:N_TARGET_CUSTOMERS]
lookalike_results = {}

for idx, cust_id in enumerate(first_20_customers):
    scores = similarity_matrix[idx]
    # Rank all rows by descending similarity and drop the customer's own row
    # explicitly. Merely skipping rank 0 is unsafe: if another customer ties with
    # the self-similarity (or rounding puts the self-score marginally lower),
    # the customer itself is not first and would be reported as its own
    # lookalike while a genuine match is discarded.
    ranked_others = [i for i in scores.argsort()[::-1] if i != idx]
    lookalike_results[cust_id] = [
        {"CustomerID": all_customer_ids[i],
         "SimilarityScore": round(scores[i], 4)}
        for i in ranked_others[:TOP_N]
    ]

# Flatten the per-customer lookalike lists into one wide record per customer:
# CustomerID first, then a (SimilarCustomerID_k, SimilarityScore_k) pair for
# each lookalike, numbered from 1 in the order they were stored.
lookalike_csv_data = []
for source_id, matches in lookalike_results.items():
    record = {"CustomerID": source_id}
    for rank, match in enumerate(matches, start=1):
        record.update(
            {
                f"SimilarCustomerID_{rank}": match["CustomerID"],
                f"SimilarityScore_{rank}": match["SimilarityScore"],
            }
        )
    lookalike_csv_data.append(record)

# One DataFrame row per record.
lookalike_csv_df = pd.DataFrame(lookalike_csv_data)

# Write the table to Lookalike.csv without the index column, then report where it went.
output_file = r"C:\Users\gaura\Downloads\Lookalike.csv"
lookalike_csv_df.to_csv(path_or_buf=output_file, index=False)
print(f"Lookalike recommendations saved to: {output_file}")


Lookalike recommendations saved to: C:\Users\gaura\Downloads\Lookalike.csv
