In [5]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the three raw tables from the Data folder
customers = pd.read_csv('Data/Customers.csv')
products = pd.read_csv('Data/Products.csv')
transactions = pd.read_csv('Data/Transactions.csv')

# Attach customer and product attributes to every transaction row.
# Both joins use pandas' default inner join, so a transaction survives only
# if its CustomerID and its ProductID are present in the lookup tables.
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

# Build one feature row per customer for the lookalike model:
#   TotalSpending     - sum of TotalValue over the customer's transactions
#   PurchaseFrequency - number of transactions (count of TransactionID)
#   TotalQuantity     - sum of Quantity over the customer's transactions
customer_features = merged_data.groupby('CustomerID').agg(
    TotalSpending=('TotalValue', 'sum'),
    PurchaseFrequency=('TransactionID', 'count'),
    TotalQuantity=('Quantity', 'sum'),
)

# Bring every feature onto a comparable scale before measuring similarity,
# so that no single feature dominates purely because of its magnitude.
scaler = StandardScaler()
scaled_features = scaler.fit(customer_features).transform(customer_features)

# Pairwise cosine similarity between customers, labelled by CustomerID on
# both the rows and the columns.
customer_ids = customer_features.index
similarity_matrix = cosine_similarity(scaled_features)
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

# Generate recommendations for the first 20 customers in the sorted
# CustomerID index (C0001 to C0020 when each of them has transactions).
# Each entry maps a customer to its three most similar *other* customers
# as {CustomerID: similarity score}, ordered from most to least similar.
lookalike_results = {}
for customer_id in customer_features.index[:20]:
    # Remove the customer's own entry by label rather than assuming it sorts
    # first. Floating-point cosine scores can tie with (or round slightly
    # above) the self-similarity, in which case a positional [1:4] slice
    # would keep the customer itself and discard a genuine lookalike.
    similar_customers = (
        similarity_df[customer_id]
        .drop(labels=customer_id)
        .sort_values(ascending=False)
        .head(3)
    )
    lookalike_results[customer_id] = similar_customers.to_dict()

# Flatten the per-customer results into one row per customer and write them
# to the Outputs folder. Each row holds the customer, the list of recommended
# CustomerIDs, and the matching list of similarity scores (same order).
recommended_ids = []
recommended_scores = []
for neighbours in lookalike_results.values():
    recommended_ids.append(list(neighbours.keys()))
    recommended_scores.append(list(neighbours.values()))

lookalike_df = pd.DataFrame({
    'CustomerID': list(lookalike_results),
    'Recommendations': recommended_ids,
    'SimilarityScores': recommended_scores,
})
lookalike_df.to_csv('Outputs/Athish_Kishan_Lookalike.csv', index=False)

print("Lookalike model recommendations saved to Athish_Kishan_Lookalike.csv")

Lookalike model recommendations saved to Athish_Kishan_Lookalike.csv
