In [5]:
from google.colab import files
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Prompt for the input CSVs through Colab's upload widget. The later reads use
# bare file names, so the uploads are expected to land in the working directory.
uploaded = files.upload()

# Read each file into its own DataFrame, in the order the names are unpacked.
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

Saving Customers.csv to Customers.csv
Saving Products.csv to Products.csv
Saving Transactions.csv to Transactions.csv


In [6]:
# Enrich every transaction row with its customer's and its product's columns.
# Both joins rely on merge()'s defaults apart from the key column.
transactions_with_customers = transactions.merge(customers, on='CustomerID')
merged_df = transactions_with_customers.merge(products, on='ProductID')

# Per-customer aggregation recipe: source column -> reducer.
aggregation_spec = {
    'Quantity': 'sum',        # total units bought
    'TotalValue': 'sum',      # total spend
    # 'Price_x' is the left-hand copy of a 'Price' column that clashed during
    # the merges (pandas' default '_x' suffix) -- presumably the
    # transaction-side price; confirm against the CSV schemas.
    'Price_x': 'mean',
    'ProductID': 'nunique',   # number of distinct products bought
    # Most frequent category among the customer's purchases.
    'Category': lambda categories: categories.value_counts().idxmax(),
}
customer_summary = merged_df.groupby('CustomerID').agg(aggregation_spec).reset_index()

# Attach the aggregates to the customer profile columns. merge() defaults to
# an inner join, so only customers present in customer_summary are kept.
customer_features = customers.merge(customer_summary, on='CustomerID')

In [7]:
# Columns of customer_features that drive the similarity computation.
numerical_features = ['Quantity', 'TotalValue', 'Price_x', 'ProductID']

# Standardize those columns and write the scaled values back over the
# originals (customer_features no longer holds the raw aggregates after this).
scaler = StandardScaler()
scaled_values = scaler.fit_transform(customer_features[numerical_features])
customer_features[numerical_features] = scaled_values

# Pairwise cosine similarity between the customers' scaled feature rows.
similarity_matrix = cosine_similarity(customer_features[numerical_features])

# Label both axes with CustomerID so scores can be looked up by customer.
customer_ids = customer_features['CustomerID']
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

# Recommend similar customers function
def recommend_similar_customers(customer_id, n_recommendations=3):
    """Return the customers most similar to ``customer_id``.

    Looks up the customer's row in the module-level ``similarity_df`` (a
    square frame whose index and columns are customer IDs) and ranks every
    *other* customer by similarity score.

    Args:
        customer_id: Label of the customer to look up; must be present in
            ``similarity_df``.
        n_recommendations: Maximum number of lookalikes to return. Values
            below zero are treated as zero.

    Returns:
        A DataFrame with columns ``'CustomerID'`` and ``'SimilarityScore'``,
        ordered from most to least similar. It has fewer than
        ``n_recommendations`` rows when fewer other customers exist.

    Raises:
        KeyError: If ``customer_id`` has no row in ``similarity_df``.
    """
    if customer_id not in similarity_df.index:
        raise KeyError(f"Customer {customer_id!r} has no row in similarity_df")

    similarity_scores = similarity_df.loc[customer_id]

    # Exclude the customer by label, not by assuming it sorts first: another
    # customer can tie with (or, through floating-point rounding, exceed) the
    # self-similarity score, in which case positional slicing would return the
    # customer as their own lookalike.
    other_customers = similarity_scores.drop(labels=customer_id)

    # A stable sort gives tied scores a deterministic order.
    ranked = other_customers.sort_values(ascending=False, kind='mergesort')
    similar_customers = ranked.iloc[:max(n_recommendations, 0)]

    recommendations = pd.DataFrame({
        'CustomerID': similar_customers.index,
        'SimilarityScore': similar_customers.values,
    })
    return recommendations

# Build the Lookalike CSV: one row per target customer, one column per
# lookalike, each cell holding a [CustomerID, SimilarityScore] pair.
N_TARGET_CUSTOMERS = 20  # how many customers (from the top of Customers.csv) to export
N_LOOKALIKES = 3         # lookalikes per customer; also sets the number of columns

lookalike_dict = {}

for customer_id in customers['CustomerID'][:N_TARGET_CUSTOMERS]:
    if customer_id in similarity_df.index:
        recommendations = recommend_similar_customers(customer_id, n_recommendations=N_LOOKALIKES)
        pairs = recommendations.values.tolist()
    else:
        # similarity_df only covers customers that have a row in
        # customer_features; a customer missing from it would otherwise raise
        # KeyError and abort the whole export. Keep the row, with empty cells.
        pairs = []
    # Pad so every row has exactly N_LOOKALIKES cells, even when fewer
    # lookalikes are available.
    pairs += [None] * (N_LOOKALIKES - len(pairs))
    lookalike_dict[customer_id] = pairs

lookalike_columns = [f'Lookalike{rank}' for rank in range(1, N_LOOKALIKES + 1)]
lookalike_df = pd.DataFrame.from_dict(lookalike_dict, orient='index', columns=lookalike_columns)
lookalike_df.to_csv('FirstName_LastName_Lookalike.csv', index_label='CustomerID')