In [3]:
import pandas as pd
# Load the three raw tables (customers, products, transactions) from CSV.
customers = pd.read_csv(r"C:\Users\sandr\Downloads\Customers.csv")
products = pd.read_csv(r"C:\Users\sandr\Downloads\Products.csv")
transactions = pd.read_csv(r"C:\Users\sandr\Downloads\Transactions.csv")

# Enrich every transaction row with its customer's columns and then its
# product's columns. Both joins use merge's default `how` (inner), so
# transactions without a matching customer or product are dropped.
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)



In [4]:
# Create customer profiles: one row per CustomerID holding total spend,
# number of transactions and the most frequently purchased category.
def _most_common(values):
    """Return the most frequent non-null value in `values`, or None if there is none.

    `Series.mode()` returns its results sorted, so when several values tie for
    most frequent the first one in sort order is returned.
    """
    modes = values.mode()
    # mode() ignores NaN by default, so a group containing only NaN yields an
    # empty Series; indexing that with [0] would raise KeyError.
    if modes.empty:
        return None
    return modes.iloc[0]


customer_profiles = (
    merged_data
    .groupby('CustomerID')
    .agg({
        'TotalValue': 'sum',          # total amount spent
        'TransactionID': 'count',     # number of transactions
        'Category': _most_common,     # most common category
    })
    .reset_index()
    # Chained rename instead of inplace=True so the whole cell is a single
    # expression that builds the final frame.
    .rename(columns={'TransactionID': 'NumTransactions'})
)

In [5]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Standardise the two numeric profile columns with StandardScaler so they
# are on a comparable scale before measuring similarity.
numeric_profile = customer_profiles[['TotalValue', 'NumTransactions']]
scaler = StandardScaler()
features = scaler.fit_transform(numeric_profile)

# Pairwise cosine similarity between all customers: entry [i, j] compares
# row i of `features` with row j.
similarity_matrix = cosine_similarity(features)

In [6]:
# Map each target CustomerID to its top-3 most similar customers as a list
# of (CustomerID, similarity score) tuples, best match first.
lookalike_recommendations = {}

customer_ids = customer_profiles['CustomerID']

# Targets are the first 20 rows of customer_profiles. These are customers
# C0001 to C0020 only if the profiles are sorted by CustomerID and each of
# those customers has at least one transaction -- verify against the data.
# min() avoids an IndexError when fewer than 20 profiles exist.
num_targets = min(20, len(customer_profiles))

for i in range(num_targets):
    row = similarity_matrix[i]

    # Rank all customers by descending similarity. A stable sort keeps ties
    # in row order, so the result is deterministic.
    ranked = (-row).argsort(kind='stable')

    # Remove the customer itself by position instead of assuming it sorts
    # last: other customers can tie with (or, through floating-point noise,
    # exceed) the self-similarity, and a zero feature vector has similarity
    # 0 with everyone including itself.
    similar_indices = ranked[ranked != i][:3]  # Get top 3 similar customers

    # Store plain Python floats so downstream string/CSV output does not
    # contain NumPy scalar reprs such as "np.float64(0.98)".
    lookalike_recommendations[customer_ids.iloc[i]] = [
        (customer_ids.iloc[j], float(row[j])) for j in similar_indices
    ]

In [7]:
# Prepare data for CSV: one row per target customer, with the list of
# (lookalike CustomerID, score) tuples in the 'lookalikes' column.
lookalike_data = {
    'cust_id': list(lookalike_recommendations.keys()),
    'lookalikes': [
        # The list is written to the CSV via its string form. Convert every
        # score to a built-in float so NumPy >= 2 scalars are not serialised
        # as "np.float64(...)".
        [(other_id, float(score)) for other_id, score in recommendations]
        for recommendations in lookalike_recommendations.values()
    ],
}

lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv('Lookalike.csv', index=False)