In [1]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [2]:
# Read the three raw CSV exports into DataFrames.
# NOTE(review): these are absolute, machine-specific Windows paths; the
# notebook will only run as-is on a machine that has the same files there.
customers_path = r'C:\Users\DELL\Downloads\Customers.csv'
products_path = r'C:\Users\DELL\Downloads\Products.csv'
transactions_path = r'C:\Users\DELL\Downloads\Transactions.csv'

customers = pd.read_csv(customers_path)
products = pd.read_csv(products_path)
transactions = pd.read_csv(transactions_path)

In [3]:
# Attach customer attributes, then product attributes, to every transaction.
# Both joins use pandas' default inner join, so a transaction whose
# CustomerID or ProductID has no match in the lookup table is dropped.
customer_transactions = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)


In [4]:
# Build one feature row per customer from their transaction history:
#   TotalValue - sum of TotalValue over the customer's transactions
#   Quantity   - sum of Quantity over the customer's transactions
#   ProductID  - number of distinct products bought (not an identifier
#                any more after this step)
customer_features = (
    customer_transactions
    .groupby('CustomerID')
    .agg(
        TotalValue=('TotalValue', 'sum'),
        Quantity=('Quantity', 'sum'),
        ProductID=('ProductID', 'nunique'),
    )
    .reset_index()
)

In [5]:
# Standardize the three aggregate features with StandardScaler (default
# settings) so that no single feature dominates the similarity computation.
# The scaled values overwrite the raw aggregates in customer_features.
feature_columns = ['TotalValue', 'Quantity', 'ProductID']
scaler = StandardScaler()
scaled_values = scaler.fit_transform(customer_features[feature_columns])
customer_features[feature_columns] = scaled_values

In [6]:
# Pairwise cosine similarity between every pair of customers, computed on
# the scaled feature columns. Row/column i of the result corresponds to
# row i of customer_features.
feature_matrix = customer_features[['TotalValue', 'Quantity', 'ProductID']]
cos_sim = cosine_similarity(feature_matrix)

In [7]:
# Label the similarity matrix with CustomerID on both axes so scores can be
# looked up by customer id instead of by position.
customer_ids = customer_features['CustomerID']
similarity_df = pd.DataFrame(cos_sim, index=customer_ids, columns=customer_ids)

In [8]:
# For each customer, find the top 3 most similar *other* customers.
#
# The customer's own entry is removed by label before ranking. Taking the
# four largest scores and discarding the first one only works when the
# customer's self-similarity is strictly the largest value and sorts first;
# if another customer ties with (or, through floating-point rounding, edges
# ahead of) the self-score, that approach would discard a genuine lookalike
# and report the customer as similar to itself.
n_lookalikes = 3
top_similar_customers = {}
for customer_id in customer_features['CustomerID']:
    # Scores of every other customer against customer_id, indexed by CustomerID.
    other_scores = similarity_df[customer_id].drop(labels=customer_id)
    # nlargest returns the scores sorted in descending order.
    top_similar_customers[customer_id] = other_scores.nlargest(n_lookalikes)

In [9]:
# Flatten the per-customer lookalike Series into one record per
# (customer, similar customer) pair; the records are written to
# Lookalike.csv in a later cell.
lookalike_data = [
    {'CustomerID': customer_id, 'SimilarCustomerID': similar_id, 'Score': score}
    for customer_id, similar_customers in top_similar_customers.items()
    for similar_id, score in similar_customers.items()
]

In [10]:
# Turn the lookalike records into a DataFrame and write it, without the
# row index, to Lookalike.csv in the current working directory.
output_path = 'Lookalike.csv'
lookalike_df = pd.DataFrame(lookalike_data)
lookalike_df.to_csv(output_path, index=False)
print("Lookalike model completed. Output saved to Lookalike.csv.")

Lookalike model completed. Output saved to Lookalike.csv.


In [11]:
# Write a second copy of the same lookalike table to the Downloads folder.
# NOTE(review): absolute, machine-specific path; this duplicates the
# Lookalike.csv already written to the working directory.
downloads_copy_path = r'C:\Users\DELL\Downloads\Lookalike.csv'
lookalike_df.to_csv(downloads_copy_path, index=False)
