In [11]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load datasets
# NOTE(review): these are absolute, machine-specific paths; point them at a
# shared data directory before running the notebook on another machine.
customers_file = "C:/Users/Eshwar/Downloads/Customers.csv"
products_file = "C:/Users/Eshwar/Downloads/Products.csv"
transactions_file = "C:/Users/Eshwar/Downloads/Transactions.csv"

# Read the files here so this cell does not depend on DataFrames left behind
# in the kernel by another cell (it must survive Restart & Run All).
customers_df = pd.read_csv(customers_file)
products_df = pd.read_csv(products_file)
transactions_df = pd.read_csv(transactions_file)

# Merge datasets: one row per transaction, enriched with the matching
# customer and product attributes (default inner joins on the key columns).
merged_df = transactions_df.merge(customers_df, on='CustomerID').merge(products_df, on='ProductID')

# Feature engineering: collapse the transaction history to one row per customer
feature_cols = ['TotalValue', 'Quantity', 'ProductID']

customer_transactions = (
    merged_df
    .groupby('CustomerID')
    .agg({
        'TotalValue': 'sum',      # total amount spent
        'Quantity': 'sum',        # total units bought
        'ProductID': 'nunique',   # number of distinct products bought
    })
    .reset_index()
)

# Standardise the three features; the scaled values overwrite the raw
# aggregates inside customer_transactions.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(customer_transactions[feature_cols])
customer_transactions[feature_cols] = scaled_values

# Pairwise cosine similarity between the customers' standardised feature rows
similarity_matrix = cosine_similarity(customer_transactions[feature_cols])

# Label both axes with CustomerID so scores can be looked up by customer
customer_ids = customer_transactions['CustomerID']
similarity_df = pd.DataFrame(similarity_matrix, index=customer_ids, columns=customer_ids)

# Recommend the top 3 most similar customers for each of the first 20 customers
lookalike_recommendations = {}

for customer in customer_transactions['CustomerID'].head(20):
    # Remove the customer's own entry by label before ranking. Skipping
    # "position 0 after sorting" is not safe: a tie or a floating-point
    # overshoot (e.g. an off-diagonal 1.0000000000000002) can sort another
    # customer ahead, which would list the customer as its own lookalike.
    similar_customers = (
        similarity_df[customer]
        .drop(labels=customer)
        .sort_values(ascending=False, kind='stable')  # deterministic order on ties
        .head(3)
    )
    # Store plain Python floats so the str() serialisation used when saving
    # does not depend on how the installed NumPy prints its scalar types.
    lookalike_recommendations[customer] = [
        (other_id, float(score)) for other_id, score in similar_customers.items()
    ]

# Build one record per customer; the list of (lookalike, score) pairs is
# stored as its string representation in a single 'Lookalikes' column.
lookalike_records = []
for cust_id, lookalikes in lookalike_recommendations.items():
    lookalike_records.append({'CustomerID': cust_id, 'Lookalikes': str(lookalikes)})

lookalike_df = pd.DataFrame(lookalike_records)

# Write the table to Lookalike.csv in the working directory (no index column)
lookalike_df.to_csv('Lookalike.csv', index=False)

# Show the recommendations that were written
print(lookalike_df)


   CustomerID                                         Lookalikes
0       C0001  [('C0164', 0.9684103747672834), ('C0137', 0.96...
1       C0002  [('C0029', 0.9997616343498978), ('C0031', 0.99...
2       C0003  [('C0176', 0.8906401232895584), ('C0027', 0.86...
3       C0004  [('C0075', 0.9976740652389241), ('C0175', 0.99...
4       C0005  [('C0058', 0.9997982043779898), ('C0123', 0.99...
5       C0006  [('C0079', 0.9998795967431424), ('C0196', 0.99...
6       C0007  [('C0140', 0.9985337287784547), ('C0085', 0.99...
7       C0008  [('C0179', 0.9965086350321659), ('C0081', 0.99...
8       C0009  [('C0192', 0.9983620640345526), ('C0177', 0.99...
9       C0010  [('C0142', 0.964094866380962), ('C0027', 0.956...
10      C0011  [('C0023', 0.9819122814188915), ('C0064', 0.97...
11      C0012  [('C0041', 0.997318308138947), ('C0045', 0.995...
12      C0013  [('C0059', 0.9992581092102039), ('C0141', 0.99...
13      C0014  [('C0033', 0.9996378284444327), ('C0095', 0.99...
14      C0015  [('C0131',

In [5]:
# Print the column labels of merged_df (merged_df must already exist in the kernel).
# NOTE(review): the Price_x / Price_y labels in the captured output are pandas'
# default suffixes for overlapping column names in a merge, so two of the merged
# inputs presumably both carry a 'Price' column — confirm against the CSVs.
print(merged_df.columns)


Index(['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate',
       'Quantity', 'TotalValue', 'Price_x', 'CustomerName', 'Region',
       'SignupDate', 'ProductName', 'Category', 'Price_y'],
      dtype='object')


In [13]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Read the transaction and customer tables from the working directory
transactions_df = pd.read_csv('Transactions.csv')
customers_df = pd.read_csv('Customers.csv')

# Attach customer attributes to every transaction.
# The aggregation that follows reads 'CustomerID', 'ProductID', 'Quantity'
# and 'TotalValue'; no customer attribute column is used by it.
# how='left' keeps every transaction row even if its CustomerID has no
# matching row in Customers.csv.
merged_df = transactions_df.merge(customers_df, how='left', on='CustomerID')

# Per-customer summary: total spend, total units, number of distinct products
summary_features = ['total_value', 'total_quantity', 'unique_products']

transactions_by_customer = merged_df.groupby('CustomerID')
customer_summary = transactions_by_customer.agg(
    total_value=('TotalValue', 'sum'),
    total_quantity=('Quantity', 'sum'),
    unique_products=('ProductID', 'nunique'),
).reset_index()

# Standardise the summary features; the result is kept separately and
# customer_summary itself retains the raw aggregates.
scaler = StandardScaler()
customer_summary_scaled = scaler.fit_transform(customer_summary[summary_features])

# Pairwise cosine similarity between the standardised customer rows
cos_sim = cosine_similarity(customer_summary_scaled)

# Label rows and columns with CustomerID so scores can be looked up by customer
summary_ids = customer_summary['CustomerID']
cos_sim_df = pd.DataFrame(cos_sim, index=summary_ids, columns=summary_ids)

# Function to recommend the top-N lookalikes for a given customer
def recommend_lookalikes(customer_id, top_n=3, similarity_df=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customer_id: Label of the customer to look up; must be a column of
            the similarity matrix.
        top_n: Maximum number of lookalikes to return (default 3). Values
            below 1 yield an empty list.
        similarity_df: Optional similarity DataFrame whose rows and columns
            are labelled by customer ID. When omitted, the notebook-level
            ``cos_sim_df`` is used.

    Returns:
        A list of ``(customer_id, similarity_score)`` tuples ordered from
        most to least similar. The queried customer is never included and
        scores are plain Python floats.

    Raises:
        KeyError: If ``customer_id`` is not a column of the similarity matrix.
    """
    if similarity_df is None:
        similarity_df = cos_sim_df

    scores = similarity_df[customer_id]
    # Drop the customer's own entry by label. Slicing off "position 0 after
    # sorting" is not safe: a tie or a floating-point overshoot (an
    # off-diagonal 1.0000000000000002) can rank another customer first, which
    # would return the customer as its own lookalike and lose a real match.
    ranked = (
        scores.drop(labels=customer_id, errors='ignore')
        .sort_values(ascending=False, kind='stable')  # deterministic on ties
        .iloc[:max(int(top_n), 0)]
    )
    return [(other_id, float(score)) for other_id, score in ranked.items()]

# Create the lookalike mapping for the first 20 customers (C0001 - C0020)
lookalike_map = {}

for i in range(1, 21):
    customer_id = f'C{i:04d}'  # Format customer ID as 'C0001', 'C0002', ...
    # cos_sim_df is built from the per-customer aggregation of transaction
    # rows, so a customer with no transactions has no column in it. Record an
    # empty result for such a customer instead of aborting with a KeyError.
    if customer_id not in cos_sim_df.columns:
        lookalike_map[customer_id] = []
        continue
    recommendations = recommend_lookalikes(customer_id)
    lookalike_map[customer_id] = recommendations

# Flatten the mapping into one [customer, lookalike, score] row per recommendation
lookalike_list = [
    [source_id, match_id, match_score]
    for source_id, matches in lookalike_map.items()
    for match_id, match_score in matches
]

# Tabulate the rows and write them to Lookalike.csv (no index column)
output_columns = ['CustomerID', 'LookalikeID', 'SimilarityScore']
lookalike_df = pd.DataFrame(lookalike_list, columns=output_columns)
lookalike_df.to_csv('Lookalike.csv', index=False)

# Print the mapping itself so its structure can be checked by eye
print("Lookalike map:")
print(lookalike_map)

print("\nLookalike model created and saved to 'Lookalike.csv'.")


Lookalike model created and saved to 'Lookalike.csv'.
