# Importing Libraries

In [31]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Load the datasets

In [32]:
# Read the three source tables in one pass; the 'utf-8-sig' codec drops a
# leading byte-order mark if a CSV was saved with one.
customers_df, transactions_df, products_df = (
    pd.read_csv(csv_name, encoding='utf-8-sig')
    for csv_name in ('Customers.csv', 'Transactions.csv', 'Products.csv')
)

In [44]:
# Preview the first five customer rows to check the table's columns
# (CustomerID, CustomerName, Region, SignupDate).
customers_df.head()

Unnamed: 0,CustomerID,CustomerName,Region,SignupDate
0,C0001,Lawrence Carroll,South America,2022-07-10
1,C0002,Elizabeth Lutz,Asia,2022-02-13
2,C0003,Michael Rivera,South America,2024-03-07
3,C0004,Kathleen Rodriguez,South America,2022-10-09
4,C0005,Laura Weber,Asia,2022-08-15


# Merge Transactions with Products

In [33]:
# Attach product attributes to every transaction. The default inner join on
# ProductID keeps only transactions whose product exists in products_df.
transactions_products = transactions_df.merge(products_df, on='ProductID')

# Merge Transactions with Customers

In [34]:
# Add customer attributes to the product-enriched transactions. The default
# inner join keeps only customers that have at least one transaction.
transactions_customers = transactions_products.merge(customers_df, on='CustomerID')

# Feature Engineering: Create a pivot table with customers and their average spending per category

In [35]:
# One row per customer, one column per product category. Each cell holds the
# mean TotalValue of that customer's transactions in the category, or 0 when
# the customer has no transactions in it.
customer_category_spending = pd.pivot_table(
    transactions_customers,
    values='TotalValue',
    index='CustomerID',
    columns='Category',
    aggfunc='mean',
    fill_value=0,
)

# Standardize the features

In [36]:
# Learn each category column's mean and standard deviation, then rescale the
# spending matrix so every category contributes on a comparable scale.
scaler = StandardScaler()
scaler.fit(customer_category_spending)
customer_category_spending_scaled = scaler.transform(customer_category_spending)

# Calculate cosine similarity between customers

In [37]:
# Pairwise cosine similarity between the scaled customer rows: entry [i, j]
# compares the i-th and j-th rows of customer_category_spending, giving a
# square, symmetric array.
similarity_matrix = cosine_similarity(customer_category_spending_scaled)

In [46]:
# Inspect the raw similarity array (NumPy abbreviates large arrays with "...").
similarity_matrix

array([[ 1.        , -0.00285627,  0.90866219, ..., -0.20421403,
         0.56045775, -0.85178099],
       [-0.00285627,  1.        , -0.20564071, ...,  0.7605466 ,
         0.64846917, -0.48565634],
       [ 0.90866219, -0.20564071,  1.        , ..., -0.35825782,
         0.37542747, -0.67246518],
       ...,
       [-0.20421403,  0.7605466 , -0.35825782, ...,  1.        ,
         0.06428837, -0.31431714],
       [ 0.56045775,  0.64846917,  0.37542747, ...,  0.06428837,
         1.        , -0.69710257],
       [-0.85178099, -0.48565634, -0.67246518, ..., -0.31431714,
        -0.69710257,  1.        ]])

# Convert similarity matrix to DataFrame

In [38]:
# Label both axes with CustomerID so a similarity can be looked up by ID
# rather than by row/column position.
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    columns=customer_category_spending.index,
    index=customer_category_spending.index,
)

# Function to get top 3 similar customers

In [39]:
def get_top_3_similar(customers_df, similarity_df, customer_id):
    """Return the three customers most similar to ``customer_id``.

    Parameters
    ----------
    customers_df : pandas.DataFrame
        Not used by the lookup; kept so existing callers keep working.
    similarity_df : pandas.DataFrame
        Similarity table whose index and columns are customer IDs.
    customer_id : hashable
        Column label of the customer to find lookalikes for.

    Returns
    -------
    tuple[list, list]
        ``(ids, scores)`` ordered from most to least similar, with scores as
        plain Python floats. Fewer than three entries are returned when fewer
        than three other customers exist.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a column of ``similarity_df``.
    """
    # Remove the customer's own entry by label instead of assuming it sorts
    # first. Another customer can tie at 1.0, and a degenerate (all-zero)
    # feature row has self-similarity 0, so positional slicing could return
    # the customer as its own lookalike and drop a genuine match.
    others = similarity_df[customer_id].drop(labels=customer_id, errors='ignore')
    # nlargest keeps the first occurrence on ties, so the result is deterministic.
    top_matches = others.nlargest(3)
    return top_matches.index.tolist(), top_matches.tolist()

# Generate Lookalike recommendations for the first 20 customers

In [40]:
# Build {customer_id: [(lookalike_id, score), ...]} for the first 20 customers.
# similarity_df only covers customers present in the merged transactions, so
# customers without any purchase history are skipped instead of raising KeyError.
lookalike_dict = {}
for customer_id in customers_df['CustomerID'].iloc[:20]:
    if customer_id not in similarity_df.columns:
        continue
    similar_ids, scores = get_top_3_similar(customers_df, similarity_df, customer_id)
    lookalike_dict[customer_id] = list(zip(similar_ids, scores))

# Convert the lookalike dictionary to a DataFrame

In [41]:
# Turn the mapping into a table: one row per customer, one column per ranked
# lookalike, each cell holding an (id, score) tuple.
lookalike_df = pd.DataFrame.from_dict(
    data=lookalike_dict,
    orient='index',
    columns=['Lookalike1', 'Lookalike2', 'Lookalike3'],
)

# Save the lookalike recommendations to a CSV file

In [42]:
# Persist the recommendations; the customer-ID index is written as the first
# column. (The call was missing its closing parenthesis, a SyntaxError.)
lookalike_df.to_csv('Rahul_Chauhan_Lookalike.csv')

In [43]:
# Preview the first five lookalike rows as a sanity check of the output table.
lookalike_df.head()

Unnamed: 0,Lookalike1,Lookalike2,Lookalike3
C0001,"(C0184, 0.9472355498942732)","(C0047, 0.9290902929881713)","(C0152, 0.915772822583414)"
C0002,"(C0134, 0.9417241221886844)","(C0062, 0.8702122771421835)","(C0128, 0.8399356355278532)"
C0003,"(C0163, 0.996442720815831)","(C0152, 0.9738913786466057)","(C0012, 0.9738079924314287)"
C0004,"(C0090, 0.9870839964399987)","(C0064, 0.9479684406070296)","(C0127, 0.9134965740927001)"
C0005,"(C0197, 0.9771085009829121)","(C0007, 0.9534556278772148)","(C0199, 0.9158394606789362)"
