In [17]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Raw input tables exported as CSV (absolute paths on the author's machine).
customers_path = r"C:\Users\karth\Downloads\Customers.csv"
products_path = r"C:\Users\karth\Downloads\Products.csv"

# Read both tables, customers first, into their own DataFrames.
customers_df, products_df = map(pd.read_csv, (customers_path, products_path))



In [18]:
# Parse the signup timestamp and derive numeric year / month features from it.
customers_df['SignupDate'] = pd.to_datetime(customers_df['SignupDate'])
customers_df['SignupYear'] = customers_df['SignupDate'].dt.year
customers_df['SignupMonth'] = customers_df['SignupDate'].dt.month

# One-hot encode the feature columns, dropping the first level of each
# encoded column.
customers_encoded = pd.get_dummies(
    customers_df[['Region', 'SignupYear', 'SignupMonth']],
    drop_first=True,
)

# Standardise every feature column before measuring similarity.
scaler = StandardScaler()
customers_scaled = scaler.fit_transform(customers_encoded)

# Pairwise cosine similarity between all customers, labelled on both axes
# by CustomerID so that rows/columns can be looked up by customer.
similarity_matrix = cosine_similarity(customers_scaled)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customers_df['CustomerID'],
    columns=customers_df['CustomerID'],
)



In [19]:
def get_top_lookalikes(similarity_df, top_n=3, n_customers=20):
    """Return the most similar *other* customers for the leading customers.

    Parameters
    ----------
    similarity_df : pandas.DataFrame
        Customer-by-customer similarity scores. Every row label that is
        processed must also exist as a column label, because each
        customer's scores are read from the column named after it.
    top_n : int, default 3
        Number of lookalikes to keep per customer.
    n_customers : int or None, default 20
        How many customers, taken from the start of ``similarity_df.index``,
        to produce lookalikes for. ``None`` processes every customer.

    Returns
    -------
    dict
        Maps each processed customer ID to a list of ``(other_id, score)``
        tuples ordered from most to least similar. The list holds at most
        ``top_n`` entries (fewer if there are not enough other customers)
        and never contains the customer itself. Scores are built-in floats.
    """
    lookalikes = {}
    for customer_id in similarity_df.index[:n_customers]:
        scores = similarity_df[customer_id]
        # Drop the customer's own entry *before* ranking. Ranking first and
        # filtering afterwards returns top_n + 1 rows whenever enough other
        # customers tie with the self-similarity score, because the
        # customer's own row may then not be among the rows selected.
        others = scores[scores.index != customer_id]
        top_matches = others.nlargest(top_n)
        # Convert to plain floats so that later str()/CSV serialisation does
        # not depend on how NumPy renders its scalar types.
        lookalikes[customer_id] = [
            (other_id, float(score))
            for other_id, score in zip(top_matches.index, top_matches.values)
        ]
    return lookalikes

# Three best matches for each customer handled by get_top_lookalikes.
lookalike_results = get_top_lookalikes(similarity_df, top_n=3)

In [20]:
# One record per customer; the list of (id, score) pairs is stored as its
# string representation so it fits in a single CSV cell.
lookalike_list = [
    {'CustomerID': customer, 'Lookalikes': str(matches)}
    for customer, matches in lookalike_results.items()
]

lookalike_df = pd.DataFrame(lookalike_list)



In [21]:
# Persist the lookalike table without the DataFrame's positional index.
lookalike_df.to_csv(
    path_or_buf=r"C:\Users\karth\Downloads\Lookalike.csv",
    index=False,
)

print("\nLookalike customers have been saved to Lookalike.csv.")


Lookalike customers have been saved to Lookalike.csv.
