In [None]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler


# Load the three input tables (paths are relative to the working directory).
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Attach customer columns and then product columns to every transaction.
# Both joins are left joins, so a transaction row is kept even when its
# CustomerID or ProductID has no match in the corresponding table.
merged_df = pd.merge(transactions_df, customers_df, how='left', on='CustomerID')
merged_df = pd.merge(merged_df, products_df, how='left', on='ProductID')

# Customer-by-product purchase matrix: rows are CustomerID, columns are
# ProductID, and each cell holds the summed Quantity (0 where a customer has
# no rows for that product).
customer_product_matrix = pd.pivot_table(
    merged_df,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    aggfunc='sum',
    fill_value=0,
)

# Standardize the matrix columns before comparing customers.
scaler = StandardScaler()
normalized_matrix = scaler.fit_transform(customer_product_matrix)

# Pairwise cosine similarity between customer rows, labelled with CustomerID
# on both axes so that scores can be looked up by id.
similarity_matrix = cosine_similarity(normalized_matrix)
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=customer_product_matrix.index,
    columns=customer_product_matrix.index,
)

# Function to get top 3 similar customers for a given customer
def get_top_similar(customers_df, customer_id, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customers_df: Similarity DataFrame indexed by customer id on both
            axes (despite the parameter name, this is the similarity matrix,
            not the raw customers table).
        customer_id: Column label of the customer to find lookalikes for.
        top_n: Maximum number of lookalikes to return.

    Returns:
        A list of ``(other_customer_id, score)`` tuples in descending score
        order. Fewer than ``top_n`` entries are returned when there are not
        enough other customers.

    Raises:
        KeyError: If ``customer_id`` is not a column of ``customers_df``.
    """
    # Remove the customer's own entry by label instead of assuming it sorts
    # first: another customer can tie with the self-similarity score, in
    # which case a positional slice could return the customer as its own
    # lookalike and drop a genuine match.
    scores = customers_df[customer_id].drop(labels=customer_id, errors='ignore')
    # A stable sort makes the ordering of tied scores deterministic.
    top = scores.sort_values(ascending=False, kind='stable').head(top_n)
    return list(zip(top.index, top.values))

# Generate lookalikes for the first 20 customers in the similarity index.
lookalike_map = {
    customer_id: get_top_similar(similarity_df, customer_id)
    for customer_id in similarity_df.index[:20]
}

# Flatten the map into one row per (customer, lookalike) pair for CSV output.
lookalike_list = [
    {'cust_id': cust_id, 'lookalike_id': lookalike_id, 'score': score}
    for cust_id, lookalikes in lookalike_map.items()
    for lookalike_id, score in lookalikes
]

# Name the columns explicitly so the CSV still gets a header row when no
# lookalike pairs were produced.
lookalike_df = pd.DataFrame(
    lookalike_list,
    columns=['cust_id', 'lookalike_id', 'score'],
)

# Save to CSV
lookalike_df.to_csv('Lookalike.csv', index=False)

print("Lookalike model output saved to Lookalike.csv.")