In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Read the three input CSVs (paths are relative to the notebook's working directory).
customers_df, products_df, transactions_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)


In [5]:
# Attach customer and product attributes to every transaction.
# Both joins use pandas' default inner join, so transactions whose
# CustomerID or ProductID has no match are dropped.
merged_df = (
    transactions_df
    .merge(customers_df, on='CustomerID')
    .merge(products_df, on='ProductID')
)

# Customer x product matrix of purchased quantities; pairs with no purchase are 0.
# 'mean' is pandas' default aggregation, written out here for visibility.
# NOTE(review): this averages Quantity over a customer's repeat purchases of a
# product rather than totalling it -- confirm 'mean' (not 'sum') is intended.
customer_product_matrix = pd.pivot_table(
    merged_df,
    index='CustomerID',
    columns='ProductID',
    values='Quantity',
    aggfunc='mean',
    fill_value=0,
)


In [6]:
# Rescale every product column to zero mean / unit variance so that
# high-volume products do not dominate the similarity computation.
scaler = StandardScaler()
customer_product_matrix_scaled = scaler.fit_transform(customer_product_matrix)

# Pairwise cosine similarity between customer rows (square, customers x customers).
cosine_sim = cosine_similarity(customer_product_matrix_scaled)

# Label both axes with the customer IDs so scores can be looked up by ID.
customer_ids = customer_product_matrix.index
cosine_sim_df = pd.DataFrame(cosine_sim, index=customer_ids, columns=customer_ids)


In [None]:
# Function to get the top 3 similar customers
def get_top_similar_customers(customer_id, cosine_sim_df, top_n=3):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Parameters
    ----------
    customer_id : hashable
        Label of the customer to look up; must be a column of ``cosine_sim_df``.
    cosine_sim_df : pandas.DataFrame
        Customer-by-customer similarity matrix labelled by customer ID on
        both axes.
    top_n : int, default 3
        Maximum number of lookalikes to return. Values <= 0 yield an empty
        result.

    Returns
    -------
    pandas.Series
        Similarity scores indexed by the similar customers' IDs, sorted in
        descending order. Never contains ``customer_id`` itself.

    Raises
    ------
    KeyError
        If ``customer_id`` is not a column of ``cosine_sim_df``.
    """
    # Remove the customer's own entry by label, not by position: after sorting,
    # the self-match is not guaranteed to be first (another customer can tie or,
    # through floating-point rounding, out-rank the self-similarity), so slicing
    # off position 0 could return the customer itself and drop a real lookalike.
    sim_scores = cosine_sim_df[customer_id].drop(labels=customer_id, errors='ignore')
    ranked_scores = sim_scores.sort_values(ascending=False)
    return ranked_scores.head(max(top_n, 0))

# Generating lookalike recommendations for the first 20 customers
lookalike_map = {}
skipped_customers = []
for customer_id in customers_df['CustomerID'][:20]:
    # The similarity matrix only contains customers that appear in the merged
    # transactions; a customer without any would raise KeyError on lookup,
    # so record and skip them instead of aborting the whole run.
    if customer_id not in cosine_sim_df.columns:
        skipped_customers.append(customer_id)
        continue
    top_similar_customers = get_top_similar_customers(customer_id, cosine_sim_df)
    lookalike_map[customer_id] = list(zip(top_similar_customers.index, top_similar_customers.values))

if skipped_customers:
    print(f"No transaction history, skipped: {skipped_customers}")

# Flatten the map into one row per (customer, lookalike) pair. Building the
# frame directly from records avoids the stack/apply(pd.Series) reshaping and
# also works when the map is empty or a customer has fewer than three matches.
lookalike_df = pd.DataFrame(
    [
        (source_id, similar_id, score)
        for source_id, pairs in lookalike_map.items()
        for similar_id, score in pairs
    ],
    columns=['CustomerID', 'SimilarCustomerID', 'SimilarityScore'],
).set_index('CustomerID')  # named index -> the CSV's first column gets a header

# Save the lookalike recommendations to a CSV file
lookalike_df.to_csv('Bishal_Mishra_Lookalike.csv', index=True)

# Displaying the lookalike recommendations
print(lookalike_df.head(60))  # Displaying top 60 recommendations (3 per customer for 20 customers)