In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

In [6]:
# Read the three input CSV files into DataFrames
transactions_df, products_df, customers_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Transactions.csv', 'Products.csv', 'Customers.csv')
)

In [8]:
# Join every transaction with its product record on ProductID; columns that
# exist in both frames receive the '_transaction' / '_product' suffixes.
merged_df = transactions_df.merge(
    products_df,
    on='ProductID',
    suffixes=('_transaction', '_product'),
)

# Copy a suffixed price column into a single 'Price' column, preferring the
# transaction-side column when it is present.
for price_col in ('Price_transaction', 'Price_product'):
    if price_col in merged_df.columns:
        merged_df['Price'] = merged_df[price_col]
        break



In [9]:
# Drop the suffixed price columns now that 'Price' holds the value.
# errors='ignore' keeps this a no-op when a column is absent, and rebinding
# the name (instead of inplace=True) avoids mutating the frame in place, so
# the cell can be re-run safely.
merged_df = merged_df.drop(
    columns=['Price_transaction', 'Price_product'],
    errors='ignore',
)


In [10]:
# Collapse the merged rows to one record per (CustomerID, ProductID) pair:
# summed quantity, mean price, and the first category value in the group.
customer_product_df = (
    merged_df
    .groupby(['CustomerID', 'ProductID'])
    .agg(
        Quantity=('Quantity', 'sum'),
        Price=('Price', 'mean'),
        Category=('Category', 'first'),
    )
    .reset_index()
)

# Expand Category into indicator columns, omitting the first level
customer_product_df = pd.get_dummies(
    data=customer_product_df,
    columns=['Category'],
    drop_first=True,
)




In [40]:
import pandas as pd

# NOTE(review): pandas is already imported and Transactions.csv is already
# loaded by earlier cells; this cell repeats both and carries an
# out-of-order execution count, so it looks like a leftover inspection cell.
transactions_df = pd.read_csv('Transactions.csv')

# Show which columns the transactions file provides
print(list(transactions_df.columns))


['TransactionID', 'CustomerID', 'ProductID', 'TransactionDate', 'Quantity', 'TotalValue', 'Price']


In [11]:
 #Prepare Data for Similarity Computation
# Customer x product matrix of Quantity values; customer/product pairs with
# no row in customer_product_df are filled with 0.
pivot_table = pd.pivot_table(
    customer_product_df,
    values='Quantity',
    index='CustomerID',
    columns='ProductID',
    fill_value=0,
)


In [12]:
# Fit a StandardScaler on the pivot table and transform it, then wrap the
# resulting array in a DataFrame that reuses the pivot table's labels.
scaler = StandardScaler()
scaled_pivot = scaler.fit(pivot_table).transform(pivot_table)
scaled_pivot_df = pd.DataFrame(
    data=scaled_pivot,
    index=pivot_table.index,
    columns=pivot_table.columns,
)


In [13]:
# Pairwise cosine similarity between the rows (customers) of the scaled matrix
similarity_matrix = cosine_similarity(X=scaled_pivot_df)

In [14]:
# Label both axes of the similarity matrix with the pivot table's CustomerID index
similarity_df = pd.DataFrame(
    data=similarity_matrix,
    index=pivot_table.index,
    columns=pivot_table.index,
)


In [15]:
# Step 6: Generate Lookalike Recommendations
# Function to find top-N similar customers
def get_top_n_similar(customers, similarity_df, top_n=3):
    """Return the ``top_n`` most similar customers for each requested customer.

    Parameters
    ----------
    customers : iterable
        Customer IDs to look up.
    similarity_df : pandas.DataFrame
        Square similarity table whose row and column labels are customer IDs.
    top_n : int, default 3
        Maximum number of lookalikes per customer; values <= 0 yield no matches.

    Returns
    -------
    dict
        Maps each customer ID to a list of ``(other_customer_id, score)``
        tuples ordered from highest to lowest score. A customer ID that has
        no row in ``similarity_df`` maps to an empty list.
    """
    lookalike_map = {}
    limit = max(top_n, 0)
    for customer_id in customers:
        # A customer without a row in the matrix has nothing to compare;
        # record an empty result instead of raising KeyError mid-loop.
        if customer_id not in similarity_df.index:
            lookalike_map[customer_id] = []
            continue

        # Remove the customer's own entry by label. Skipping "the first
        # sorted element" is wrong whenever the self-score is not the strict
        # maximum (e.g. a tie with another customer, or a zero self-score).
        scores = similarity_df.loc[customer_id].drop(labels=customer_id, errors='ignore')
        best_matches = scores.sort_values(ascending=False).head(limit)
        lookalike_map[customer_id] = list(zip(best_matches.index, best_matches.values))
    return lookalike_map

# Select the customers whose IDs are C0001 through C0020, then request the
# top-3 lookalikes for each of them.
top_20_customers = customers_df[
    customers_df['CustomerID'].isin([f'C{i:04d}' for i in range(1, 21)])
]
lookalike_map = get_top_n_similar(
    customers=top_20_customers['CustomerID'],
    similarity_df=similarity_df,
    top_n=3,
)


In [16]:
# Turn the lookalike map into one row per customer and write it to disk
output_data = [
    dict(CustomerID=cust_id, Lookalikes=matches)
    for cust_id, matches in lookalike_map.items()
]
lookalike_df = pd.DataFrame(output_data)
lookalike_df.to_csv('Lookalike.csv', index=False)


In [17]:
# Confirm the export and preview the first five rows of the result
print("Lookalike recommendations saved to 'Lookalike.csv'. Here are the first few rows:")
print(lookalike_df.head(n=5))

Lookalike recommendations saved to 'Lookalike.csv'. Here are the first few rows:
  CustomerID                                         Lookalikes
0      C0001  [(C0194, 0.40492753118932323), (C0104, 0.37400...
1      C0002  [(C0030, 0.40461685378594076), (C0091, 0.38377...
2      C0003  [(C0181, 0.4775717980039305), (C0134, 0.471016...
3      C0004  [(C0070, 0.3519014889798192), (C0175, 0.316097...
4      C0005  [(C0096, 0.48745613929263704), (C0023, 0.47025...
