## Importing Data

In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity


customers_df = pd.read_csv('Customers.csv')
transactions_df = pd.read_csv('Transactions.csv')
products_df = pd.read_csv('Products.csv')



## Preprocessing data and Aggregating, Grouping 

In [None]:
# Preprocessing
transactions_df['TransactionDate'] = pd.to_datetime(transactions_df['TransactionDate'], format='%d-%m-%Y %H:%M')

# Aggregate transaction data per customer
customer_transactions = (
    transactions_df
    .groupby('CustomerID')
    .agg(
        total_spent=('TotalValue', 'sum'),
        total_quantity=('Quantity', 'sum'),
        unique_products=('ProductID', 'nunique')
    )
    .reset_index()
)

# Merge customer data with transaction summaries
customers_merged = customers_df.merge(customer_transactions, on='CustomerID', how='left').fillna(0)

# Add product category preferences
product_preferences = (
    transactions_df
    .merge(products_df, on='ProductID')
    .groupby(['CustomerID', 'Category'])
    .size()
    .unstack(fill_value=0)
)

customers_final = customers_merged.join(product_preferences, on='CustomerID')

# Handle missing values in product categories
customers_final.fillna(0, inplace=True)

# Normalize numeric features
numeric_columns = customers_final.select_dtypes(include=['number']).columns
scaler = MinMaxScaler()
normalized_features = scaler.fit_transform(customers_final[numeric_columns])

# Calculate similarity matrix
similarity_matrix = cosine_similarity(normalized_features)

# 

## Saving A looklike.csv file

In [3]:
Get top 3 lookalikes for the first 20 customers
customer_ids = customers_final['CustomerID'][:20]
lookalike_map = {}

for i, customer_id in enumerate(customer_ids):
    similarity_scores = list(enumerate(similarity_matrix[i]))
    sorted_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)[1:4]
    lookalike_map[customer_id] = [(customers_final['CustomerID'][j], score) for j, score in sorted_scores]


Save lookalike map to CSV
lookalike_df = pd.DataFrame({
    'CustomerID': lookalike_map.keys(),
    'Lookalikes': [str(lookalike_map[cust_id]) for cust_id in lookalike_map.keys()]
})

lookalike_df.to_csv('Lookalike.csv', index=False)


SyntaxError: invalid syntax (188167287.py, line 1)