In [None]:
import pandas as pd



In [None]:
# Read the three source tables (customers, products, transactions) from the
# /content directory.
customers = pd.read_csv('/content/Customers.csv')
products = pd.read_csv('/content/Products.csv')
transactions = pd.read_csv('/content/Transactions.csv')

# Preview the first rows of each table, in load order, to sanity-check columns.
for frame in (customers, products, transactions):
    print(frame.head())


  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067   2024-04-25 7:38:55         1   
3       

# Step 2: Data Cleaning and Feature Engineering

In [None]:
# Parse SignupDate once and derive the signup year and month from it.
signup_dates = pd.to_datetime(customers['SignupDate'])
customers['SignupYear'] = signup_dates.dt.year
customers['SignupMonth'] = signup_dates.dt.month

# Per-customer summary: sum of TotalValue and number of transactions.
customer_transactions = transactions.groupby('CustomerID').agg(
    total_spent=('TotalValue', 'sum'),
    transaction_count=('TransactionID', 'count')
).reset_index()

# Left-join so every customer is kept. A customer with no transactions has no
# row in customer_transactions and would get NaN in both aggregate columns
# (which also turns transaction_count into a float column), so record zero
# spend and zero transactions for them and restore the integer count dtype.
customer_data = (
    pd.merge(customers, customer_transactions, on='CustomerID', how='left')
    .fillna({'total_spent': 0, 'transaction_count': 0})
    .astype({'transaction_count': 'int64'})
)

# Attach each transaction's product Category (left join keeps every transaction).
transactions_with_products = pd.merge(
    transactions,
    products[['ProductID', 'Category']],
    on='ProductID',
    how='left'
)


# Step 3: Similarity Calculation

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics.pairwise import cosine_similarity

# Customer x category matrix: total Quantity purchased per category, with 0
# where a customer bought nothing in that category.
customer_product_matrix = transactions_with_products.pivot_table(
    values='Quantity',
    index='CustomerID',
    columns='Category',
    aggfunc='sum',
    fill_value=0,
)

# Standardize the category columns before comparing customers
# (optional, based on the model's needs).
scaler = StandardScaler()
scaler.fit(customer_product_matrix)
customer_product_matrix_scaled = scaler.transform(customer_product_matrix)

# Pairwise cosine similarity between the scaled customer rows.
similarity_matrix = cosine_similarity(customer_product_matrix_scaled)

# Label both axes with the customer IDs so scores can be looked up by ID.
customer_ids = customer_product_matrix.index
similarity_df = pd.DataFrame(
    similarity_matrix,
    index=customer_ids,
    columns=customer_ids,
)


# Step 4: Lookalike Recommendation

In [None]:
lookalike_recommendations = {}

# Take the first 20 rows of customer_data by position (C0001 to C0020 when the
# customers file is ordered by CustomerID).
for customer_id in customer_data['CustomerID'][:20]:
    # similarity_df is built from the transactions table, so it only has
    # customers with at least one transaction, while customer_data holds every
    # customer. Give customers without a similarity column an empty list
    # instead of failing with a KeyError.
    if customer_id not in similarity_df.columns:
        lookalike_recommendations[customer_id] = []
        continue

    # Get the similarity scores for the current customer
    similarity_scores = similarity_df[customer_id]

    # Sort the scores and keep the top 3 most similar customers (excluding the customer itself)
    similar_customers = similarity_scores.drop(customer_id).sort_values(ascending=False).head(3)

    # Store (customer id, score) pairs. Scores are converted to built-in floats
    # so their printed/stringified form does not depend on the NumPy version.
    lookalike_recommendations[customer_id] = [
        (cust_id, float(score))
        for cust_id, score in zip(similar_customers.index, similar_customers.values)
    ]

# Display the recommendations for the first customer
print(lookalike_recommendations['C0001'])


[('C0157', 0.9602751057942884), ('C0069', 0.9364788887212507), ('C0026', 0.9277254827838058)]


# Step 5: Save Recommendations to CSV

In [None]:
# Build one row per customer; the list of (lookalike id, score) pairs is stored
# as its string representation in the Lookalikes column.
lookalike_rows = [
    {'CustomerID': source_id, 'Lookalikes': str(matches)}
    for source_id, matches in lookalike_recommendations.items()
]
lookalike_df = pd.DataFrame(lookalike_rows)

# Write the table to Lookalike.csv without the row index.
lookalike_df.to_csv('Lookalike.csv', index=False)
