In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the three input tables from CSV files in the working directory
customers, products, transactions = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Preview: first rows of every table, one table after another
print(customers.head(), products.head(), transactions.head(), sep='\n')

# Data quality: number of missing values per column in every table
print(
    customers.isna().sum(),
    products.isna().sum(),
    transactions.isna().sum(),
    sep='\n',
)

# Summary statistics as produced by DataFrame.describe() for every table
print(
    customers.describe(),
    products.describe(),
    transactions.describe(),
    sep='\n',
)

  CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
  ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
  TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  2024-04-25 07:38:55         1   
3       

## Task - 2

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Load the datasets
customers = pd.read_csv('Customers.csv')
products = pd.read_csv('Products.csv')
transactions = pd.read_csv('Transactions.csv')

# Attach customer and product attributes to every transaction.
# Both joins are left joins, so every transaction row is kept even when its
# CustomerID / ProductID has no match (the joined columns are then NaN).
merged_data = pd.merge(transactions, customers, on='CustomerID', how='left')
merged_data = pd.merge(merged_data, products, on='ProductID', how='left')


def _most_frequent(values):
    """Return the most frequent non-null entry of ``values``, or None if there is none.

    ``Series.mode()`` is empty when every entry is NaN; indexing it with ``[0]``
    would raise, so that case is handled explicitly.
    """
    modes = values.mode()
    return None if modes.empty else modes.iloc[0]


# Create customer profiles: one row per CustomerID that has at least one
# transaction (customers without transactions do not appear in merged_data).
customer_profiles = (
    merged_data.groupby('CustomerID')
    .agg({
        'TotalValue': 'sum',        # Total spending by the customer
        'Quantity': 'sum',          # Total quantity purchased by the customer
        # 'Price_x' is the left-hand Price column of the second merge; this
        # assumes Transactions.csv carries its own Price column next to the one
        # in Products.csv (pandas then suffixes them with _x / _y).
        'Price_x': 'mean',          # Average price of products purchased
        'Region': 'first',          # Region of the customer
        'Category': _most_frequent  # Most frequent category purchased
    })
    .reset_index()
    # Rename for clarity
    .rename(columns={'Price_x': 'AvgPrice'})
)

# Normalize the numeric features to [0, 1] for the similarity calculation.
# The raw values in these three columns are overwritten by the scaled ones.
scaler = MinMaxScaler()
customer_profiles[['TotalValue', 'Quantity', 'AvgPrice']] = scaler.fit_transform(
    customer_profiles[['TotalValue', 'Quantity', 'AvgPrice']]
)

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

# Pairwise cosine similarity over the three numeric profile features;
# row/column i of the result corresponds to row i of customer_profiles.
similarity_matrix = cosine_similarity(
    customer_profiles.loc[:, ['TotalValue', 'Quantity', 'AvgPrice']]
)

# Function to get the top-N most similar customers (3 by default)
def get_top_similar_customers(customer_id, similarity_matrix, top_n=3, profiles=None):
    """Return the ``top_n`` customers most similar to ``customer_id``.

    Args:
        customer_id: Value to look up in the ``CustomerID`` column of ``profiles``.
        similarity_matrix: 2-D NumPy array of pairwise similarities; row and
            column ``i`` must correspond to the ``i``-th row (by position) of
            ``profiles``.
        top_n: Maximum number of lookalikes to return.
        profiles: DataFrame with a ``CustomerID`` column. When omitted, the
            notebook-level ``customer_profiles`` is used.

    Returns:
        A list of ``(CustomerID, similarity_score)`` tuples ordered from most to
        least similar. The queried customer itself is never included. Ties are
        broken by row position so the result is deterministic.

    Raises:
        IndexError: If ``customer_id`` does not occur in ``profiles``.
    """
    if profiles is None:
        profiles = customer_profiles

    # Locate the customer by row *position*, not index label, because the
    # similarity matrix is addressed positionally.
    matches = (profiles['CustomerID'] == customer_id).to_numpy().nonzero()[0]
    if matches.size == 0:
        raise IndexError(f"CustomerID {customer_id!r} not found in customer profiles")
    customer_index = int(matches[0])

    similarities = similarity_matrix[customer_index]

    # Rank everyone from most to least similar, then drop the customer
    # explicitly. Relying on "self sorts last" is wrong when another customer
    # ties with the self-similarity, or when the self-similarity is not the
    # maximum (e.g. an all-zero feature row).
    ranked = (-similarities).argsort(kind='stable')
    top_indices = ranked[ranked != customer_index][:max(top_n, 0)]

    top_customers = profiles['CustomerID'].iloc[top_indices].tolist()
    top_scores = similarities[top_indices].tolist()
    return list(zip(top_customers, top_scores))

import csv

# Lookalike lists for the customers in the first 20 rows of customer_profiles,
# keyed by CustomerID (insertion order follows the row order)
lookalike_recommendations = {
    customer_id: get_top_similar_customers(customer_id, similarity_matrix)
    for customer_id in customer_profiles['CustomerID'].iloc[:20]
}

# Write one CSV row per (customer, lookalike) pair, preceded by a header row
with open('Lookalike.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['CustomerID', 'LookalikeID', 'SimilarityScore'])
    writer.writerows(
        [customer_id, lookalike_id, score]
        for customer_id, recommendations in lookalike_recommendations.items()
        for lookalike_id, score in recommendations
    )