In [15]:
# Data-handling and plotting libraries, plus the stdlib warnings module.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings as wr
# NOTE(review): 'ignore' with no category/module filter suppresses every
# warning raised after this line (deprecations included) — consider narrowing.
wr.filterwarnings('ignore')

In [17]:
# Read the three source tables from CSV files in the working directory.
customers = pd.read_csv('Customers.csv')        # one row per customer
products = pd.read_csv('Products.csv')          # one row per product
transactions = pd.read_csv('Transactions.csv')  # one row per transaction

# Print the first five rows of each table under its own heading.
for heading, frame in (
    ("Customers:\n", customers),
    ("Products:\n", products),
    ("Transactions:\n", transactions),
):
    print(heading, frame.head())


Customers:
   CustomerID        CustomerName         Region  SignupDate
0      C0001    Lawrence Carroll  South America  2022-07-10
1      C0002      Elizabeth Lutz           Asia  2022-02-13
2      C0003      Michael Rivera  South America  2024-03-07
3      C0004  Kathleen Rodriguez  South America  2022-10-09
4      C0005         Laura Weber           Asia  2022-08-15
Products:
   ProductID              ProductName     Category   Price
0      P001     ActiveWear Biography        Books  169.30
1      P002    ActiveWear Smartwatch  Electronics  346.30
2      P003  ComfortLiving Biography        Books   44.12
3      P004            BookWorld Rug   Home Decor   95.69
4      P005          TechPro T-Shirt     Clothing  429.31
Transactions:
   TransactionID CustomerID ProductID      TransactionDate  Quantity  \
0        T00001      C0199      P067  2024-08-25 12:38:23         1   
1        T00112      C0146      P067  2024-05-27 22:23:54         1   
2        T00166      C0127      P067  202

In [19]:
# Report the number of null entries per column for every loaded table.
for heading, frame in (
    ("Missing values in Customers:\n", customers),
    ("Missing values in Products:\n", products),
    ("Missing values in Transactions:\n", transactions),
):
    print(heading, frame.isnull().sum())

Missing values in Customers:
 CustomerID      0
CustomerName    0
Region          0
SignupDate      0
dtype: int64
Missing values in Products:
 ProductID      0
ProductName    0
Category       0
Price          0
dtype: int64
Missing values in Transactions:
 TransactionID      0
CustomerID         0
ProductID          0
TransactionDate    0
Quantity           0
TotalValue         0
Price              0
dtype: int64


In [None]:
# Sales analysis over the merged transaction / product / customer tables.
#
# Column names follow the actual CSV headers shown in the previews above
# (ProductID, CustomerID, ProductName, Quantity, TotalValue, TransactionDate,
# TransactionID); the snake_case names used previously do not exist in the
# data and raised KeyError on the first merge.

# 1-2. Merge transactions with products.
# Both tables carry a `Price` column; keep the transaction-side name as-is and
# tag the product-side copy so the result has no ambiguous Price_x / Price_y.
trans_prod = transactions.merge(
    products, on='ProductID', how='left', suffixes=('', '_catalog')
)

# 3. Merge transactions with customers
full_data = trans_prod.merge(customers, on='CustomerID', how='left')

# 4. Check top-performing products
top_products = (
    full_data.groupby('ProductName')
    .agg(
        total_quantity=('Quantity', 'sum'),
        total_revenue=('TotalValue', 'sum'),
    )
    .sort_values(by='total_revenue', ascending=False)
)
print("Top Products:\n", top_products.head())

# 5. Analyze customer segments
# The customer table has no dedicated segment column; `Region` is the only
# grouping attribute it provides, so it is used as the segment here.
# Average revenue per customer is total revenue divided by distinct customers
# (a plain mean of TotalValue would be revenue per *transaction*).
customer_segment_analysis = (
    full_data.groupby('Region')
    .agg(
        total_customers=('CustomerID', 'nunique'),
        total_revenue=('TotalValue', 'sum'),
    )
    .assign(
        avg_revenue_per_customer=lambda seg: seg['total_revenue'] / seg['total_customers']
    )
    .sort_values(by='total_revenue', ascending=False)
)
print("Customer Segment Analysis:\n", customer_segment_analysis)

# 6. Seasonal trends (sales by month)
# Month-of-year (1-12): transactions from different years share a bucket.
full_data['month'] = pd.to_datetime(full_data['TransactionDate']).dt.month
monthly_sales = (
    full_data.groupby('month')
    .agg(
        total_revenue=('TotalValue', 'sum'),
        total_transactions=('TransactionID', 'nunique'),
    )
    .sort_values(by='total_revenue', ascending=False)
)
print("Monthly Sales:\n", monthly_sales)

# 7. Underperforming products
# `top_products` is built from transactions only, so a product that never
# sold has no row there. Re-index against the full catalogue (filling zeros)
# so that never-sold products actually show up in the == 0 filter.
catalogue_sales = top_products.reindex(
    products['ProductName'].unique(), fill_value=0
)
underperforming_products = catalogue_sales[catalogue_sales['total_quantity'] == 0]
print("Underperforming Products:\n", underperforming_products)

# 8. Visualize revenue by product
fig, ax = plt.subplots(figsize=(10, 6))
top_products['total_revenue'].head(10).plot(kind='bar', color='skyblue', ax=ax)
ax.set_title("Top 10 Products by Revenue")
ax.set_xlabel("Product Name")
ax.set_ylabel("Revenue")
fig.tight_layout()  # keep long rotated product names inside the figure
plt.show()