In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime

# Load the customer, transaction and product tables from CSV files located
# in the current working directory (read in that order).
customers_df, transactions_df, products_df = (
    pd.read_csv(csv_name)
    for csv_name in ('Customers.csv', 'Transactions.csv', 'Products.csv')
)

# Parse the date columns into pandas datetimes; SignupDate is later bucketed
# by month through the `.dt` accessor, which requires a datetime dtype.
for frame, date_column in (
    (customers_df, 'SignupDate'),
    (transactions_df, 'TransactionDate'),
):
    frame[date_column] = pd.to_datetime(frame[date_column])

# Customer Analysis
def analyze_customers(customers=None):
    """Summarise customers by region and by signup month.

    Args:
        customers: Optional DataFrame with a ``Region`` column and a
            datetime ``SignupDate`` column. When omitted, the module-level
            ``customers_df`` is used.

    Returns:
        tuple: ``(region_dist, monthly_signups)`` where ``region_dist`` is
        the ``value_counts`` of ``Region`` and ``monthly_signups`` is the
        number of rows per signup month, indexed by monthly periods under
        the index name ``SignupMonth``.
    """
    if customers is None:
        customers = customers_df

    # Region distribution
    region_dist = customers['Region'].value_counts()

    # Customer signup trends.
    # The month bucket is kept as a standalone Series instead of being
    # written back as a 'SignupMonth' column: the input frame is shared with
    # the rest of the script, and an analysis helper should not alter it.
    signup_month = customers['SignupDate'].dt.to_period('M').rename('SignupMonth')
    monthly_signups = customers.groupby(signup_month).size()

    return region_dist, monthly_signups

# Transaction Analysis
def analyze_transactions(transactions=None, customers=None, products=None):
    """Aggregate transaction value by customer, category and region.

    Args:
        transactions: Optional DataFrame with ``CustomerID``, ``ProductID``
            and ``TotalValue`` columns. Defaults to the module-level
            ``transactions_df``.
        customers: Optional DataFrame with ``CustomerID`` and ``Region``
            columns. Defaults to the module-level ``customers_df``.
        products: Optional DataFrame with ``ProductID`` and ``Category``
            columns. Defaults to the module-level ``products_df``.

    Returns:
        tuple: ``(customer_ltv, category_sales, avg_transaction_by_region,
        purchase_frequency)`` where

        * ``customer_ltv`` is the summed ``TotalValue`` per ``CustomerID``,
          sorted descending;
        * ``category_sales`` is the summed ``TotalValue`` per ``Category``,
          sorted descending;
        * ``avg_transaction_by_region`` is the mean ``TotalValue`` per
          ``Region``;
        * ``purchase_frequency`` is the mean number of merged transaction
          rows per customer. Only customers that appear in the merged data
          are counted, so customers without any transaction do not lower
          the average.
    """
    if transactions is None:
        transactions = transactions_df
    if customers is None:
        customers = customers_df
    if products is None:
        products = products_df

    # Merge transactions with customer and product information.
    # Only the lookup columns aggregated on below are joined in, so any
    # other column the tables happen to share is not duplicated with
    # _x/_y suffixes. merge() defaults to an inner join, so transactions
    # whose CustomerID or ProductID has no match are dropped.
    merged_df = (
        transactions
        .merge(customers[['CustomerID', 'Region']], on='CustomerID')
        .merge(products[['ProductID', 'Category']], on='ProductID')
    )

    # Total spend per customer, used here as customer lifetime value (CLV)
    customer_ltv = (
        merged_df.groupby('CustomerID')['TotalValue'].sum().sort_values(ascending=False)
    )

    # Total sales per product category
    category_sales = (
        merged_df.groupby('Category')['TotalValue'].sum().sort_values(ascending=False)
    )

    # Average transaction value by region
    avg_transaction_by_region = merged_df.groupby('Region')['TotalValue'].mean()

    # Average number of transactions per (transacting) customer
    purchase_frequency = merged_df.groupby('CustomerID').size().mean()

    return customer_ltv, category_sales, avg_transaction_by_region, purchase_frequency

# Product Analysis
def analyze_products(products=None):
    """Summarise the product catalogue by category and by price.

    Args:
        products: Optional DataFrame with ``Category`` and ``Price``
            columns. When omitted, the module-level ``products_df`` is used.

    Returns:
        tuple: ``(category_dist, price_stats)`` where ``category_dist`` is
        the ``value_counts`` of ``Category`` and ``price_stats`` is the
        ``describe()`` summary of ``Price``.
    """
    if products is None:
        products = products_df

    # Product category distribution
    category_dist = products['Category'].value_counts()

    # Price distribution statistics
    price_stats = products['Price'].describe()

    return category_dist, price_stats

# Generate visualizations
def create_visualizations():
    """Draw three overview panels and save them as one PNG.

    Reads the module-level ``customers_df``, ``transactions_df`` and
    ``products_df`` frames and fills the first three cells of a 2x2 grid
    on a 15x10 figure:

    1. count of customers per ``Region``,
    2. 30-bin histogram of transaction ``TotalValue``,
    3. count of products per ``Category``.

    The figure is written to ``eda_visualizations.png`` in the current
    working directory. Nothing is returned.
    """
    plt.figure(figsize=(15, 10))

    # Panel 1: customer region distribution
    region_ax = plt.subplot(2, 2, 1)
    sns.countplot(data=customers_df, x='Region', ax=region_ax)
    region_ax.set_title('Customer Distribution by Region')
    # region_ax is still the current axes here, so this rotates its labels.
    plt.xticks(rotation=45)

    # Panel 2: transaction value distribution
    value_ax = plt.subplot(2, 2, 2)
    sns.histplot(data=transactions_df, x='TotalValue', bins=30, ax=value_ax)
    value_ax.set_title('Transaction Value Distribution')

    # Panel 3: product category distribution
    category_ax = plt.subplot(2, 2, 3)
    sns.countplot(data=products_df, x='Category', ax=category_ax)
    category_ax.set_title('Product Category Distribution')
    plt.xticks(rotation=45)

    # Tighten the layout before writing the image to disk.
    plt.tight_layout()
    plt.savefig('eda_visualizations.png')

# Run the three analyses and keep their results as module-level names.
# NOTE(review): create_visualizations() is defined above but is not invoked
# here, so eda_visualizations.png is not produced by this section.
region_dist, monthly_signups = analyze_customers()
(
    customer_ltv,
    category_sales,
    avg_transaction_by_region,
    purchase_frequency,
) = analyze_transactions()
category_dist, price_stats = analyze_products()

# Print insights: each numbered heading is followed by the matching result.
print("\nBusiness Insights:")
for heading, result in (
    ("\n1. Customer Geographic Distribution:", region_dist),
    ("\n2. Top Product Categories by Sales:", category_sales),
    ("\n3. Average Transaction Value by Region:", avg_transaction_by_region),
    ("\n4. Product Price Statistics:", price_stats),
):
    print(heading)
    print(result)

# The last insight is a single number, formatted to two decimals.
print("\n5. Average Purchase Frequency per Customer:")
print(f"Average number of transactions per customer: {purchase_frequency:.2f}")


Business Insights:

1. Customer Geographic Distribution:
Region
South America    59
Europe           50
North America    46
Asia             45
Name: count, dtype: int64

2. Top Product Categories by Sales:
Category
Books          192147.47
Electronics    180783.50
Clothing       166170.66
Home Decor     150893.93
Name: TotalValue, dtype: float64

3. Average Transaction Value by Region:
Region
Asia             697.591606
Europe           710.489872
North America    624.235246
South America    721.554474
Name: TotalValue, dtype: float64

4. Product Price Statistics:
count    100.000000
mean     267.551700
std      143.219383
min       16.080000
25%      147.767500
50%      292.875000
75%      397.090000
max      497.760000
Name: Price, dtype: float64

5. Average Purchase Frequency per Customer:
Average number of transactions per customer: 5.03
