# Exploratory Data Analysis (EDA)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the customer, product and transaction tables from the working
# directory, in that order, into one DataFrame each.
customers, products, transactions = (
    pd.read_csv(csv_path)
    for csv_path in ('./Customers.csv', './Products.csv', './Transactions.csv')
)

# EDA: Number of customers per region
# Tally the rows of `customers` per Region value and expose the result as a
# two-column frame (Region, CustomerCount) for plotting.
region_customer_count = (
    customers['Region']
    .value_counts()
    .rename_axis('Region')
    .reset_index(name='CustomerCount')
)
plt.figure(figsize=(10, 6))
# barplot returns the Axes it drew on, so the labels can be set in one call.
sns.barplot(data=region_customer_count, x='Region', y='CustomerCount').set(
    title='Customer Distribution by Region',
    xlabel='Region',
    ylabel='Number of Customers',
)
plt.show()

# EDA: How product prices are distributed
# 30-bin histogram of the Price column with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(data=products, x='Price', bins=30, kde=True).set(
    title='Price Distribution of Products',
    xlabel='Price (USD)',
    ylabel='Frequency',
)
plt.show()

# EDA: How total transaction values are distributed
# Attach customer attributes (joined on CustomerID) and product attributes
# (joined on ProductID) to every transaction row. Both joins use pandas'
# default join type.
merged_data = (
    transactions
    .merge(customers, on='CustomerID')
    .merge(products, on='ProductID')
)

# 30-bin histogram of TotalValue with a KDE curve overlaid.
plt.figure(figsize=(10, 6))
sns.histplot(data=merged_data, x='TotalValue', bins=30, kde=True).set(
    title='Distribution of Total Transaction Values',
    xlabel='Total Transaction Value (USD)',
    ylabel='Frequency',
)
plt.show()

# EDA: Summed transaction value per region
# as_index=False keeps Region as an ordinary column, giving a
# (Region, TotalValue) frame ready for plotting.
region_transaction_value = merged_data.groupby('Region', as_index=False)['TotalValue'].sum()
plt.figure(figsize=(12, 6))
sns.barplot(data=region_transaction_value, x='Region', y='TotalValue').set(
    title='Total Transaction Value by Region',
    xlabel='Region',
    ylabel='Total Transaction Value (USD)',
)
plt.show()

# EDA: Customer activity by signup date
# Counts distinct CustomerID values per signup year over the rows of
# merged_data. After reset_index the 'SignupDate' column holds the year and
# 'CustomerID' holds the distinct-customer count.
# NOTE(review): merged_data is built with pandas' default (inner) merges, so
# customers that have no transaction rows are not counted here — confirm that
# this is the intended definition of "signup activity".
merged_data['SignupDate'] = pd.to_datetime(merged_data['SignupDate'])
signup_activity = (
    merged_data
    .groupby(merged_data['SignupDate'].dt.year)['CustomerID']
    .nunique()
    .reset_index()
)
plt.figure(figsize=(10, 6))
# marker='o' keeps every year visible; without it a dataset covering a single
# signup year would draw nothing, because a one-point line has no extent.
sns.lineplot(x='SignupDate', y='CustomerID', data=signup_activity, marker='o')
# Years are discrete: pin the ticks to the observed years and label them as
# integers so the axis does not show fractional values such as 2022.5.
signup_years = signup_activity['SignupDate'].astype(int)
plt.xticks(signup_years, signup_years.astype(str))
plt.title('Customer Signup Activity Over the Years')
plt.xlabel('Year')
plt.ylabel('Number of Customers')
plt.show()

# EDA: Mean transaction value per product category
# as_index=False keeps Category as an ordinary column, giving a
# (Category, TotalValue) frame where TotalValue is the per-category mean.
category_transaction_value = merged_data.groupby('Category', as_index=False)['TotalValue'].mean()
plt.figure(figsize=(12, 6))
sns.barplot(data=category_transaction_value, x='Category', y='TotalValue').set(
    title='Average Transaction Value by Product Category',
    xlabel='Product Category',
    ylabel='Average Transaction Value (USD)',
)
# Tilt the category labels on the x axis by 45 degrees.
plt.xticks(rotation=45)
plt.show()