In [None]:
# Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load Datasets
# Read the three input tables in one pass; every file is decoded as
# ISO-8859-1, and the results are bound in the same order as the paths.
customers, products, transactions = (
    pd.read_csv(csv_path, encoding='ISO-8859-1')
    for csv_path in ('Customers.csv', 'Products.csv', 'Transactions.csv')
)

# Data Overview
# Print the leading rows of each table for a quick look at its columns.
for table in (customers, products, transactions):
    print(table.head())

# Data Cleaning
# Report the per-column count of missing values for each table.
for table in (customers, products, transactions):
    print(table.isnull().sum())

# For each table: parse its date column (if it has one) into datetimes so the
# `.dt` accessor can be used on it, then discard fully duplicated rows.
customers = customers.assign(
    SignupDate=lambda df: pd.to_datetime(df['SignupDate']),
).drop_duplicates()
products = products.drop_duplicates()
transactions = transactions.assign(
    TransactionDate=lambda df: pd.to_datetime(df['TransactionDate']),
).drop_duplicates()

# Exploratory Data Analysis

# 1. Customers by Region
# Count customer rows per Region value and show the counts as a bar chart.
region_counts = customers['Region'].value_counts()
ax = region_counts.plot.bar(
    color='skyblue',
    title='Customer Distribution by Region',
    figsize=(8, 5),
)
ax.set_xlabel('Region')
ax.set_ylabel('Number of Customers')
plt.show()

# 2. Top 10 Product Categories
# Count product rows per Category, keep the ten most frequent, and chart them.
category_counts = products['Category'].value_counts()
top_categories = category_counts.head(10)
ax = top_categories.plot.bar(
    color='orange',
    title='Top 10 Product Categories',
    figsize=(8, 5),
)
ax.set(xlabel='Category', ylabel='Count')
plt.show()

# 3. Signup Trends
# Store each customer's signup year as a new SignupYear column, then count
# signups per year; sort_index() orders the years ascending for the line plot.
customers['SignupYear'] = customers['SignupDate'].dt.year
signup_trends = customers['SignupYear'].value_counts().sort_index()
ax = signup_trends.plot.line(
    marker='o',
    title='Customer Signup Trends Over Time',
    figsize=(8, 5),
)
ax.set_xlabel('Year')
ax.set_ylabel('Number of Signups')
plt.show()

# 4. High-Spending Customers
# Sum TotalValue per CustomerID, then keep the ten largest totals.
spend_per_customer = transactions.groupby('CustomerID')['TotalValue'].sum()
high_spenders = spend_per_customer.sort_values(ascending=False).head(10)
print("Top 10 High-Spending Customers:", high_spenders, sep='\n')

# 5. Sales Trends
# Bucket every transaction into its calendar month (stored as a new
# TransactionMonth period column) and total TotalValue within each month.
transaction_months = transactions['TransactionDate'].dt.to_period('M')
transactions['TransactionMonth'] = transaction_months
monthly_groups = transactions.groupby('TransactionMonth')
sales_trends = monthly_groups['TotalValue'].sum()
ax = sales_trends.plot.line(
    marker='o',
    title='Sales Trends Over Time',
    figsize=(8, 5),
)
ax.set(xlabel='Month', ylabel='Total Sales ($)')
plt.show()

# 6. Price Distribution of Products
# Histogram of product prices in 20 bins with a KDE curve overlaid.
# The figure is created explicitly at 8x5 inches so this chart matches the
# size of the other charts in this analysis (they all pass figsize=(8, 5)),
# and so the histogram is always drawn on a fresh figure rather than on
# whatever figure pyplot currently considers active.
plt.figure(figsize=(8, 5))
sns.histplot(products['Price'], bins=20, kde=True, color='green')
plt.title('Price Distribution of Products')
plt.xlabel('Price ($)')
plt.ylabel('Frequency')
plt.show()

# Save cleaned datasets
# Write each table to its Cleaned_*.csv file without the index column.
# Note: by this point customers carries the SignupYear column and transactions
# the TransactionMonth column added during the analysis, so both are written.
for table, out_path in (
    (customers, 'Cleaned_Customers.csv'),
    (products, 'Cleaned_Products.csv'),
    (transactions, 'Cleaned_Transactions.csv'),
):
    table.to_csv(out_path, index=False)
