# EDA for eCommerce Transactions Dataset


This notebook performs Exploratory Data Analysis (EDA) on the eCommerce Transactions dataset.
The dataset consists of three files: `Customers.csv`, `Products.csv`, and `Transactions.csv`.

### Steps Included:
1. Load and inspect the data
2. Data preprocessing and cleaning
3. Exploratory analysis and visualizations
4. Business insights


In [None]:

# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook-wide plotting defaults: seaborn's "whitegrid" theme first, then the
# default figure size (set afterwards so the theme call cannot override it).
sns.set_theme(style="whitegrid")
plt.rc('figure', figsize=(10, 6))


In [None]:

# Read the three source tables (paths are relative to the working directory).
customers, products, transactions = map(
    pd.read_csv,
    ('Customers.csv', 'Products.csv', 'Transactions.csv'),
)

# Print a heading followed by the first rows of each table.
for _heading, _frame in (
    ("Customers Head:", customers),
    ("\nProducts Head:", products),
    ("\nTransactions Head:", transactions),
):
    print(_heading, _frame.head(), sep="\n")


In [None]:

# Convert date columns to datetime format so the .dt accessor can be used later
customers['SignupDate'] = pd.to_datetime(customers['SignupDate'])
transactions['TransactionDate'] = pd.to_datetime(transactions['TransactionDate'])

# Merge datasets for analysis: transactions -> customers -> products.
# Left joins keep every transaction even when a lookup row is missing.
# validate='many_to_one' makes pandas raise MergeError if CustomerID / ProductID
# is duplicated in the lookup table; without it such duplicates would silently
# multiply transaction rows and inflate every downstream count and sum.
# NOTE(review): if both transactions and products carry a 'Price' column, the
# second merge will expose them as 'Price_x' (transactions) and 'Price_y' (products).
trans_customers = pd.merge(
    transactions, customers, on='CustomerID', how='left', validate='many_to_one'
)
full_data = pd.merge(
    trans_customers, products, on='ProductID', how='left', validate='many_to_one'
)

# Check for missing values (unmatched customers/products show up here as NaN)
print("\nMissing Values in Full Data:")
print(full_data.isnull().sum())

# Check data types
print("\nData Types in Full Data:")
print(full_data.dtypes)


In [None]:

# EDA: Distribution of Customers by Region
# value_counts() sorts descending, so bars run from the most to the least common region.
region_order = customers['Region'].value_counts().index

fig, ax = plt.subplots()
sns.countplot(data=customers, x='Region', order=region_order, ax=ax)
ax.set_title("Customer Distribution by Region")
ax.set_xlabel("Region")
ax.set_ylabel("Number of Customers")
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()


In [None]:

# EDA: Monthly Transaction Trends
# Bucket every transaction into its calendar month and count rows per month.
full_data['TransactionMonth'] = full_data['TransactionDate'].dt.to_period('M')
monthly_transactions = full_data.groupby('TransactionMonth').size().reset_index(name='Count')

# Period values are not reliably plottable by seaborn/matplotlib (lineplot can
# raise a TypeError on them), so plot against the month-start timestamps instead.
# The Period columns above are left untouched for any later cell that uses them.
month_start = monthly_transactions['TransactionMonth'].dt.to_timestamp()

plt.figure()
sns.lineplot(x=month_start, y=monthly_transactions['Count'], marker="o")
plt.title("Monthly Transaction Trends")
plt.xlabel("Month")
plt.ylabel("Number of Transactions")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()


In [None]:

# EDA: Sales by Product Category
# Sum TotalValue per category, then draw the bars from highest to lowest total.
category_sales = (
    full_data
    .groupby('Category')['TotalValue']
    .sum()
    .reset_index()
)
category_order = category_sales.sort_values('TotalValue', ascending=False)['Category'].tolist()

fig, ax = plt.subplots()
sns.barplot(data=category_sales, x='Category', y='TotalValue', order=category_order, ax=ax)
ax.set_title("Total Sales by Product Category")
ax.set_xlabel("Category")
ax.set_ylabel("Total Sales (USD)")
ax.tick_params(axis='x', labelrotation=45)
fig.tight_layout()
plt.show()


In [None]:

# EDA: Price Distribution of Products
# Histogram of the products table's Price column with a KDE curve overlaid.
fig, ax = plt.subplots()
sns.histplot(data=products, x='Price', kde=True, ax=ax)
ax.set(
    title="Distribution of Product Prices",
    xlabel="Price (USD)",
    ylabel="Frequency",
)
fig.tight_layout()
plt.show()


In [None]:

# EDA: Price vs Quantity Purchased
# 'Price_x' is the left-hand (transactions-side) Price column created when
# full_data was merged with products using pandas' default '_x'/'_y' suffixes.
fig, ax = plt.subplots()
sns.scatterplot(data=full_data, x='Price_x', y='Quantity', ax=ax)
ax.set(
    title="Price vs. Quantity Purchased",
    xlabel="Transaction Price (USD)",
    ylabel="Quantity Purchased",
)
fig.tight_layout()
plt.show()
