# Exploratory Data Analysis and Business Insights

This notebook analyzes customer transaction data to derive business insights.

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better visualizations.
# matplotlib 3.6 renamed its bundled seaborn style sheets to 'seaborn-v0_8*' and
# deprecated the bare 'seaborn' name (newer releases reject it), so prefer the
# renamed sheet when the installed matplotlib ships it and fall back otherwise.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)
sns.set_palette('Set2')

## Data Loading

In [None]:
# Load the datasets (paths are resolved relative to the current working directory)
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

# Display basic information about the datasets.
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
print("\nCustomers Dataset Info:")
customers_df.info()
print("\nProducts Dataset Info:")
products_df.info()
print("\nTransactions Dataset Info:")
transactions_df.info()

## Data Overview and Basic Statistics

In [None]:
# Show the leading rows of every dataset, each under its own heading
dataset_previews = (
    ("\nCustomers Dataset Preview:", customers_df),
    ("\nProducts Dataset Preview:", products_df),
    ("\nTransactions Dataset Preview:", transactions_df),
)
for preview_heading, dataset in dataset_previews:
    print(preview_heading)
    display(dataset.head())

In [None]:
# Summary statistics (DataFrame.describe) for every dataset, each under its own heading
dataset_summaries = (
    ("\nCustomers Dataset Statistics:", customers_df),
    ("\nProducts Dataset Statistics:", products_df),
    ("\nTransactions Dataset Statistics:", transactions_df),
)
for summary_heading, dataset in dataset_summaries:
    print(summary_heading)
    display(dataset.describe())

## Customer Analysis

In [None]:
# Customer demographics: how many customers fall into each age group
age_group_counts = customers_df['age_group'].value_counts()
fig, ax = plt.subplots(figsize=(12, 6))
age_group_counts.plot.bar(ax=ax)
ax.set_title('Distribution of Customers by Age Group')
ax.set_xlabel('Age Group')
ax.set_ylabel('Number of Customers')
plt.xticks(rotation=45)
plt.show()

# Share of customers per gender, labelled with one-decimal percentages
gender_counts = customers_df['gender'].value_counts()
fig, ax = plt.subplots(figsize=(8, 8))
gender_counts.plot.pie(ax=ax, autopct='%1.1f%%')
ax.set_title('Customer Gender Distribution')
plt.show()

## Transaction Analysis

In [None]:
# Attach product details to each transaction through the shared product_id key
transactions_with_products = transactions_df.merge(products_df, on='product_id')

# Total quantity sold per product category, largest first
quantity_by_category = transactions_with_products.groupby('category')['quantity'].sum()
category_sales = quantity_by_category.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(12, 6))
category_sales.plot.bar(ax=ax)
ax.set_title('Total Sales by Product Category')
ax.set_xlabel('Category')
ax.set_ylabel('Total Quantity Sold')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Add a per-transaction revenue column (quantity * price) and parse the
# transaction date into a datetime column so it can be bucketed by month
transactions_with_products = transactions_with_products.assign(
    revenue=lambda frame: frame['quantity'] * frame['price'],
    transaction_date=lambda frame: pd.to_datetime(frame['transaction_date']),
)

# Sum revenue within each calendar month
transaction_month = transactions_with_products['transaction_date'].dt.to_period('M')
monthly_revenue = transactions_with_products.groupby(transaction_month)['revenue'].sum()

fig, ax = plt.subplots(figsize=(15, 6))
monthly_revenue.plot.line(ax=ax, marker='o')
ax.set_title('Monthly Revenue Trend')
ax.set_xlabel('Month')
ax.set_ylabel('Revenue')
plt.xticks(rotation=45)
ax.grid(True)
plt.show()

## Customer Segmentation and Purchase Behavior

In [None]:
# Combine the product-enriched transactions with customer attributes via customer_id
complete_data = transactions_with_products.merge(customers_df, on='customer_id')

# Mean revenue per transaction within each age group, highest first
mean_revenue_by_age = complete_data.groupby('age_group')['revenue'].mean()
avg_purchase_by_age = mean_revenue_by_age.sort_values(ascending=False)

fig, ax = plt.subplots(figsize=(12, 6))
avg_purchase_by_age.plot.bar(ax=ax)
ax.set_title('Average Purchase Value by Age Group')
ax.set_xlabel('Age Group')
ax.set_ylabel('Average Purchase Value')
plt.xticks(rotation=45)
plt.show()

## Business Insights

Based on the above analysis, here are the key business insights:

1. [Will be filled after analyzing the actual data]
2. [Will be filled after analyzing the actual data]
3. [Will be filled after analyzing the actual data]
4. [Will be filled after analyzing the actual data]
5. [Will be filled after analyzing the actual data]