In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
from fpdf import FPDF

In [2]:
customers_df = pd.read_csv('Customers.csv')
products_df = pd.read_csv('Products.csv')
transactions_df = pd.read_csv('Transactions.csv')

In [3]:
customers_df['SignupDate'] = pd.to_datetime(customers_df['SignupDate'])
transactions_df['TransactionDate'] = pd.to_datetime(transactions_df['TransactionDate'])

In [4]:
customer_stats = transactions_df.groupby('CustomerID').agg({
    'TransactionID': 'count',
    'TotalValue': ['sum', 'mean']
}).round(2)

In [5]:
regional_stats = transactions_df.merge(customers_df, on='CustomerID')\
    .groupby('Region').agg({
        'TotalValue': ['sum', 'mean'],
        'CustomerID': 'nunique'
    }).round(2)

In [6]:
product_stats = transactions_df.merge(products_df, on='ProductID')\
    .groupby('Category').agg({
        'TotalValue': ['sum', 'mean'],
        'Quantity': 'sum'
    }).round(2)


In [7]:
transactions_df['Month'] = transactions_df['TransactionDate'].dt.to_period('M')
temporal_stats = transactions_df.groupby('Month').agg({
    'TotalValue': ['sum', 'mean'],
    'TransactionID': 'count'
}).round(2)

In [8]:
business_insights = [
    "1. Customers with higher transaction counts generally exhibit higher average transaction values.",
    "2. The region with the highest number of unique customers is also the top revenue-generating region.",
    "3. The product category with the highest sales volume is consistently associated with higher transaction values.",
    "4. Revenue peaks during specific months, indicating seasonal trends.",
    "5. Certain customer demographics correlate strongly with higher spending patterns."
]

In [13]:
pdf = FPDF()
pdf.set_auto_page_break(auto=True, margin=15)
pdf.add_page()

# Title
pdf.set_font('Arial', 'B', 16)
pdf.cell(0, 10, 'Business Insights from EDA', ln=True, align='C')

# Content
pdf.set_font('Arial', '', 10)  # Reduced font size
pdf.set_left_margin(10)  # Adjust left margin
pdf.set_right_margin(10)  # Adjust right margin

# Add insights line by line
for insight in business_insights:
    # Each insight is rendered in its own line
    pdf.cell(0, 10, insight, ln=True, align='L')

# Output file
pdf.output('EDA_Insights_Report.pdf')

print("EDA complete. Generated 'EDA_Insights_Report.pdf'.")

EDA complete. Generated 'EDA_Insights_Report.pdf'.


  pdf.set_font('Arial', 'B', 16)
  pdf.cell(0, 10, 'Business Insights from EDA', ln=True, align='C')
  pdf.set_font('Arial', '', 10)  # Reduced font size
  pdf.cell(0, 10, insight, ln=True, align='L')
