In [12]:
# Import libraries and load the datasets
import pandas as pd
import matplotlib.pyplot as plt
from fpdf import FPDF

# Google Drive links for the three source CSV files.
customers_url = "https://drive.google.com/uc?id=1bu_--mo79VdUG9oin4ybfFGRUSXAe-WE"
products_url = "https://drive.google.com/uc?id=1IKuDizVapw-hyktwfpoAoaGtHtTNHfd0"
transactions_url = "https://drive.google.com/uc?id=1saEqdbBB-vuk2hxoAf4TzDEsykdKlzbF"

# Read each CSV into its own DataFrame (customers, then products, then
# transactions).
customers, products, transactions = (
    pd.read_csv(csv_url)
    for csv_url in (customers_url, products_url, transactions_url)
)

# Attach customer columns, then product columns, to every transaction row.
# Both joins use pandas' default join type, keyed on the shared ID column.
data = pd.merge(
    pd.merge(transactions, customers, on='CustomerID'),
    products,
    on='ProductID',
)

# Exploratory data analysis (EDA)
# Ten products with the highest total quantity sold, largest first.
top_products = (
    data.groupby('ProductName')['Quantity']
    .sum()
    .sort_values(ascending=False)
    .head(10)
)
# Total transaction value per region.
revenue_by_region = data.groupby('Region')['TotalValue'].sum()
# Category with the most units sold. This is derived from the merged sales
# data rather than from the product catalogue: the most frequent category in
# the catalogue says nothing about what actually sells.
# NOTE(review): assumes 'Category' comes only from the products table, so the
# merge did not suffix it -- confirm against the CSV schemas.
top_category = data.groupby('Category')['Quantity'].sum().idxmax()

# Convert TransactionDate to datetime and total the sales per calendar month.
data['TransactionDate'] = pd.to_datetime(data['TransactionDate'])
data['Month'] = data['TransactionDate'].dt.to_period('M')
monthly_sales = data.groupby('Month')['TotalValue'].sum()

# Key business metrics
# Mean TotalValue over all rows of the merged transaction data.
average_order_value = data['TotalValue'].mean()
# Rows belonging to customers that appear more than once in the merged data.
repeat_customers = data[data.duplicated('CustomerID', keep=False)]
# Share (in percent) of all rows in `customers` that are repeat buyers.
retention_rate = len(repeat_customers['CustomerID'].unique()) / len(customers) * 100

# Report sentences rendered into the PDF.
# NOTE(review): insights 3 and 4, and the "strong loyalty" wording in 6, are
# static text that nothing in this block computes -- verify them against the
# data before publishing.
insights = [
    f"1. The top-selling products are in high demand, led by categories like {top_category}.",
    f"2. {revenue_by_region.idxmax()} contributes the most revenue, making it a key target region.",
    "3. A small number of customers generate a large portion of revenue, emphasizing high-value customer importance.",
    "4. Seasonal trends show significant spikes in sales during specific months.",
    f"5. The Average Order Value (AOV) is ${average_order_value:.2f}, reflecting typical customer spending habits.",
    f"6. The customer retention rate is {retention_rate:.2f}%, indicating strong loyalty among existing customers.",
]

# Create and save visualizations (graphs)
# Each chart is drawn on its own explicit Figure/Axes, laid out with
# tight_layout() so axis and tick labels are not clipped at the image edge,
# saved as a PNG, and then closed.

# Line chart of total sales per month.
fig, ax = plt.subplots(figsize=(10, 6))
monthly_sales.plot(kind='line', marker='o', ax=ax)
ax.set_title('Monthly Sales Trends')
ax.set_xlabel('Month')
ax.set_ylabel('Total Sales')
ax.grid()
fig.tight_layout()
fig.savefig("monthly_sales_trends.png")
plt.close(fig)

# Bar chart of the ten best-selling products by quantity.
fig, ax = plt.subplots(figsize=(10, 6))
top_products.plot(kind='bar', color='skyblue', ax=ax)
ax.set_title('Top 10 Best-Selling Products')
ax.set_ylabel('Quantity Sold')
# Rotate the product names and anchor them by their right end so each label
# finishes under its own bar.
plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
fig.tight_layout()
fig.savefig("top_products.png")
plt.close(fig)


class PDF(FPDF):
    """FPDF subclass that supplies the report's header and footer."""

    def header(self):
        """Write the report title, centred, in bold 12-point Arial."""
        title = 'EDA Business Insights'
        self.set_font('Arial', style='B', size=12)
        self.cell(0, 10, title, border=0, ln=1, align='C')

    def footer(self):
        """Write the centred page number in italic 8-point Arial."""
        # Negative y: FPDF measures it from the bottom of the page (see the
        # library documentation for set_y).
        self.set_y(-15)
        self.set_font('Arial', style='I', size=8)
        page_label = f'Page {self.page_no()}'
        self.cell(0, 10, page_label, align='C')

# Assemble the PDF report: the insight sentences first, then both saved charts.
pdf = PDF()
pdf.add_page()
pdf.set_font('Arial', size=12)
pdf.cell(0, 10, 'Business Insights:', ln=1)
for insight in insights:
    # The built-in Arial font only covers Latin-1. Insight text contains
    # names taken from the CSV data, so replace any character outside
    # Latin-1 with '?' instead of letting PDF generation fail.
    printable = insight.encode('latin-1', 'replace').decode('latin-1')
    pdf.multi_cell(0, 10, printable)

# No y is passed to image(), so FPDF places each chart at the current position
# and starts a new page first when the chart would not fit above the bottom
# margin. That uses the image's real rendered height, which the previous
# hand-written "current_y + 100 > 200" check did not, and it avoids forcing a
# page break when the chart fits.
pdf.image("monthly_sales_trends.png", x=10, w=190)
pdf.image("top_products.png", x=10, w=190)
pdf.output("Pranav_Pakalapati_EDA.pdf")


''

In [None]:
# Download the generated report from the Colab runtime to the local machine.
from google.colab import files

# Use the same relative path that pdf.output() wrote to, so the download still
# works when the working directory is not /content.
files.download('Pranav_Pakalapati_EDA.pdf')



<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>