In [None]:
#This is the technical analysis part.
#This generates the operational and executive reports.
#Use this code as reference to generate the reports for the second part of the script.

import pandas as pd

# ---------------------------------------
# 1. Load and Inspect Data
# ---------------------------------------
file_path = "data.xlsx"  # Ensure this file is in the same directory

# Read the three sheets through a single ExcelFile handle. The context
# manager closes the workbook once the sheets are loaded, and also when one
# of the reads raises, so the file handle is never left open.
with pd.ExcelFile(file_path) as xls:
    df_orders = pd.read_excel(xls, sheet_name="Orders")   # Orders Sheet
    df_returns = pd.read_excel(xls, sheet_name="Returns") # Returns Sheet
    df_people = pd.read_excel(xls, sheet_name="People")   # People Sheet

print("=== 1. DATA LOADED SUCCESSFULLY ===\n")

# ---------------------------------------
# 2. Data Cleaning & Preparation
# ---------------------------------------

# Convert Order Date and Ship Date to datetime so the .dt accessor works
df_orders["Order Date"] = pd.to_datetime(df_orders["Order Date"])
df_orders["Ship Date"] = pd.to_datetime(df_orders["Ship Date"])

# Extract Year from Order Date (used as the grouping key for yearly KPIs)
df_orders["Year"] = df_orders["Order Date"].dt.year

# Label orders that have no region, and title-case the People regions
# (presumably so they match the spelling used in Orders - verify against the data)
df_orders["Region"] = df_orders["Region"].fillna("Unknown")
df_people["Region"] = df_people["Region"].str.title()

# Merge Orders with People to add Regional Manager info
df_orders = df_orders.merge(df_people, on="Region", how="left")

# Merge Orders with Returns to track returned orders.
# A left merge repeats an order row once per matching Returns row, so an
# Order ID listed more than once in Returns would duplicate its order rows
# and inflate every sum and count computed from df_merged. Merge against one
# row per Order ID instead; df_returns itself is left untouched.
returns_per_order = df_returns.drop_duplicates(subset="Order ID")
df_merged = df_orders.merge(returns_per_order, on="Order ID", how="left")

# Orders with no match in Returns come back as NaN; mark them as not returned
df_merged["Returned"] = df_merged["Returned"].fillna("No")

print("\n=== DATA CLEANING COMPLETED ===\n")

# ---------------------------------------
# 3. EXECUTIVE REPORT (Yearly KPIs)
# ---------------------------------------

# One row per order year. "count" tallies the non-null "Order ID" values in
# each group, i.e. rows of df_merged rather than distinct order IDs.
orders_by_year = df_merged.groupby("Year")
yearly_kpis = orders_by_year.agg(
    Total_Sales=("Sales", "sum"),
    Total_Orders=("Order ID", "count"),
    Total_Profit=("Profit", "sum"),
    Return_Count=("Returned", lambda flags: (flags == "Yes").sum()),
)

# Year-over-year growth in percent, relative to the previous year's row
# (the first year has no predecessor and is therefore NaN)
growth_columns = (
    ("Sales YoY Growth", "Total_Sales"),
    ("Profit YoY Growth", "Total_Profit"),
)
for growth_col, total_col in growth_columns:
    yearly_kpis[growth_col] = yearly_kpis[total_col].pct_change() * 100

# Returned rows as a percentage of all rows in the year
returned_share = yearly_kpis["Return_Count"] / yearly_kpis["Total_Orders"]
yearly_kpis["Return Rate"] = returned_share * 100

# Region-Wise Summary: sales, row count and profit per region
region_summary = df_merged.groupby("Region").agg(
    Region_Sales=("Sales", "sum"),
    Region_Orders=("Order ID", "count"),
    Region_Profit=("Profit", "sum"),
)

# Profit margin in percent: profit as a share of sales. (The previous version
# stored the profit sum again under this name.)
region_summary["Profit_Margin"] = (
    region_summary["Region_Profit"] / region_summary["Region_Sales"]
) * 100

# Return rate per region in percent: rows flagged "Yes" divided by all rows of
# that region. (The previous version divided each region's row count by the
# overall row count, which is the region's share of orders, not a return rate.)
region_return_count = (
    df_merged["Returned"].eq("Yes").groupby(df_merged["Region"]).sum()
)
region_summary["Return Rate"] = (
    region_return_count / region_summary["Region_Orders"]
) * 100

# ---------------------------------------
# 4. OPERATIONAL REPORT (Annex II Metrics)
# ---------------------------------------

# Grouping keys: one output row per (Region, State, City)
location_keys = ["Region", "State", "City"]

# Output column -> (source column, aggregation).
# The two *_Quota columns are placeholders built from the group mean; replace
# them with actual quota figures if those become available.
operational_aggs = {
    "Sales_Per_Person": ("Sales", "sum"),
    "Item_Unit_Price": ("Sales", "mean"),
    "Quantity_Sold": ("Quantity", "sum"),
    "Quantity_Quota": ("Quantity", "mean"),
    "Discount_Applied": ("Discount", "mean"),
    "Discount_Cap": ("Discount", "max"),
    "Gross_Sales": ("Sales", "sum"),
    "Gross_Sales_Quota": ("Sales", "mean"),
}
operational_report = df_merged.groupby(location_keys).agg(**operational_aggs)

# Attainment ratios in percent (actual value divided by its placeholder quota)
quantity_attainment = operational_report["Quantity_Sold"] / operational_report["Quantity_Quota"]
operational_report["Quantity Sold / Quantity Quota"] = quantity_attainment * 100

sales_attainment = operational_report["Gross_Sales"] / operational_report["Gross_Sales_Quota"]
operational_report["Gross Sales / Gross Sales Quota"] = sales_attainment * 100

# ---------------------------------------
# 5. Save All Data to a New Excel File
# ---------------------------------------
output_file = "final_report.xlsx"

# (sheet name, frame, write the index?) in the order the sheets are written.
# The raw tables are written without their positional index; the grouped
# reports keep theirs because the index holds the grouping keys.
report_sheets = [
    ("Cleaned Orders", df_orders, False),
    ("Returns", df_returns, False),
    ("People", df_people, False),
    ("Executive Report - KPIs", yearly_kpis, True),
    ("Executive Report - Region", region_summary, True),
    ("Operational Report", operational_report, True),
]

with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    for sheet_title, frame, keep_index in report_sheets:
        frame.to_excel(writer, sheet_name=sheet_title, index=keep_index)

print(f"\n✅ Final report saved as {output_file}")


In [None]:
import mysql.connector
import pandas as pd
from dotenv import load_dotenv
import os

# ---------------------------------------
# 1. Connect to MySQL and Fetch Data
# ---------------------------------------
# Pull connection settings from a local .env file into the environment
load_dotenv()

MYSQL_HOST = os.getenv("MYSQL_HOST")
MYSQL_USER = os.getenv("MYSQL_USER")
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD")
MYSQL_DATABASE = os.getenv("MYSQL_DATABASE")
# os.getenv returns None when the variable is unset, and int(None) raises a
# TypeError; fall back to MySQL's standard port when it is unset or empty.
MYSQL_PORT = int(os.getenv("MYSQL_PORT") or "3306")

cnx = mysql.connector.connect(
    host=MYSQL_HOST,
    user=MYSQL_USER,
    password=MYSQL_PASSWORD,
    database=MYSQL_DATABASE,
    port=MYSQL_PORT
)

# Read all tables into Pandas DataFrames
tables = ["Orders", "customers", "locations", "orderdetails", "regionalmanagers", "returns"]
dataframes = {}

# try/finally guarantees the cursor and the connection are released even when
# one of the queries raises, instead of leaving the session open.
try:
    cursor = cnx.cursor()
    try:
        for table in tables:
            # Table names come from the fixed list above, not from user input
            query = f"SELECT * FROM {table}"
            cursor.execute(query)
            rows = cursor.fetchall()
            # cursor.description holds one tuple per result column; item 0 is its name
            columns = [col[0] for col in cursor.description]
            dataframes[table] = pd.DataFrame(rows, columns=columns)
    finally:
        cursor.close()
finally:
    cnx.close()

# Store tables in separate variables
df_orders = dataframes["Orders"]
df_customers = dataframes["customers"]
df_locations = dataframes["locations"]
df_orderdetails = dataframes["orderdetails"]
df_regionalmanagers = dataframes["regionalmanagers"]
df_returns = dataframes["returns"]

print("✅ Data successfully loaded from MySQL")

# ---------------------------------------
# 2. Data Preprocessing
# ---------------------------------------

# Parse both date columns into datetime values
for date_column in ("Order Date", "Ship Date"):
    df_orders[date_column] = pd.to_datetime(df_orders[date_column])

# Order year, taken from the parsed Order Date
df_orders["Year"] = df_orders["Order Date"].dt.year

# Set "Returned" to 'Yes' on every row of the returns table
df_returns["Returned"] = "Yes"

# Replace missing shipping modes with an explicit label
df_orders.fillna(value={"Ship Mode": "Unknown"}, inplace=True)

# ---------------------------------------
# 3. Merge DataFrames
# ---------------------------------------

# Left-join every lookup table onto the orders so that no order row is dropped
df_merged = (
    df_orders
    .merge(df_customers, on="Customer ID", how="left")
    .merge(df_orderdetails, on="Order ID", how="left")
    .merge(df_locations, on="Postal Code", how="left")
    .merge(df_regionalmanagers, on="Region", how="left")
    .merge(df_returns, on="Order ID", how="left")
)

# Orders with no match in the returns table come back as NaN; mark them "No".
# Assign the result back rather than calling fillna(..., inplace=True) on the
# selected column: that chained form operates on an intermediate object,
# triggers a pandas warning, and stops updating df_merged in pandas 3.0.
df_merged["Returned"] = df_merged["Returned"].fillna("No")

print("✅ Data successfully merged")

# ---------------------------------------
# 4. Executive Report - KPIs (Yearly)
# ---------------------------------------

# One row per order year. "count" tallies the non-null "Order ID" values in
# each group, i.e. rows of df_merged rather than distinct order IDs.
rows_by_year = df_merged.groupby("Year")
yearly_kpis = rows_by_year.agg(
    Total_Sales=("Revenue Per Order", "sum"),
    Total_Orders=("Order ID", "count"),
    Return_Count=("Returned", lambda flags: (flags == "Yes").sum()),
)

# Returned rows as a percentage of all rows in the year
returned_share = yearly_kpis["Return_Count"] / yearly_kpis["Total_Orders"]
yearly_kpis["Return Rate (%)"] = returned_share * 100

# Year-over-year sales growth in percent (NaN for the first year)
sales_change = yearly_kpis["Total_Sales"].pct_change()
yearly_kpis["Sales YoY Growth (%)"] = sales_change * 100

# Profit is a placeholder: a flat 15% of sales is assumed until real profit
# data is available, so profit growth mirrors sales growth.
yearly_kpis["Total_Profit"] = yearly_kpis["Total_Sales"] * 0.15
profit_change = yearly_kpis["Total_Profit"].pct_change()
yearly_kpis["Profit YoY Growth (%)"] = profit_change * 100

print("✅ Executive Report - KPIs Computed")

# ---------------------------------------
# 5. Executive Report - Regional Summary
# ---------------------------------------

# Sales, row count and assumed profit per region.
# Profit is a placeholder: a flat 15% of the region's sales.
rows_by_region = df_merged.groupby("Region")
regional_summary = rows_by_region.agg(
    Region_Sales=("Revenue Per Order", "sum"),
    Region_Orders=("Order ID", "count"),
    Region_Profit=("Revenue Per Order", lambda revenue: revenue.sum() * 0.15),
)

# Profit as a percentage of sales
margin_ratio = regional_summary["Region_Profit"] / regional_summary["Region_Sales"]
regional_summary["Profit_Margin"] = margin_ratio * 100

# Returned rows per region; regions with no returned rows are absent from
# this series, so the division yields NaN for them and fillna turns it into 0.
returned_mask = df_merged["Returned"] == "Yes"
returned_per_region = df_merged.loc[returned_mask].groupby("Region")["Order ID"].count()
return_rate = returned_per_region / regional_summary["Region_Orders"] * 100
regional_summary["Return Rate (%)"] = return_rate.fillna(0)

print("✅ Executive Report - Regional Summary Computed")

# ---------------------------------------
# 6. Operational Report (Without Quotas)
# ---------------------------------------

# Grouping keys: one output row per (Region, State, City)
location_keys = ["Region", "State", "City"]

# Output column -> (source column, aggregation).
# Every metric is derived from "Revenue Per Order". Quantity_Sold,
# Discount_Applied and Discount_Cap are placeholders that stand in for real
# quantity and discount figures; they do not measure quantities or discounts.
operational_aggs = {
    "Sales_Per_Person": ("Revenue Per Order", "sum"),
    "Item_Unit_Price": ("Revenue Per Order", "mean"),
    "Quantity_Sold": ("Revenue Per Order", "count"),
    "Discount_Applied": ("Revenue Per Order", "mean"),
    "Discount_Cap": ("Revenue Per Order", "max"),
    "Gross_Sales": ("Revenue Per Order", "sum"),
}
operational_report = df_merged.groupby(location_keys).agg(**operational_aggs)

print("✅ Operational Report Computed")

# ---------------------------------------
# 7. Save Reports to Excel
# ---------------------------------------

output_file = "final_report_mysql.xlsx"

# Sheet name -> report frame, in the order the sheets are written. Each frame
# keeps its index because the index holds the grouping keys.
report_sheets = {
    "Executive Report - KPIs": yearly_kpis,
    "Executive Report - Region": regional_summary,
    "Operational Report": operational_report,
}

with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    for sheet_title, report in report_sheets.items():
        report.to_excel(writer, sheet_name=sheet_title)

print(f"\n✅ Final report saved as {output_file}")


✅ Data successfully loaded from MySQL
✅ Data successfully merged
✅ Executive Report - KPIs Computed
✅ Executive Report - Regional Summary Computed
✅ Operational Report Computed


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_merged["Returned"].fillna("No", inplace=True)


ProgrammingError: 1054 (42S22): Unknown column 'nan' in 'field list'