In [None]:
# This is the technical analysis part.
# It generates the operational and executive reports.
# Use this code as a reference when generating the reports for the second part of the script.

import pandas as pd

# ---------------------------------------
# 1. Load and Inspect Data
# ---------------------------------------
file_path = "data.xlsx"  # Ensure this file is in the same directory

# Open the workbook once and read the three sheets from it. The context
# manager closes the underlying file handle as soon as the sheets are parsed,
# so the workbook is not left open for the rest of the kernel session.
with pd.ExcelFile(file_path) as xls:
    df_orders = pd.read_excel(xls, sheet_name="Orders")    # Orders sheet
    df_returns = pd.read_excel(xls, sheet_name="Returns")  # Returns sheet
    df_people = pd.read_excel(xls, sheet_name="People")    # People sheet

print("=== 1. DATA LOADED SUCCESSFULLY ===\n")

# ---------------------------------------
# 2. Data Cleaning & Preparation
# ---------------------------------------

# Parse both date columns so the .dt accessor can be used on them.
df_orders["Order Date"] = pd.to_datetime(df_orders["Order Date"])
df_orders["Ship Date"] = pd.to_datetime(df_orders["Ship Date"])

# Extract Year from Order Date
df_orders["Year"] = df_orders["Order Date"].dt.year

# Normalise the join key on BOTH sides. Title-casing only the People sheet
# would make "east" / "East " in Orders silently miss its People row in the
# left merge below. Missing regions are labelled "Unknown" first so they
# survive the string operations.
df_orders["Region"] = (
    df_orders["Region"].fillna("Unknown").astype(str).str.strip().str.title()
)
df_people["Region"] = df_people["Region"].str.strip().str.title()

# Merge Orders with People to add the People-sheet columns for each region.
df_orders = df_orders.merge(df_people, on="Region", how="left")

# Merge Orders with Returns to track returned orders.
# A left merge emits one output row per matching right-hand row, so a Returns
# sheet that lists the same Order ID more than once would duplicate order rows
# and inflate every downstream sum. Collapse Returns to one row per order first.
returns_unique = df_returns.drop_duplicates(subset="Order ID")
df_merged = df_orders.merge(returns_unique, on="Order ID", how="left")

# Orders with no match in Returns were not returned.
df_merged["Returned"] = df_merged["Returned"].fillna("No")

print("\n=== DATA CLEANING COMPLETED ===\n")

# ---------------------------------------
# 3. EXECUTIVE REPORT (Yearly KPIs)
# ---------------------------------------

# Rows flagged as returned; shared by the yearly and the regional return counts.
returned_rows = df_merged[df_merged["Returned"] == "Yes"]

# Orders are counted as DISTINCT Order IDs. When every row is its own order
# this equals a plain row count; when an order spans several rows it avoids
# counting the same order (and the same return) more than once.
yearly_kpis = df_merged.groupby("Year").agg(
    Total_Sales=("Sales", "sum"),
    Total_Orders=("Order ID", "nunique"),
    Total_Profit=("Profit", "sum"),
)
# Years without any return are absent from the grouped result -> fill with 0.
yearly_kpis["Return_Count"] = (
    returned_rows.groupby("Year")["Order ID"].nunique()
    .reindex(yearly_kpis.index, fill_value=0)
)

# Compute YoY Growth (the first year has no predecessor, so it stays NaN).
yearly_kpis["Sales YoY Growth (%)"] = yearly_kpis["Total_Sales"].pct_change() * 100
yearly_kpis["Profit YoY Growth (%)"] = yearly_kpis["Total_Profit"].pct_change() * 100
yearly_kpis["Return Rate (%)"] = (yearly_kpis["Return_Count"] / yearly_kpis["Total_Orders"]) * 100

# Region-Wise Summary
region_summary = df_merged.groupby("Region").agg(
    Region_Sales=("Sales", "sum"),
    Region_Orders=("Order ID", "nunique"),
    Region_Profit=("Profit", "sum"),
)

# Profit margin = profit as a percentage of sales (previously this column was
# just a second copy of the profit sum). Zero sales yields NaN instead of inf.
region_summary["Profit_Margin"] = (
    region_summary["Region_Profit"]
    / region_summary["Region_Sales"].replace(0, float("nan"))
    * 100
)

# Return rate per region = returned orders / all orders in that region.
# (Previously this divided each region's order count by the overall order
# count, i.e. it reported the region's share of orders, not a return rate.)
region_returns = (
    returned_rows.groupby("Region")["Order ID"].nunique()
    .reindex(region_summary.index, fill_value=0)
)
region_summary["Return Rate (%)"] = (region_returns / region_summary["Region_Orders"]) * 100

# ---------------------------------------
# 4. OPERATIONAL REPORT (Annex II Metrics)
# ---------------------------------------

# Output column -> (source column, aggregation). Dict order is the column
# order of the resulting frame.
# NOTE(review): Sales_Per_Person and Gross_Sales are the same aggregation
# (sum of Sales); confirm whether a per-person figure was intended.
# NOTE(review): both *_Quota columns are placeholders (per-group means), so
# each ratio below is sum/mean = 100 x the number of non-null rows in the
# group until real quota figures are supplied.
_operational_aggs = {
    "Sales_Per_Person": ("Sales", "sum"),
    "Item_Unit_Price": ("Sales", "mean"),
    "Quantity_Sold": ("Quantity", "sum"),
    "Quantity_Quota": ("Quantity", "mean"),    # Placeholder (update with actual quota if available)
    "Discount_Applied": ("Discount", "mean"),
    "Discount_Cap": ("Discount", "max"),
    "Gross_Sales": ("Sales", "sum"),
    "Gross_Sales_Quota": ("Sales", "mean"),    # Placeholder (update with actual quota if available)
}

# One row per (Region, State, City) combination.
operational_report = (
    df_merged
    .groupby(["Region", "State", "City"])
    .agg(**_operational_aggs)
)

# Attainment ratios, expressed in percent.
operational_report["Quantity Sold / Quantity Quota (%)"] = (
    operational_report["Quantity_Sold"]
    .div(operational_report["Quantity_Quota"])
    .mul(100)
)
operational_report["Gross Sales / Gross Sales Quota (%)"] = (
    operational_report["Gross_Sales"]
    .div(operational_report["Gross_Sales_Quota"])
    .mul(100)
)

# ---------------------------------------
# 5. Save All Data to a New Excel File
# ---------------------------------------
output_file = "final_report.xlsx"

# (frame, sheet name, write the index?) in the order the sheets are written.
# The raw/cleaned tables drop their positional index; the grouped reports keep
# theirs because the index holds the grouping keys.
_sheet_plan = [
    (df_orders, "Cleaned Orders", False),
    (df_returns, "Returns", False),
    (df_people, "People", False),
    (yearly_kpis, "Executive Report - KPIs", True),
    (region_summary, "Executive Report - Region", True),
    (operational_report, "Operational Report", True),
]

# The writer is closed (and the file saved) when the with-block exits.
with pd.ExcelWriter(output_file, engine="xlsxwriter") as writer:
    for frame, sheet, keep_index in _sheet_plan:
        frame.to_excel(writer, sheet_name=sheet, index=keep_index)

print(f"\n✅ Final report saved as {output_file}")


In [1]:
import mysql.connector
import pandas as pd
from dotenv import load_dotenv
import os

# Load environment variables from .env file
load_dotenv()

# Read credentials from the environment (populated from .env above).
# os.getenv returns None for any variable that is not set.
MYSQL_HOST = os.getenv("MYSQL_HOST")
MYSQL_USER = os.getenv("MYSQL_USER")
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD")
MYSQL_DATABASE = os.getenv("MYSQL_DATABASE")

# The port is optional: fall back to MySQL's default port (3306) when the
# variable is unset or empty, instead of crashing on int(None) / int("").
MYSQL_PORT = int(os.getenv("MYSQL_PORT") or 3306)

# Connect to MySQL
cnx = mysql.connector.connect(
    host=MYSQL_HOST,
    user=MYSQL_USER,
    password=MYSQL_PASSWORD,
    database=MYSQL_DATABASE,
    port=MYSQL_PORT
)

# One variable per table preview; they stay None if fetching fails.
orders_data = None
customers_data = None
locations_data = None
orderdetails_data = None
regionalmanagers_data = None
returns_data = None

# Table name in the database -> name of the variable that holds its preview.
# The table names are fixed literals, so interpolating them into the query
# text below does not expose the query to untrusted input.
tables = {
    "Orders": "orders_data",
    "customers": "customers_data",
    "locations": "locations_data",
    "orderdetails": "orderdetails_data",
    "regionalmanagers": "regionalmanagers_data",
    "returns": "returns_data"
}

# Previews collected by variable name; assigned explicitly afterwards rather
# than written through globals(), so every name is visible to readers/linters.
fetched = {}

# try/finally guarantees the cursor and the connection are released even when
# one of the queries raises (e.g. a table is missing).
try:
    cursor = cnx.cursor()
    try:
        for table, var_name in tables.items():
            query = f"SELECT * FROM {table} LIMIT 5"  # Fetch first 5 rows
            cursor.execute(query)

            # Fetch data and column names
            rows = cursor.fetchall()
            columns = [col[0] for col in cursor.description]  # Get column names

            # Convert to DataFrame
            fetched[var_name] = pd.DataFrame(rows, columns=columns)
    finally:
        cursor.close()
finally:
    cnx.close()

orders_data = fetched["orders_data"]
customers_data = fetched["customers_data"]
locations_data = fetched["locations_data"]
orderdetails_data = fetched["orderdetails_data"]
regionalmanagers_data = fetched["regionalmanagers_data"]
returns_data = fetched["returns_data"]

# Print each stored preview under a banner naming its table.
_table_previews = (
    ("Orders", orders_data),
    ("Customers", customers_data),
    ("Locations", locations_data),
    ("Order Details", orderdetails_data),
    ("Regional Managers", regionalmanagers_data),
    ("Returns", returns_data),
)
for _label, _frame in _table_previews:
    print(f"\n--- {_label} Table (Top 5 Rows) ---\n", _frame)



--- Orders Table (Top 5 Rows) ---
          Order ID           Order Date            Ship Date       Ship Mode  \
0  CA-2020-152156  2020-11-08 00:00:00  2020-11-11 00:00:00    Second Class   
1  CA-2020-138688  2020-06-12 00:00:00  2020-06-16 00:00:00    Second Class   
2  US-2019-108966  2019-10-11 00:00:00  2019-10-18 00:00:00  Standard Class   
3  CA-2018-115812  2018-06-09 00:00:00  2018-06-14 00:00:00  Standard Class   
4  CA-2021-114412  2021-04-15 00:00:00  2021-04-20 00:00:00  Standard Class   

   Days to Ship Customer ID  Postal Code  
0             3    CG-12520        42420  
1             4    DV-13045        90036  
2             7    SO-20335        33311  
3             5    BH-11710        90032  
4             5    AA-10480        28027  

--- Customers Table (Top 5 Rows) ---
   Customer ID    Customer Name    Segment High Value Customer  \
0    CG-12520      Claire Gute   Consumer                  No   
1    DV-13045  Darrin Van Huff  Corporate                  No 