# In [1]:  (Jupyter cell marker, kept as a comment so the file parses as Python)
import pandas as pd
import plotly.express as px
import plotly.io as pio  # Saving as PDF
import logging

# Send all progress and error messages to a log file in the working directory.
logging.basicConfig(filename="plotly_graphs.log", level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logging.info("Script started: Loading dataset...")

# Bind ``df`` before the load attempt so the name always exists. Without this,
# a failed read leaves ``df`` undefined and any later use raises NameError
# (or, when re-running in a notebook, silently picks up a stale frame).
df = None
try:
    df = pd.read_excel("Superstore.xlsx")
    # Normalize headers: trim, replace spaces with underscores, lower-case.
    df.columns = df.columns.str.strip().str.replace(" ", "_").str.lower()
    # The hyphen in "sub-category" is not covered by the space replacement above.
    df = df.rename(columns={"sub-category": "sub_category"})
    df["order_date"] = pd.to_datetime(df["order_date"])
    logging.info("Dataset loaded successfully.")
except Exception as e:
    # logging.exception keeps the original message and appends the traceback.
    # If the read itself failed, ``df`` stays None; if a later step failed,
    # ``df`` holds the partially prepared frame (best effort, as before).
    logging.exception("Error loading dataset: %s", e)

def _sales_by_category_fig(df):
    """Bar chart of summed ``sales`` per ``category``."""
    totals = df.groupby("category")["sales"].sum().reset_index()
    return px.bar(totals, x="category", y="sales",
                  color="category", title="Total Sales by Category")


def _profit_by_region_fig(df):
    """Bar chart of summed ``profit`` per ``region``."""
    totals = df.groupby("region")["profit"].sum().reset_index()
    return px.bar(totals, x="region", y="profit",
                  color="region", title="Total Profit by Region")


def _quantity_distribution_fig(df):
    """Histogram of the ``quantity`` column using 20 bins."""
    return px.histogram(df, x="quantity", nbins=20, color_discrete_sequence=["blue"],
                        title="Order Quantity Distribution")


def _sales_trend_fig(df):
    """Line chart of ``sales`` summed per calendar month of ``order_date``."""
    # "ME" is the month-end alias; pandas versions before 2.2 spell it "M".
    monthly = df.set_index("order_date")["sales"].resample("ME").sum().reset_index()
    return px.line(monthly, x="order_date", y="sales",
                   title="Sales Trend Over Time", markers=True)


def _sales_vs_profit_fig(df):
    """Scatter of ``sales`` against ``profit`` with an OLS trendline per category."""
    # NOTE: plotly's "ols" trendline relies on statsmodels being installed.
    return px.scatter(df, x="sales", y="profit", color="category",
                      title="Sales vs Profit", trendline="ols")


def _sales_by_subcategory_fig(df):
    """Treemap of ``sales`` nested as category -> sub_category."""
    return px.treemap(df, path=["category", "sub_category"], values="sales",
                      title="Sales by Sub-Category",
                      color="sales", color_continuous_scale="Viridis")


def _top_customers_fig(df):
    """Bar chart of the ten customers with the highest summed ``sales``."""
    top_customers = df.groupby("customer_name")["sales"].sum().nlargest(10).reset_index()
    return px.bar(top_customers, x="customer_name", y="sales",
                  color="sales", title="Top 10 Customers by Sales")


def _discount_by_category_fig(df):
    """Box plot of ``discount`` grouped by ``category``."""
    # NOTE(review): the title mentions sales, but the plotted value is the
    # discount column; title kept as-is to avoid changing the output.
    return px.box(df, x="category", y="discount", color="category",
                  title="Sales by Discount Range")


def _orders_by_shipmode_fig(df):
    """Donut chart of the row count per ``ship_mode``."""
    return px.pie(df, names="ship_mode", title="Order Distribution by Ship Mode", hole=0.3,
                  color_discrete_sequence=px.colors.qualitative.Pastel)


def _category_sales_over_time_fig(df):
    """Area chart of ``sales`` summed per (order_date, category)."""
    per_day = df.groupby(["order_date", "category"])["sales"].sum().reset_index()
    return px.area(per_day, x="order_date", y="sales", color="category",
                   title="Category-wise Sales Over Time", line_group="category")


# One row per chart:
# (label for the "Generating ..." message, label for the "... saved." message,
#  label for the error message, output file name, figure builder).
# The three labels are kept separate because the original messages are not
# derivable from a single name (e.g. "Top 10 Customers" vs "Top Customers").
_CHART_SPECS = (
    ("Sales by Category (Bar Chart)", "Sales by Category chart",
     "Sales by Category", "sales_by_category.pdf", _sales_by_category_fig),
    ("Profit by Region (Bar Chart)", "Profit by Region chart",
     "Profit by Region", "profit_by_region.pdf", _profit_by_region_fig),
    ("Order Quantity Distribution (Histogram)", "Order Quantity Distribution chart",
     "Order Quantity Distribution", "quantity_distribution.pdf", _quantity_distribution_fig),
    ("Sales Trend Over Time (Line Chart)", "Sales Trend Over Time chart",
     "Sales Trend Over Time", "sales_trend.pdf", _sales_trend_fig),
    ("Sales vs Profit (Scatter Plot)", "Sales vs Profit scatter plot",
     "Sales vs Profit", "sales_vs_profit.pdf", _sales_vs_profit_fig),
    ("Sales by Sub-Category (Treemap)", "Sales by Sub-Category treemap",
     "Sales by Sub-Category", "sales_by_subcategory.pdf", _sales_by_subcategory_fig),
    ("Top 10 Customers by Sales (Bar Chart)", "Top Customers by Sales chart",
     "Top Customers by Sales", "top_customers_sales.pdf", _top_customers_fig),
    ("Sales by Discount Range (Box Plot)", "Sales by Discount Range box plot",
     "Sales by Discount Range", "sales_by_discount.pdf", _discount_by_category_fig),
    ("Order Distribution by Ship Mode (Pie Chart)", "Order Distribution by Ship Mode pie chart",
     "Order Distribution by Ship Mode", "orders_by_shipmode.pdf", _orders_by_shipmode_fig),
    ("Category-wise Sales Over Time (Area Chart)", "Category-wise Sales Over Time chart",
     "Category-wise Sales Over Time", "category_sales_over_time.pdf",
     _category_sales_over_time_fig),
)


def _render_chart(df, start_label, saved_label, error_label, filename, build_figure):
    """Build one figure and export it to ``filename``; return True on success."""
    try:
        logging.info(f"Generating {start_label}...")
        # Static export through plotly.io (needs an image-export engine such
        # as Kaleido to be installed).
        pio.write_image(build_figure(df), filename)
        logging.info(f"{saved_label} saved.")
    except Exception as e:
        # One broken chart must not stop the remaining ones; keep the
        # traceback in the log so the cause can be diagnosed afterwards.
        logging.exception("Error generating %s chart: %s", error_label, e)
        return False
    return True


def generate_plotly_graphs(df):
    """Render the ten Superstore charts and save each one as a PDF.

    Every chart is produced independently: a failure in one is logged with
    its traceback and the remaining charts are still attempted. The files are
    written using relative names, i.e. into the current working directory.

    Args:
        df: DataFrame with the columns used by the charts (``category``,
            ``sub_category``, ``region``, ``sales``, ``profit``, ``quantity``,
            ``discount``, ``order_date``, ``customer_name``, ``ship_mode``).
            ``None`` or an empty frame is reported and skipped.

    Returns:
        None. Progress, failures and a final summary go to the root logger.
    """
    if df is None or df.empty:
        logging.error("No data available; skipping graph generation.")
        return

    failed = sum(not _render_chart(df, *spec) for spec in _CHART_SPECS)

    # Only claim success when every chart was really written.
    if failed:
        logging.warning("%d of %d graphs could not be generated; see the errors above.",
                        failed, len(_CHART_SPECS))
    else:
        logging.info("All graphs generated successfully.")

# Render the charts only when a dataset is actually bound. If loading failed,
# ``df`` may be undefined (or None), and an unconditional call would raise
# NameError instead of leaving a clear message in the log.
if globals().get("df") is not None:
    generate_plotly_graphs(df)
else:
    logging.error("Dataset is not available; skipping graph generation.")
