In [1]:
import sys

# Add the parent directory to the import path so the `src` package imports
# below resolve (assumes the kernel's working directory sits one level below
# the project root — TODO confirm).
sys.path.append("..")

from src.data_processing.loader import (
    load_dataset,
    summarize_sales_by_region_year,
    summarize_models_by_region_year,
    explore_key_drivers_of_sales,
)
from src.llm.agent import LLMReportAgent
from src.reporting.markdown_builder import build_markdown_report

In [2]:
# Relative path to the Excel workbook holding the raw sales data.
dataset_path = "../datasets/BMW sales data (2020-2024).xlsx"

# Read the workbook through the project loader.
df = load_dataset(dataset_path)

# Build the two region/year summaries. Each call is also handed a JSON path
# under ../reports (presumably where the summary is persisted — confirm in
# src.data_processing.loader).
sales_summary = summarize_sales_by_region_year(
    df,
    "../reports/sales_summary.json",
)
model_summary = summarize_models_by_region_year(
    df,
    "../reports/models_summary.json",
)

# Compute the key-driver table for sales and show it as plain text.
sales_drivers = explore_key_drivers_of_sales(df)
print(sales_drivers)

                      Correlation_with_Sales_Volume
Sales_Volume                               1.000000
Year_2022                                  0.012249
Model_X6                                   0.011690
Region_North America                       0.010548
Model_M5                                   0.006527
Region_Europe                              0.005913
Mileage_KM                                 0.005660
Year_2024                                  0.005385
Color_Silver                               0.005095
Color_White                                0.004660
Model_7 Series                             0.004476
Fuel_Type_Electric                         0.002763
Model_X1                                   0.002731
Model_X5                                   0.002685
Transmission_Manual                        0.001824
Fuel_Type_Hybrid                           0.001349
Model_M3                                   0.001226
Price_USD                                  0.000952
Model_X3    

  df["Sales_Volume"] = pd.to_numeric(df["Sales_Volume"], errors="ignore").fillna(0)


In [3]:
# Create the report agent with its default constructor arguments; the
# analyze_* calls later in the notebook are made on this instance.
llm_agent = LLMReportAgent()

# NOTE(review): the commented-out lines below look like an earlier single-call
# workflow. `summary` is not defined anywhere in the visible notebook, so they
# would fail if re-enabled as-is — consider deleting them.
# # Call analyze with summary and figures directory
# report = llm_agent.analyze(summary, "../reports")

# # Print the analysis report text (with possible plot paths)
# print(report)

In [4]:
# Request one report section per analysis, in the same order they appear in
# the final document. Every call is pointed at the ../reports directory.
sales_report_md = llm_agent.analyze_sales_trend(
    sales_summary, "../reports"
)
model_report_md = llm_agent.analyze_model_trend(
    model_summary, "../reports"
)
drivers_report_md = llm_agent.analyze_correlation_matrix(
    sales_drivers, "../reports"
)

# Sections in the order they are handed to the report builder.
report_sections = [sales_report_md, model_report_md, drivers_report_md]

# Assemble the sections into a single Markdown report and keep the value the
# builder returns so it can be reported below.
combined_report_path = build_markdown_report(
    report_sections,
    out_dir="../reports",
    report_title="BMW Sales & Model Performance Analysis",
    intro_text="This report combines overall sales trends and model performance highlights.",
)

print(f"Combined report saved to: {combined_report_path}")


Combined report saved to: ../reports/report.md
