<a href="https://colab.research.google.com/github/BinduGGowda/Cost-comparsion/blob/main/Cost-Comparsion.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [44]:
# Install required packages
!pip install pandas openpyxl
from pathlib import Path

import pandas as pd
from google.colab import files
from openpyxl import Workbook
from openpyxl.chart import BarChart, Reference
from openpyxl.styles import PatternFill
from openpyxl.utils import get_column_letter
from openpyxl.utils.dataframe import dataframe_to_rows



In [55]:
# Function to generate dynamic cost comparison report
def generate_cost_comparison_report(files, sheet_name="Data", output_file="Cost_Comparison_Report.xlsx"):
    if len(files) < 2:
        raise ValueError("At least two files are required for comparison.")

    # Step 1: Read and aggregate all files
    agg_list = []
    file_names = []
    for file in files:
        df = pd.read_excel(file, sheet_name=sheet_name, engine="openpyxl")
        agg = df.groupby(["ResourceType", "Squad"])["CostUSD"].sum().reset_index()
        agg_list.append(agg)
        file_names.append(file.split('.')[0])

    # Step 2: Merge all aggregated data dynamically
    comparison = agg_list[0].rename(columns={"CostUSD": f"Cost in {file_names[0]}"})
    for i in range(1, len(agg_list)):
        comparison = pd.merge(comparison, agg_list[i], on=["ResourceType", "Squad"], how="outer").fillna(0)
        comparison.rename(columns={"CostUSD": f"Cost in {file_names[i]}"}, inplace=True)

    # Step 3: Logic for Monthly vs Quarterly
    if len(files) == 2:  # Monthly comparison
        for i in range(1, len(file_names)):
            diff_col = f"Difference ({file_names[i]} vs {file_names[0]})"
            pct_col = f"Difference Percentage ({file_names[i]} vs {file_names[0]})"
            comparison[diff_col] = comparison[f"Cost in {file_names[i]}"] - comparison[f"Cost in {file_names[0]}"]
            # Show absolute percentage (remove negative sign)
            comparison[pct_col] = comparison.apply(
                lambda row: 0 if row[f"Cost in {file_names[0]}"] == 0 else abs(round(((row[f"Cost in {file_names[i]}"] - row[f"Cost in {file_names[0]}"]) / row[f"Cost in {file_names[0]}"] * 100), 2)),
                axis=1
            )
    elif len(files) == 3:  # Quarterly comparison
        cost_cols = [f"Cost in {name}" for name in file_names]
        comparison["Total Cost"] = comparison[cost_cols].sum(axis=1)

    # Sort by Squad and ResourceType
    comparison.sort_values(by=["Squad", "ResourceType"], inplace=True)
    cols = ["Squad"] + [col for col in comparison.columns if col != "Squad"]
    comparison = comparison[cols]

    # Summary by file
    total_costs = []
    for i, file in enumerate(files):
        df = pd.read_excel(file, sheet_name=sheet_name, engine="openpyxl")
        total_costs.append(df["CostUSD"].sum())
    summary = pd.DataFrame({"File": file_names, "TotalCostUSD": total_costs})

    # Squad-level summary
    squad_summary = None
    for i, file in enumerate(files):
        df = pd.read_excel(file, sheet_name=sheet_name, engine="openpyxl")
        squad_cost = df.groupby("Squad")["CostUSD"].sum().reset_index().rename(columns={"CostUSD": f"Cost in {file_names[i]}"})
        squad_summary = squad_cost if squad_summary is None else pd.merge(squad_summary, squad_cost, on="Squad", how="outer").fillna(0)

    # Create Excel workbook
    wb = Workbook()

    # Detailed Comparison sheet
    ws1 = wb.active
    ws1.title = "Detailed Comparison"
    for r in dataframe_to_rows(comparison, index=False, header=True):
        ws1.append(r)

    # Conditional formatting for Diff columns (only for monthly)
    if len(files) == 2:
        diff_cols = [col for col in comparison.columns if col.startswith("Difference (")]
        for col_name in diff_cols:
            col_index = comparison.columns.get_loc(col_name) + 1
            for row in range(2, ws1.max_row + 1):
                cell = ws1.cell(row=row, column=col_index)
                if cell.value > 0:
                    cell.fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")  # Red for positive diff
                elif cell.value < 0:
                    cell.fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")  # Green for negative diff

    # Auto-adjust column widths
    for col in ws1.columns:
        max_length = max(len(str(cell.value)) for cell in col)
        ws1.column_dimensions[get_column_letter(col[0].column)].width = max_length + 2

    # Summary sheet
    ws2 = wb.create_sheet(title="Summary")
    for r in dataframe_to_rows(summary, index=False, header=True):
        ws2.append(r)

    # Chart 1: Total Cost by File
    chart1 = BarChart()
    chart1.title = "Total Cost by File"
    chart1.x_axis.title = "File"
    chart1.y_axis.title = "Total Cost (USD)"
    data1 = Reference(ws2, min_col=2, min_row=1, max_row=ws2.max_row)
    cats1 = Reference(ws2, min_col=1, min_row=2, max_row=ws2.max_row)
    chart1.add_data(data1, titles_from_data=True)
    chart1.set_categories(cats1)
    chart1.type = "col"
    chart1.style = 10
    ws2.add_chart(chart1, "E2")

    # Add Squad Summary table
    ws2.append([])
    ws2.append(["Squad Summary"])
    for r in dataframe_to_rows(squad_summary, index=False, header=True):
        ws2.append(r)

    # Chart 2: Squad Cost Comparison
    start_row = ws2.max_row - len(squad_summary) + 1
    cats2 = Reference(ws2, min_col=1, min_row=start_row + 1, max_row=ws2.max_row)
    chart2 = BarChart()
    chart2.title = "Squad Cost Comparison"
    chart2.x_axis.title = "Squad"
    chart2.y_axis.title = "Cost (USD)"
    chart2.type = "col"
    chart2.style = 10
    chart2.grouping = "clustered"

    for i in range(2, 2 + len(file_names)):
        series = Reference(ws2, min_col=i, min_row=start_row, max_row=ws2.max_row)
        chart2.add_data(series, titles_from_data=True)

    chart2.set_categories(cats2)
    ws2.add_chart(chart2, "E20")

    # Add Top Differences sheet ONLY for 2 files
    if len(files) == 2:
        top10 = comparison.sort_values(by=f"Difference ({file_names[1]} vs {file_names[0]})", ascending=False).head(10)
        ws3 = wb.create_sheet(title="Top Differences")
        for r in dataframe_to_rows(top10, index=False, header=True):
            ws3.append(r)

    # Save workbook
    wb.save(output_file)
    print(f"✅ Report generated successfully: {output_file}")




In [56]:
# Upload files dynamically (one file per input)

print("Start uploading files (minimum 2, maximum 3).")
file_list = []

while len(file_list) < 3:
    print(f"Upload File {len(file_list)+1}:")
    uploaded = files.upload()  # Accept one file at a time

    # Validate single file upload
    if len(uploaded) > 1:
        print("⚠️ Please upload only ONE file at a time. Taking the first file.")
    file_name = list(uploaded.keys())[0]
    file_list.append(file_name)

    if len(file_list) >= 2 and len(file_list) < 3:
        more = input("Do you want to upload another file? (yes/no): ").strip().lower()
        if more == 'no':
            break

# Validation
if len(file_list) < 2:
    raise ValueError("You must upload at least 2 files.")

# Generate report
generate_cost_comparison_report(files=file_list, sheet_name="Data", output_file="Cost_Comparison_Report.xlsx")

# Download the generated report
files.download("Cost_Comparison_Report.xlsx")


Start uploading files (minimum 2, maximum 3).
Upload File 1:


Saving LSEGSaaSPRD_CostAnalysis_Sep2025 1.xlsx to LSEGSaaSPRD_CostAnalysis_Sep2025 1 (30).xlsx
Upload File 2:


Saving LSEGSaaSPPR_CostAnalysis_Sep2025.xlsx to LSEGSaaSPPR_CostAnalysis_Sep2025 (38).xlsx
Do you want to upload another file? (yes/no): no
✅ Report generated successfully: Cost_Comparison_Report.xlsx


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>