In [1]:
import pandas as pd

In [12]:
# Directory handed to the updater; the CSV database is read from / written to it.
db_directory = r"..\Assets\vehicle_models_db"

# Source workbook, located inside the same directory.
hyundai_file = db_directory + "\\" + "HYUNDAI SEP25.xlsx"

In [None]:
import pandas as pd
import os
from openpyxl import load_workbook

# Columns that together identify one vehicle record (used for de-duplication).
_KEY_COLUMNS = ["Model", "Description", "Trim", "Year"]

# Column layout used when the database CSV has to be created from scratch.
_DB_COLUMNS = ["Model", "Trim", "Year", "Description", "Active"]


def _normalize_key_columns(frame):
    """Return a copy of ``frame`` whose key columns hold plain strings (missing -> "")."""
    normalized = frame.copy()
    for column in _KEY_COLUMNS:
        normalized[column] = normalized[column].map(
            lambda value: "" if pd.isna(value) else str(value)
        )
    return normalized


def update_vehicle_database_from_excel(excel_file, directory):
    """
    Reads an Excel file with multiple worksheets and updates a vehicle model database CSV file.
    Only adds new records that do not already exist based on Model, Description, Trim, and Year.

    The key fields are compared as text. Without that, ``Year`` (built as a string from the
    sheet name) would never match the integer that ``pd.read_csv`` infers when the database
    is read back, and every run would append the same records again.

    Parameters:
    - excel_file: Path to the Excel file containing vehicle data. Each worksheet must have
      "Model", "Description" and "Description 2" headers in its first row; worksheets
      without them are skipped.
    - directory: Directory where the vehicle_model_db.csv file is stored or should be created.

    Returns:
    - None. The CSV file is rewritten and a one-line summary is printed.

    Raises:
    - KeyError: if an existing database file lacks one of the key columns.
    """
    # Ensure the directory exists
    os.makedirs(directory, exist_ok=True)

    # Define the path to the database file
    db_file = os.path.join(directory, "vehicle_model_db.csv")

    # Load existing database or create an empty DataFrame
    if os.path.exists(db_file):
        # Read the key columns as text so type inference cannot turn "2025" into 2025.
        df_existing = pd.read_csv(db_file, dtype={column: str for column in _KEY_COLUMNS})
    else:
        df_existing = pd.DataFrame(columns=_DB_COLUMNS)

    # Load the Excel workbook
    wb = load_workbook(excel_file, data_only=True)

    # List to collect new records
    new_records = []

    try:
        # Process each worksheet
        for sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            # The year is every digit of the sheet name, concatenated in order.
            year = ''.join(filter(str.isdigit, sheet_name))

            # Extract headers
            headers = [cell.value for cell in ws[1]]
            try:
                model_idx = headers.index("Model")
                desc_idx = headers.index("Description")
                trim_idx = headers.index("Description 2")
            except ValueError:
                continue  # Skip sheet if required columns are missing

            # Extract data rows; rows with any of the three fields empty are ignored
            for row in ws.iter_rows(min_row=2, values_only=True):
                model = row[model_idx]
                desc = row[desc_idx]
                trim = row[trim_idx]
                if model and desc and trim:
                    new_records.append({
                        "Model": model,
                        "Trim": trim,
                        "Year": year,
                        "Description": desc,
                        "Active": True,
                    })
    finally:
        # Release the workbook even if a sheet could not be processed.
        wb.close()

    # Explicit columns keep the frame well-formed when no record was collected.
    df_new = pd.DataFrame(new_records, columns=_DB_COLUMNS)

    # Bring both sides to the same textual representation before comparing keys.
    df_existing = _normalize_key_columns(df_existing)
    df_new = _normalize_key_columns(df_new)

    # Combine and drop duplicates based on Model, Description, Trim, Year.
    # Existing rows come first, so keep='first' preserves their other columns (e.g. Active).
    df_combined = pd.concat([df_existing, df_new], ignore_index=True)
    df_combined = df_combined.drop_duplicates(subset=_KEY_COLUMNS, keep='first')

    # Save updated database
    df_combined.to_csv(db_file, index=False)
    print(f"Database updated at {db_file}. Total records: {len(df_combined)}")

    

In [28]:
# Merge the Hyundai workbook into the CSV database kept in db_directory.
update_vehicle_database_from_excel(
    excel_file=hyundai_file,
    directory=db_directory,
)

Database updated at ..\Assets\vehicle_models_db\vehicle_model_db.csv. Total records: 91
