In [196]:
import pandas as pd

from datetime import datetime
import os

from openpyxl.styles import PatternFill



In [274]:

def mastersheet_data_load():

       # read the excel file and see the sheet names
       xl_files = pd.ExcelFile(r'..\landing_zone\2026\Hyundia\2025-11-1 HACC MAF - 20251113.xlsx')

       target_sheets = "Master"
       
       if target_sheets in xl_files.sheet_names:
              hyundai_master_sheet_df = pd.read_excel(r"..\landing_zone\2026\Hyundia\2025-11-1 HACC MAF - 20251113.xlsx", sheet_name=target_sheets, skiprows=1)

       # Keeping only the columns we need

       key_columns_to_keep = ['Model \nYear \nFrom', 'Model', 'Trim 1', 'Trim 2',
              'Trim 3', 'Trim 4', 'Trim 5', 'Trim 6', 'Trim 7', 'Trim 8',
              'Accessory Description (EN)', 'Accessory Description (FR)',
              'Part\nNumber', 'DNET', 'MSRP', 'Labour Rate', 'Suggested Labour Hours',
              'Suggested Base Retail Price w/labour', 'Comments (EN)', 'Comments (FR)']

       hyundai_master_sheet_df = hyundai_master_sheet_df[key_columns_to_keep]


       hyundai_master_sheet_df = hyundai_master_sheet_df.dropna(axis=1, how='all')


       # changed column names for master_file

       column_names_to_change = {
       'Model \nYear \nFrom': "Year", 
       'Part\nNumber':'PartNumber', 
       'Suggested Labour Hours':'Labour Hours',
       'Suggested Base Retail Price w/labour':'MSPR w/labour'}

       hyundai_master_sheet_df.rename(columns=column_names_to_change, inplace=True)


       return hyundai_master_sheet_df 

hyundai_master_sheet_df = mastersheet_data_load()



In [None]:
# set standard column names for master file
standard_column_names = ['Year', 'Model', 'Trim', 'Description', 'PartNumber',
       'DNET', 'MSRP', 'Labour Rate', 'Hours', 'MSPR w/labour',
       'Comments']

In [321]:
def hyundai_model_load():
       # load Hyundia model db

       hyundai_model_db_df = pd.read_csv(r"..\database\dbs\Hyundai_models_db.csv")

       # very special changes 

       mask = (hyundai_model_db_df["Year"] == 2024) & (hyundai_model_db_df["Model"].str.lower() == "elantra") & (hyundai_model_db_df["Trim"].str.contains(r"\bHybrid Luxury\b", case=False))


       # Replace "Hybrid Luxury" with "HEV" in Trim for those records
       hyundai_model_db_df.loc[mask, "Trim"] = hyundai_model_db_df.loc[mask, "Trim"].str.replace(r"\bHybrid Luxury\b", "HEV", case=False, regex=True)

       return hyundai_model_db_df

hyundai_model_db_df = hyundai_model_load()


In [320]:

def transform_vehicle_data(df):
    """
    Transforms vehicle data by applying trim corrections and model/year-specific changes.
    
    Args:
        df (pd.DataFrame): DataFrame with columns ['Year', 'Model', 'Trim'].
    
    Returns:
        pd.DataFrame: Transformed DataFrame.
    """
    
    # General trim corrections
    data_to_change_on_trim = {
        "Caligraphy": "Calligraphy",
        # "Calli ICE": "Calligraphy",
    }
    
    # Model/year-specific changes
    change_for_specific_veh = {
        "Tucson": {
            2026: {"Ultimate AWD": "Utlimate PHEV"}
        },
        "Kona": {
            2026: {"Pref w/Ult Pkg": "Preferred w/Ultimate Pkg"}
        }
    }
    
    # Santa Fe-specific changes
    changes_for_santafe = {
        "Preferred Hybrid": "Preferred HEV",
        "Luxury Hybrid": "Luxury HEV",
        "Calligraphy Hybrid": "Calligraphy HEV"
    }
    
    # 1. Apply general trim corrections
    df["Trim"] = df["Trim"].replace(data_to_change_on_trim)
    
    # 2. Apply model/year-specific changes
    for model, year_dict in change_for_specific_veh.items():
        for year, trim_changes in year_dict.items():
            for old_trim, new_trim in trim_changes.items():
                mask = (df["Model"] == model) & (df["Year"] == year) & (df["Trim"] == old_trim)
                df.loc[mask, "Trim"] = new_trim
    
    # 3. Apply Santa Fe-specific changes
    for old_trim, new_trim in changes_for_santafe.items():
        mask = (df["Model"] == "Santa Fe") & (df["Trim"] == old_trim)
        df.loc[mask, "Trim"] = new_trim
    
    return df


In [291]:
hyundai_model_db_df = transform_vehicle_data(hyundai_model_db_df)

In [316]:
# defining global assets

# tags dictionary for model number mapping

tagy = {
 'Pref': {'tags': ['Preferred']},
 'Urban': {'tags': ['Urban']},
 'Calli': {'tags': ['Calligraphy']},
 'Calli ICE': {'tags': ['Calligraphy',"ICE"]},
 'Trend': {'tags': ['Trend']},
 'N-Line': {'tags': ['N-Line']},
 'Lux': {'tags': ['Luxury']},
 'Lux ICE': {'tags': ['Luxury']},
 'Ult HEV': {'tags': ['Ultimate', 'HEV']},
 'Ult PHEV': {'tags': ['Ultimate', 'PHEV']},
 'Ult': {'tags': ['Ultimate']},
 'Ess': {'tags': ['Essential']},
 'HEV': {'tags': ['HEV']},
 'Luxury HEV': {'tags': ['Luxury','HEV']},
 'Hybrid Luxury': {'tags': ['Luxury','Hybrid']},
 'HEV Luxury': {'tags': ['Luxury','Hybrid']},
 'N': {'tags': ['N']},
 'N-Line Ult': {'tags': ['N-Line', 'Ult']},
 'Advanced': {'tags': ['Advanced']},
 'Performance': {'tags': ['Performance']},
 '2.5T Advanced': {'tags': ['2.5T', 'Advanced']},
 '2.5T Advanced Plus': {'tags': ['2.5T', 'Advanced', 'Plus']},
 '2.5T Prestige': {'tags': ['2.5T', 'Prestige']},
 '3.5T Sport': {'tags': ['3.5T', 'Sport']},
 '3.5T Sport Plus': {'tags': ['3.5T', 'Sport', 'Plus']},
 'Prestige': {'tags': ['Prestige']},
 '3.3T Advanced': {'tags': ['3.3T', 'Advanced']},
 '3.3T Sport': {'tags': ['3.3T', 'Sport']},
 '2.5T Adv': {'tags': ['2.5T', 'Adv']},
 '3.5T e-SC': {'tags': ['3.5T', 'e-SC']},
 '3.5T Advanced': {'tags': ['3.5T', 'Advanced']},
 '3.5T Prestige': {'tags': ['3.5T', 'Prestige']},
 '3.5T Prestige 7P': {'tags': ['3.5T', 'Prestige', '7P']},
 'Trend AWD': {'tags': ['Trend', 'AWD']},
 'Pref HEV': {'tags': ['Preferred', 'HEV']},
 'Pref HEV w/ Trend': {'tags': ['Preferred', 'HEV','Trend']},
 'XRT': {'tags': ['XRT']},
 'Ult Calli': {'tags': ['Ultimate', 'Calli']},
 'HEV NHL Ed': {'tags': ['HEV', 'NHL', 'Ed']},
 'Sport': {'tags': ['Sport']},
 'Pref LR': {'tags': ['Preferred', 'Long Range']},
 '3.5T Sport Plus with eLSD': {'tags': ['3.5T',
   'Sport',
   'Plus',
   'with',
   'eLSD']},
 '2.5T Advanced 5P': {'tags': ['2.5T', 'Advanced', '5P']},
 '2.5T Advanced Tech Pkg 5P': {'tags': ['2.5T',
   'Advanced',
   'Tech',
   'Pkg',
   '5P']},
 '3.5T Advanced Tech Pkg 7P': {'tags': ['3.5T',
   'Advanced',
   'Tech',
   'Pkg',
   '7P']},
 '3.5T Coupe': {'tags': ['3.5T', 'Coupe']},
 'Pref Trend': {'tags': ['Preferred', 'Trend']},
#  'Calli ICE': {'tags': ['Calli', 'ICE']},
 'Lux HEV': {'tags': ['Luxury', 'HEV']},
 'Calli HEV': {'tags': ['Calli', 'HEV']},
 '2.5T Advanced Tech': {'tags': ['2.5T', 'Advanced', 'Tech']},
 '3.5T Prestige Black': {'tags': ['3.5T', 'Prestige', 'Black']},
 '3.5T e-SC Prestige': {'tags': ['3.5T', 'e-SC', 'Prestige']},
 '3.5T e-SC Prestige Black': {'tags': ['3.5T', 'e-SC', 'Prestige', 'Black']},
 '3.5T Advanced Tech': {'tags': ['3.5T', 'Advanced', 'Tech']}
 }


missing_model_num_ = pd.DataFrame(columns=["Year", "Model", "Trim"])
missing_keyword = {}

In [205]:
def find_trim_columns_in_df(df):
    trim_columns = [col for col in df.columns if "trim" in col.lower()]
    return trim_columns


# helper function to show unique values in the master file

def show_unique_values(df, columns):
    """
    Display unique values for the specified columns in a DataFrame.
    
    Parameters:
    df (pd.DataFrame): The DataFrame to analyze.
    columns (list): List of column names to check.
    
    Returns:
    dict: A dictionary with column names as keys and unique values as lists.
    """
    unique_dict = {}
    for col in columns:
        if col in df.columns:
            unique_dict[col] = df[col].unique().tolist()
            # print(f"{col}: {df[col].unique().tolist()}")
        else:
            unique_dict[col] = "Column not found"
    return unique_dict



def filter_tags(tag_dict, to_remove):
    """
    Removes unwanted values from the 'tags' lists in a dictionary.

    Parameters:
        tag_dict (dict): Dictionary where each key has a 'tags' list.
        to_remove (set): Set of values to remove from the tags.

    Returns:
        dict: A new dictionary with filtered tags.
    """
    filtered_dict = {}

    for key, value in tag_dict.items():
        if "tags" in value and isinstance(value["tags"], list):
            # Keep only tags not in to_remove
            new_tags = [tag for tag in value["tags"] if tag not in to_remove]
            filtered_dict[key] = {"tags": new_tags}

    return filtered_dict



def create_keyword_dictionary(input_set):
    """
    Takes a set of strings, splits each string by spaces,
    and returns a set of unique keywords.
    
    Parameters:
        input_set (set): A set containing strings.
    
    Returns:
        set: A set of unique keywords.
    """
    keyword_dictionary_set = set()
    
    for value in input_set:
        if isinstance(value, str):  # Ensure it's a string
            words = value.split()   # Split by spaces
            keyword_dictionary_set.update(words)  # Add words to the set
    
    return keyword_dictionary_set



def find_uniq_trimNames_in_df(df):
    uniq_trim_list = set()
    trim_columns_list = find_trim_columns_in_df(df)
    for item in trim_columns_list:
        uniq_trim_list.update(df[item].dropna().unique())
    return uniq_trim_list

def is_tag_available(search_kwd):
    if search_kwd not in tagy.keys():
        return "Unavailable"
    
    return tagy[search_kwd]["tags"]

    



In [206]:
unique_trim_list = find_uniq_trimNames_in_df(hyundai_master_sheet_df)

In [208]:


def get_model_number_by_year_model_with_trim_key_word(year, model, kwd):
    
    pattern = ''.join(f'(?=.*{kw})' for kw in kwd)  # constructing search keywords pattern 
    # print(pattern)


    res_df = hyundai_model_db_df[
        (1==1)
        &(hyundai_model_db_df["Year"] == year) 

        & (hyundai_model_db_df["Model"].str.lower() == model.lower()) 
        & (hyundai_model_db_df["Trim"].str.contains(pattern, case=False, na=False))
    ]

    
    if len(res_df) <= 0:
        return {"status": 400,
                "value":"",
                    "msg": "No Model number record found. get_model_num_by_YM_wt..()"}
    
    response = {"status": 200, 
                "value": res_df,
                "msg": "Found model number. get_model_num_by_YM_wt..()"}
    return response 


def get_tags_by_model_and_trim(trim):
    # return tag_keywords[trim]
    tag_search_results = tagy[trim]["tags"]

    if len(tag_search_results) <=0:
        missing_keyword[trim] = 'Missing kwd'
        return tag_search_results
    
    
    return {"status": 200,
                    "value": tag_search_results,
                     "msg": "Found tag to use for modelNumber search. get_tag..()"}

def search_hyundai_db_by_model_year_trim(year, model, trim):

    search_response = get_tags_by_model_and_trim(trim)
    
    if search_response["status"] == 400:
        return search_response
    
    tags = search_response["value"]
    get_model_nums_search_response = get_model_number_by_year_model_with_trim_key_word(year, model, tags)
        
    if (get_model_nums_search_response["status"]==400):
        missing_model_num_.loc[len(missing_model_num_)] = [year, model, trim]

        return get_model_nums_search_response
    model_nums = get_model_nums_search_response["value"]

    return {
            "status": 200,
            "value": model_nums["ModelNumber"].tolist(),
            "msg": "Found modelNumbers. search_hyundai_db_by_model_year_trim()"}


In [209]:
def data_extract_all(missing_model_num_):
    dataset_summary = pd.DataFrame(columns=['Year', 'Model', 'Trim', 'Record_Count'])
    year_model_trim_dict = {}
    missing_model_num_ = missing_model_num_.loc[0:0]
    missing_keyword = {}

    # Looping through the master file by year/model and trim

    for year in hyundai_master_sheet_df['Year'].unique():
        for model in hyundai_master_sheet_df['Model'].unique():

    # filtering to the current iteration for year and Model        
            model_df = hyundai_master_sheet_df[
                (hyundai_master_sheet_df['Year'] == year) &
                (hyundai_master_sheet_df['Model'] == model)
            ]
            # source_file = hyundai_master_sheet_df[]
    # Getting only the name of Trim columns in the vehicle DF
            filtered_columns = [col for col in model_df.columns if "trim" in col.lower()]

    # Looping through unique model df, capturing          
            for col in filtered_columns:
                trims = model_df[col].dropna().unique().tolist()
                # skip empty trim columns
                if len(trims) == 0:
                    continue            
                
                trim_name = trims[0]  # grab the trim name in that column. Colum has only one trim name at this point. 
                
                # We are going to create a file for each trim name 
                # Filter the list searching by trimName and save the list 
                if trim_name:
                    
                    # filter by trim name and save a new list
                    subset_df = model_df[model_df[col] == trim_name]
                    
                    # grab all the colms in the subset with the keyword trim
                    copy_filtered_columns = [c for c in subset_df.columns if "trim" in c.lower()]  # grab all trim columns in the subset
                    
                    copy_filtered_columns.remove(col)  # drop the current trim column that we want to keep

                    subset_df = subset_df.drop(columns=copy_filtered_columns)  #  drop all trim columns except the one we want to keep
                    subset_df.rename(columns={col: 'Trim'}, inplace=True)       # rename the current trim column to 'Trim' for clarity
                    
                    # create copy of the subset dataframe and store both in the dist as english and french versions
                    # Identify columns containing "(EN)"
                    English_columns = [col for col in hyundai_master_sheet_df.columns if "(EN)" in col]
                    French_columns = [col for col in hyundai_master_sheet_df.columns if "(FR)" in col]

                    # Initialize nested dict if not exists
                    key = (year, model, trim_name)
                    year_model_trim_dict.setdefault(key, {"FR":pd.DataFrame(), "EN":pd.DataFrame()})

                    # Remove English columns for French version and vice versa
                    # first check if there are any columns to drop

                    # get model numbers by year/model/trim
                    # they could be one or many, just because
                    # some trims come w/t different options
                    # and some have different version that came at different times of the year same trims
                    
                    # Created a list that will get all the model_numbers, they each will have a copy of the data for the particular trim, since they are all handled as unique vehicles.
                    modelNum_search_results = search_hyundai_db_by_model_year_trim(year = year, model=model, trim = trim_name)


                    if modelNum_search_results["status"] == 400:
                        continue
                    model_nums = modelNum_search_results["value"]

                    # loop through all model numbers and create a copy list for each model number
                    for modelnum in model_nums:
                        
                        if len(English_columns) > 0:
                            
                            uniq_model_accy_list = subset_df.drop(columns=English_columns)

                            # standardize column names for French version
                            uniq_model_accy_list.columns = standard_column_names
                            
                            # get the model_number of the vehicle and add it to the dataframe
                            
                            uniq_model_accy_list["ModelNumber"] = modelnum
                            # year_model_trim_dict[key]["FR"].columns = 
                            year_model_trim_dict[key]["FR"] = pd.concat([year_model_trim_dict[key]["FR"], uniq_model_accy_list], ignore_index=True)
                            
                            # then drop some columns to match rate importer format
                            # then group all the df for each model and year and finally save them to an excel files
            
                        if len(French_columns) > 0:
                            uniq_model_accy_list = subset_df.drop(columns=French_columns)

                            # standardize column names for French version
                            uniq_model_accy_list.columns = standard_column_names
                            
                            # get the model_number of the vehicle and add it to the dataframe
                            
                            uniq_model_accy_list["ModelNumber"] = modelnum

                            year_model_trim_dict[key]["EN"] = pd.concat([year_model_trim_dict[key]["EN"], uniq_model_accy_list], ignore_index=True)
                        
                            

                        # dataset_summary[(year, model, trim_name)] = len(subset_df)
                        dataset_summary.loc[len(dataset_summary)] = [year, model, trim_name, len(subset_df)]

                        # print(f"Year: {year}, Model: {model}, Trim: {trim_name}, Records: {len(subset_df)}")
    return year_model_trim_dict, dataset_summary

In [210]:
def merge_lists_by_model_number(master_dict):
    merged_dict = {}

    # Merge all DataFrames by model and language
    for (year, model, trim_name), value in master_dict.items():
        if model not in merged_dict:
            merged_dict[model] = {}

        for lang, accy_list in value.items():
            if lang not in merged_dict[model]:
                merged_dict[model][lang] = pd.DataFrame()

            merged_dict[model][lang] = pd.concat([merged_dict[model][lang], accy_list], ignore_index=True)

    # Filter and rename columns after merging
    required_columns = ["Year", "ModelNumber", "Description", "PartNumber", "DNET", "MSRP", "Hours", "Comments"]
    rate_importer_name_column_names = ["Year", "Model", "Description", "Part", "Cost", "Price", "Hours", "Comments"]

    for model, lang_dict in merged_dict.items():
        for lang, df in lang_dict.items():
            if not df.empty:
                existing_cols = [col for col in required_columns if col in df.columns]
                filtered_copy = df[existing_cols].copy()
                rename_map = {old: new for old, new in zip(required_columns, rate_importer_name_column_names) if old in existing_cols}
                filtered_copy.rename(columns=rename_map, inplace=True)
                merged_dict[model][lang] = filtered_copy

    return merged_dict
    




In [211]:
def get_upload_report(merged_files_dict):
    for keys, values in merged_files_dict.items():
        en_cols = len(values["EN"].columns) if "EN" in values and isinstance(values["EN"], pd.DataFrame) else "NA"
        fr_cols = len(values["FR"].columns) if "FR" in values and isinstance(values["FR"], pd.DataFrame) else "NA"
        
        print(f"{keys} : EN: {en_cols}, FR: {fr_cols}")

        # create a report df


    


In [212]:

def save_merged_dict_to_excel(merged_dict):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"Hyundai_Master_ACCY_Ready_To_Load_{timestamp}.xlsx"
    directory = r"..\ready_to_upload\master_files"
    file_path = os.path.join(directory, filename)

    os.makedirs(directory, exist_ok=True)

    with pd.ExcelWriter(file_path, engine='openpyxl') as writer:
        for model, lang_dict in merged_dict.items():
            for lang, df in lang_dict.items():
                sheet_name = f"{model}_{lang}"[:31]

                if isinstance(df, pd.DataFrame):
                    if not df.empty:
                        # Write normal sheet
                        df.to_excel(writer, sheet_name=sheet_name, index=False)
                    else:
                        # Write empty sheet
                        pd.DataFrame({"Message": ["No data available"]}).to_excel(writer, sheet_name=sheet_name, index=False)

                        # Apply orange tab color
                        ws = writer.book[sheet_name]
                        ws.sheet_properties.tabColor = "FFA500"  # Orange hex code

        # If no sheets were written at all, create a placeholder
        if not writer.sheets:
            pd.DataFrame({"Message": ["No data available"]}).to_excel(writer, sheet_name="Empty", index=False)
            ws = writer.book["Empty"]
            ws.sheet_properties.tabColor = "FFA500"  # Orange tab color

    return file_path

In [213]:

# check if each 
# Get a list of unique model numbers from both Hyundia_db and uniq accy_list dict 
# store them in vars named db_mdNumCount and file_mdNumCount
# Create column is_identical to check if they are identical  
# Create column is_equal_count to check if they are equal in length
# create column mdNUms_db
# create column mdNums_file

# Year/Model.countUniqModelNUmber() 
def data_check(year_model_trim_dict, hyundai_model_db_df): 
    data_check_report_df = pd.DataFrame(columns=["Year", "Model", "Trim", "Lang", "is_list_identical?", "db_mdNumCount", "file_mdNumCount", "mdNUms_db", "mdNums_file"])
    for (year, model, trim_name),value in year_model_trim_dict.items():
    
        # # get unique model numbers - files
        # FR_uniq_model_nums = value["FR"]["ModelNumber"].unique().tolist()
        # EN_uniq_model_nums = value["EN"]["ModelNumber"].unique().tolist()
        
        # get unique models numbers from 
        search_resp = search_hyundai_db_by_model_year_trim(year, model, trim_name)

        if search_resp["status"] == 400:
            db_mdNumCount = "Veh Not found in db"
            mdNums_from_db = []
        else:
            mdNums_from_db = search_resp["value"]
            db_mdNumCount = len(mdNums_from_db) 
        
        for lang, accy_list in value.items():
            
            if "ModelNumber" not in accy_list.columns:
                uniq_model_nums_list = "Error: File has no model number column"
                is_indentical_list = False
                count_mdNUms_from_accy_list = 0
                continue 
            else:
                uniq_model_nums_list = accy_list["ModelNumber"].unique().tolist()
                is_indentical_list = (mdNums_from_db == uniq_model_nums_list)
                count_mdNUms_from_accy_list = len(uniq_model_nums_list)

            new_record_df =  pd.DataFrame(
                [[year, 
                model, 
                trim_name, 
                lang,
                is_indentical_list, 
                db_mdNumCount, 
                count_mdNUms_from_accy_list, 
                mdNums_from_db, 
                uniq_model_nums_list]]
                , columns=["Year", "Model", "Trim", "Lang", "is_list_identical?", "db_mdNumCount", "file_mdNumCount", "mdNUms_db", "mdNums_file"])

            data_check_report_df = pd.concat([data_check_report_df, new_record_df], ignore_index=True)
            
    return data_check_report_df





In [214]:
def check_missing_model_numbers_report_model_db(missing_model_num_, hyundai_model_db_df):
    # missing model numbers report 
    missing_data_report_df = pd.DataFrame(columns=["Year", "Model", "Trim", "missingMd#?", "Comments"])

    for index, row in missing_model_num_.iterrows():
        
        # search the veh model in the model number db by year/model/ and trim
        
        search_result = hyundai_model_db_df[
            (1==1)
            & (hyundai_model_db_df["Year"] == row["Year"])
            & (hyundai_model_db_df["Model"] == row["Model"])
            & (hyundai_model_db_df["Trim"] == row["Trim"])
            ]

        if search_result.empty:
            mdNUmStatus = True
        else:
            mdNUmStatus = False

        new_row = pd.DataFrame(
                                [[row["Year"], row["Model"],row["Trim"], mdNUmStatus, "Missing model Number."]], 
                                columns=["Year", "Model", "Trim", "missingMd#?", "Comments"]
                            )  
        # concat dfs
        missing_data_report_df = pd.concat([missing_data_report_df, new_row], ignore_index=True)
    
    return missing_data_report_df



In [215]:
# # Prepare the output filename with timestamp
# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# filename = "Hyundai_ACCY_load_report_{timestamp}.csv"
# directory = r"..\reports_log\Hyundai\data_load"
# output_path = os.path.join(directory, filename)

# data_load_report_check_df.to_csv(output_path, index=False)


In [216]:
def create_tags_for_veh_trims(df):
   # Decomposing trim to enable tagging
   all_trim_names= df["Trim"].unique().tolist()
   extracted_dict = {}
   kwds_to_drop = ["w/t", "with"]
   for item in all_trim_names:
      if " " not in item:
         parts = [item]

      else:
         parts = item.split()
      
      extracted_dict[item]= {"tags" :parts} 

   return extracted_dict
# create_tags_for_veh_trims(dataset_summary)

In [217]:
# cleaned_models.to_csv(r'..\database\dbs\Hyundai_models_db.csv', index=False)

In [218]:
# filtered_tags = filter_tags(tagy, (model_num_db_kwrd - master_df_kwdrs))

In [219]:
def find_missing_modelNums(df):
    # dataset_summary[dataset_summary["Trim"].str.contains("w/")]
    # counter = 0
    missing_model_numbers = pd.DataFrame(columns=["Year", "Model", "Trim"])

    for index, row in df.iterrows():
        Model_numbers_output = search_hyundai_db_by_model_year_trim(row.Year, row.Model, row.Trim)
        
        if 0 >= len(Model_numbers_output):
            # print(f"Year : {row.Year}, Model : {row.Model}, Trim : {row.Trim} -> No Model_numbers")
            missing_model_numbers.loc[len(missing_model_numbers)] = [row.Year, row.Model, row.Trim]
    return missing_model_numbers



In [307]:

year_model_trim_dict, dataset_summary = data_extract_all(missing_model_num_)
merged_files_dict = merge_lists_by_model_number(year_model_trim_dict)
# save_merged_dict_to_excel(merged_dict= merged_files_dict)
# find_missing_modelNums(dataset_summary)
 
data_load_report_check_df = data_check(year_model_trim_dict, hyundai_model_db_df)
missing_data_report = check_missing_model_numbers_report_model_db(missing_model_num_, hyundai_model_db_df)

In [308]:
get_upload_report(merged_files_dict)

Palisade : EN: 8, FR: 8
Tucson : EN: 8, FR: 8
Santa Cruz : EN: 8, FR: 8
Elantra : EN: 8, FR: 8
Ioniq 6 : EN: 8, FR: 8
Kona : EN: 8, FR: 8
Kona EV : EN: 8, FR: 8
Venue : EN: 8, FR: 8
Ioniq 5 : EN: 8, FR: 8
GV60 : EN: 0, FR: 0
GV70 : EN: 0, FR: 0
GV70 EV : EN: 0, FR: 0
G70 : EN: 0, FR: 0
G80 EV : EN: 0, FR: 0
G80 : EN: 0, FR: 0
G90 : EN: 0, FR: 0
GV80 : EN: 0, FR: 0
Sonata : EN: 8, FR: 8
Santa Fe : EN: 8, FR: 8
GV80 Coupe : EN: 0, FR: 0
Ioniq 9 : EN: 8, FR: 8


In [309]:
missing_data_report["Model"].unique()

array(['Palisade', 'GV60', 'GV70', 'GV70 EV', 'G70', 'G80 EV', 'G80',
       'G90', 'GV80', 'Santa Fe', 'GV80 Coupe', 'Tucson', 'Santa Cruz',
       'Kona EV', 'Sonata'], dtype=object)

In [310]:
missing_data_report[missing_data_report["Model"]=="Palisade"]

Unnamed: 0,Year,Model,Trim,missingMd#?,Comments
0,2024,Palisade,Calli,True,Missing model Number.
46,2026,Palisade,Calli ICE,True,Missing model Number.
84,2024,Palisade,Calli,True,Missing model Number.
130,2026,Palisade,Calli ICE,True,Missing model Number.
168,2024,Palisade,Calli,True,Missing model Number.
214,2026,Palisade,Calli ICE,True,Missing model Number.
252,2024,Palisade,Calli,True,Missing model Number.
298,2026,Palisade,Calli ICE,True,Missing model Number.


In [323]:
is_tag_available("Calli")

['Calligraphy']

In [324]:
search_hyundai_db_by_model_year_trim(2026, "Palisade", "Calli ICE") 

{'status': 400,
 'value': '',
 'msg': 'No Model number record found. get_model_num_by_YM_wt..()'}

In [None]:

hyundai_model_db_df[
                    (hyundai_model_db_df["Year"]==2026) 
                    & 
                    (hyundai_model_db_df["Model"]=="Palisade")
                    &(hyundai_model_db_df["Trim"].str.contains("Calligraphy", case=False))
                    # &(hyundai_model_db_df["Trim"].str.contains("Ultimate AWD", case=False))
                    ]
 

Unnamed: 0,Year,ModelNumber,Model,Trim,Source_sheets
48,2026,PACW7K3FULCA,Palisade,3.5L Ultimate Calligraphy 7-Pass,2026
51,2026,PAHW7G2DULCH,Palisade,2.5T Ultimate Calligraphy HEV 7-Pass,2026
