# ML Project Jupyter Notebook 3 (Open-Ended Section - Interface for Predictions)

### Group 51

André Lourenço – 20240743 <br>
Carolina Pinto – 20240494 <br>
Daniel Caridade – 20211588 <br>
Fábio dos Santos - 20240678 <br>
Gustavo Gomes – 20240657 <br>

__`Step 1`__ Import needed libraries.

In [73]:
pip install tkcalendar




[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: c:\Users\gus_g\.pyenv\pyenv-win\versions\3.12.5\python.exe -m pip install --upgrade pip





In [74]:
import pandas as pd
import numpy as np
import warnings

from tkinter import ttk
import joblib
from datetime import datetime
import tkinter as tk
from tkcalendar import DateEntry

__`Step 2`__ Load the imputation dictionaries, the count encoder, the label encoder and the model.

In [75]:
# Load the fitted preprocessing artifacts and the trained model from the local 'dumps/' folder.
# NOTE(review): joblib.load unpickles arbitrary Python objects, so these files must come
# from a trusted source (presumably the group's own training notebooks — confirm).
all_imputation_dicts = joblib.load('dumps/imputation_dict.joblib')  # looked up per imputed variable in handle_missing_values_test
count_encoder = joblib.load('dumps/count_encoder.pkl')  # fitted encoder; only .transform() is called here
label_encoder = joblib.load('dumps/label_encoder.pkl')  # used to map predictions back to class labels
model = joblib.load('dumps/model.pkl')  # exposes get_booster() and predict()

__`Step 3`__ Define functions used during preprocessing.

__`Step 3.1`__ Create functions to check if a value can or cannot be converted into an integer.

In [76]:
def cannot_be_int(val):
    """Return True when *val* cannot be converted to an integer.

    The conversion goes through ``float`` first so that strings such as
    ``"12345.0"`` are accepted.

    Args:
        val: Any scalar (string, number, ``None``, NaN, ...).

    Returns:
        bool: ``False`` if ``int(float(val))`` succeeds, ``True`` otherwise.
    """
    try:
        int(float(val))
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN; TypeError: None / unsupported
        # types; OverflowError: +/- infinity.
        return True
    return False

In [77]:
def can_be_int(val):
    """Return True when *val* can be converted to an integer.

    The conversion goes through ``float`` first so that strings such as
    ``"12345.0"`` are accepted.

    Args:
        val: Any scalar (string, number, ``None``, NaN, ...).

    Returns:
        bool: ``True`` if ``int(float(val))`` succeeds, ``False`` otherwise.
    """
    try:
        int(float(val))
    except (ValueError, TypeError, OverflowError):
        # ValueError: non-numeric text or NaN; TypeError: None / unsupported
        # types; OverflowError: +/- infinity.
        return False
    return True

__`Step 3.2`__ Create a function to treat incoherences.

In [78]:
def treating_incoherences(data):
    """Clean incoherent values in *data* and return it.

    The DataFrame is modified in place (columns are overwritten and one new
    column, 'Average Weekly Wage ZERO', is added) and the same object is
    returned. The columns referenced below must all be present.
    """
    # Replace "Age at Injury" < 16 with NaN
    data.loc[data['Age at Injury'] < 16, 'Age at Injury'] = np.nan

    # Replace 0 with NaN in "Birth Year"
    data["Birth Year"] = data["Birth Year"].replace(0, np.nan)

    # Create "Average Weekly Wage ZERO" flag and replace 0 with NaN in "Average Weekly Wage"
    # (the flag is 0 when the wage equals 0 and 1 otherwise)
    data['Average Weekly Wage ZERO'] = data['Average Weekly Wage'].apply(lambda x: 0 if x == 0 else 1)
    data["Average Weekly Wage"] = data["Average Weekly Wage"].replace(0, np.nan)

    # Update 'COVID-19 Indicator' where WCIO Nature of Injury Description indicates COVID-19
    data.loc[
        (data['COVID-19 Indicator'] != 'Y') & (data['WCIO Nature of Injury Description'] == 'COVID-19'),
        'COVID-19 Indicator'
    ] = 'Y'
    # Process invalid zip codes: non-numeric entries become NaN, the column is
    # coerced to numeric, values below 100 are discarded and values below 1000
    # are multiplied by 10.
    invalid_zip_mask = data['Zip Code'].notna() & data['Zip Code'].apply(cannot_be_int)
    data.loc[invalid_zip_mask, 'Zip Code'] = np.nan
    data['Zip Code'] = pd.to_numeric(data['Zip Code'], errors='coerce')
    data.loc[data['Zip Code'] < 100, 'Zip Code'] = np.nan
    data.loc[data['Zip Code'] < 1000, 'Zip Code'] *= 10

    # Process zip codes with specific patterns: string form of length 7 (then 6)
    # ending in '0' is passed through int(float(x)).
    # NOTE(review): the column was just made numeric by pd.to_numeric, so the
    # string form is presumably like '12345.0' and writing ints back into a
    # float column may leave the values unchanged — confirm this step is needed.
    zip_codes_with_last_digit_zero_7 = data[
        data['Zip Code'].notna() &
        (data['Zip Code'].astype(str).str.len() == 7) &
        (data['Zip Code'].astype(str).str.endswith('0'))
    ].index
    data.loc[zip_codes_with_last_digit_zero_7, 'Zip Code'] = data.loc[
        zip_codes_with_last_digit_zero_7, 'Zip Code'
    ].apply(lambda x: int(float(x)))

    zip_codes_with_last_digit_zero_6 = data[
        data['Zip Code'].notna() &
        (data['Zip Code'].astype(str).str.len() == 6) &
        (data['Zip Code'].astype(str).str.endswith('0'))
    ].index
    data.loc[zip_codes_with_last_digit_zero_6, 'Zip Code'] = data.loc[
        zip_codes_with_last_digit_zero_6, 'Zip Code'
    ].apply(lambda x: int(float(x)))

    # Consolidate industry codes, cause of injury codes, and part of body codes
    # (the keys are integers, so only numeric cells are remapped)
    data['Industry Code'] = data['Industry Code'].replace({33: 31, 32: 31, 45: 44, 49: 48})
    data['WCIO Cause of Injury Code'] = data['WCIO Cause of Injury Code'].replace({17: 79, 66: 79, 94: 97})
    data['WCIO Part Of Body Code'] = data['WCIO Part Of Body Code'].replace({22: 43, 25: 18, 47: 23, -9: 10})

    # Return the transformed dataset
    return data

__`Step 3.3`__ Define a function to do row wise transformations.

In [79]:
def additional_row_wise_transformations(data):
    """Normalise a few categorical columns in place and return *data*.

    Each rule is applied only when its column exists:
    'Alternative Dispute Resolution' maps 'U' to 'N', two special-fund
    'Carrier Type' labels are grouped as 'Other', and 'Gender' values
    'U' / 'X' become NaN.
    """
    cleaning_rules = {
        'Alternative Dispute Resolution': lambda column: column.replace('U', 'N'),
        'Carrier Type': lambda column: column.replace(
            {'5C. SPECIAL FUND - POI CARRIER WCB MENANDS': 'Other',
             '5A. SPECIAL FUND - CONS. COMM. (SECT. 25-A)': 'Other'}
        ),
        'Gender': lambda column: column.replace(['U', 'X'], np.nan),
    }

    for column_name, clean in cleaning_rules.items():
        if column_name not in data.columns:
            continue
        data[column_name] = clean(data[column_name])

    return data

__`Step 3.4`__ Define functions and tasks to handle missing values.

In [80]:
def apply_imputation_dict(var_imput, var_group, imputation_dic, df):
    """Fill missing values of ``df[var_imput]`` from a group-keyed lookup.

    For every row whose ``var_imput`` value is null, the replacement is
    ``imputation_dic[row[var_group]]``; when the group has no entry the
    value is left as it is. The column is overwritten in *df*, which is
    also returned.
    """
    def fill_from_group(record):
        current = record[var_imput]
        if not pd.isnull(current):
            return current
        return imputation_dic.get(record[var_group], current)

    df[var_imput] = df.apply(fill_from_group, axis=1)
    return df

In [81]:
# WCIO code columns that are all imputed the same way (mode per 'Industry Code').
features_to_impute = ['WCIO Cause of Injury Code', 'WCIO Nature of Injury Code', 'WCIO Part Of Body Code']

# Ordered list of imputation steps: which variable to fill, which variable
# groups the lookup, and the strategy label recorded for that step.
imputation_tasks = [
    dict(var_imput='Zip Code', var_group='County of Injury', strategy='mode'),
    dict(var_imput='Industry Code', var_group='Carrier Name', strategy='mode'),
    dict(var_imput='Industry Code', var_group='Carrier Type', strategy='mode'),
    dict(var_imput='Average Weekly Wage', var_group='Industry Code', strategy='mean'),
    dict(var_imput='Gender', var_group='Industry Code', strategy='mode'),
]

for feature in features_to_impute:
    imputation_tasks += [dict(var_imput=feature, var_group='Industry Code', strategy='mode')]

In [82]:
def handle_missing_values_test(test_data, imputation_tasks, all_imputation_dicts):
    """Fill missing values in *test_data* using rules and stored lookups.

    Args:
        test_data: DataFrame holding at least 'Accident Date',
            'Age at Injury', 'Birth Year', 'First Hearing Date' and
            'IME-4 Count', plus every column named in *imputation_tasks*.
        imputation_tasks: Iterable of dicts with the keys 'var_imput' and
            'var_group' (a 'strategy' key may be present but is not used).
        all_imputation_dicts: Mapping from an imputed variable name to its
            ``{group value: fill value}`` dictionary.

    Returns:
        The same DataFrame, with its columns updated.
    """
    # Silence pandas warnings only while this function runs instead of
    # disabling every warning for the rest of the process.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        # Solve missing values in 'Birth Year' using 'Accident Date' and 'Age at Injury'
        test_data["Birth Year"] = test_data["Birth Year"].fillna(
            pd.to_datetime(test_data['Accident Date']).dt.year - test_data['Age at Injury']
        )

        # Solve missing values in 'Age at Injury' using 'Accident Date' and 'Birth Year'
        # (runs after the step above so a freshly derived birth year can be used)
        test_data["Age at Injury"] = test_data["Age at Injury"].fillna(
            pd.to_datetime(test_data['Accident Date']).dt.year - test_data['Birth Year']
        )

        # Fill missing values in 'First Hearing Date' with a default value
        test_data['First Hearing Date'] = test_data['First Hearing Date'].fillna('2030-01-01')

        # Replace missing values in 'IME-4 Count' with 0. Assigning the result
        # back avoids a chained in-place call, which is not guaranteed to
        # update the parent frame under pandas Copy-on-Write.
        test_data['IME-4 Count'] = test_data['IME-4 Count'].fillna(0)

        # Apply the imputation dictionaries stored from the training process.
        # NOTE(review): dictionaries are looked up by 'var_imput' only, so two
        # tasks imputing the same variable with different groups share one
        # dictionary — confirm this matches how the dictionaries were saved.
        for task in imputation_tasks:
            var_imput = task['var_imput']
            imputation_dic = all_imputation_dicts.get(var_imput)

            # Skip variables that have no (or an empty) stored dictionary
            if imputation_dic:
                test_data = apply_imputation_dict(
                    var_imput, task['var_group'], imputation_dic, test_data
                )

    return test_data

__`Step 3.5`__ Define a function to perform feature engineering.

In [83]:
def feature_engineering_test(X_test, count_encoder, var_encoding):
    """Turn the cleaned input frame into the feature matrix used for prediction.

    The raw report dates and description columns are dropped from *X_test*
    in place, the columns in *var_encoding* are replaced by the output of
    ``count_encoder.transform``, the three remaining date columns become
    whole days since 2000-01-01 and 'Zip Code' is cast to int. The result is
    re-ordered to the feature names of the module-level ``model``.
    """
    # Columns that are not kept as features (missing ones are ignored)
    unused_columns = [
        'C-2 Date', 'C-3 Date', 'Industry Code Description',
        'WCIO Nature of Injury Description', 'WCIO Cause of Injury Description',
        'WCIO Part of Body Description',
    ]
    X_test.drop(columns=unused_columns, inplace=True, errors='ignore')

    # Encode the categorical columns with the already-fitted encoder
    encoded_frame = count_encoder.transform(X_test)
    X_test[var_encoding] = encoded_frame[var_encoding]

    # Express each date as the number of whole days since the reference date
    reference_date = pd.Timestamp('2000-01-01')
    one_day = pd.Timedelta('1D')
    for date_column in ('Accident Date', 'Assembly Date', 'First Hearing Date'):
        X_test[date_column] = (X_test[date_column] - reference_date) // one_day

    X_test['Zip Code'] = X_test['Zip Code'].astype(int)

    # Keep the columns the booster lists, in that order
    return X_test[model.get_booster().feature_names]

- We adapted some of the functions used during preprocessing so that we could use them to treat the data that enters our interface.

__`Step 4`__ Define variables to encode and mapping dictionaries to match each code to a description.

In [84]:
# Columns whose values are replaced by the count encoder's output
# in feature_engineering_test.
var_encoding = [
    'Carrier Name',
    'County of Injury',
    'Zip Code',
    'Industry Code',
    'WCIO Cause of Injury Code',
    'WCIO Nature of Injury Code',
    'WCIO Part Of Body Code',
    'Alternative Dispute Resolution',
    'Attorney/Representative',
    'Gender',
    'Medical Fee Region',
    'Carrier Type',
    'C-2 Report Status',
    'C-3 Report Status',
    'District Name',
    'COVID-19 Indicator',
    'Average Weekly Wage ZERO',
]


# Industry code -> description shown next to the Industry Code dropdown.
# The keys (in this order) are also the dropdown's selectable values.
# Codes 32/33, 45 and 49 carry the same description as 31, 44 and 48, which
# mirrors the code consolidation done in treating_incoherences.
industry_mapping = {
    11: "AGRICULTURE, FORESTRY, FISHING AND HUNTING",
    21: "MINING",
    22: "UTILITIES",
    23: "CONSTRUCTION",
    31: "MANUFACTURING",
    32: "MANUFACTURING",
    33: "MANUFACTURING",
    42: "WHOLESALE TRADE",
    44: "RETAIL TRADE",
    45: "RETAIL TRADE",
    48: "TRANSPORTATION AND WAREHOUSING",
    49: "TRANSPORTATION AND WAREHOUSING",
    51: "INFORMATION",
    52: "FINANCE AND INSURANCE",
    53: "REAL ESTATE AND RENTAL AND LEASING",
    54: "PROFESSIONAL, SCIENTIFIC, AND TECHNICAL SERVICES",
    55: "MANAGEMENT OF COMPANIES AND ENTERPRISES",
    56: "ADMINISTRATIVE AND SUPPORT AND WASTE MANAGEMENT",
    61: "EDUCATIONAL SERVICES",
    62: "HEALTH CARE AND SOCIAL ASSISTANCE",
    71: "ARTS, ENTERTAINMENT, AND RECREATION",
    72: "ACCOMMODATION AND FOOD SERVICES",
    81: "OTHER SERVICES (EXCEPT PUBLIC ADMINISTRATION)",
    92: "PUBLIC ADMINISTRATION"
}
# WCIO Nature of Injury code -> description shown in the interface.
# The keys (in this order) are also the values offered by the
# "WCIO Nature of Injury Code" dropdown.
wcio_mapping = {
    1: "NO PHYSICAL INJURY",
    2: "AMPUTATION",
    3: "ANGINA PECTORIS",
    4: "BURN",
    7: "CONCUSSION",
    10: "CONTUSION",
    13: "CRUSHING",
    16: "DISLOCATION",
    19: "ELECTRIC SHOCK",
    22: "ENUCLEATION",
    25: "FOREIGN BODY",
    28: "FRACTURE",
    30: "FREEZING",
    31: "HEARING LOSS OR IMPAIRMENT",
    32: "HEAT PROSTRATION",
    34: "HERNIA",
    36: "INFECTION",
    37: "INFLAMMATION",
    38: "ADVERSE REACTION TO A VACCINATION OR INOCULATION",
    40: "LACERATION",
    41: "MYOCARDIAL INFARCTION",
    42: "POISONING - GENERAL (NOT OD OR CUMULATIVE)",
    43: "PUNCTURE",
    46: "RUPTURE",
    47: "SEVERANCE",
    49: "SPRAIN OR TEAR",
    52: "STRAIN OR TEAR",
    53: "SYNCOPE",
    54: "ASPHYXIATION",
    55: "VASCULAR",
    58: "VISION LOSS",
    59: "ALL OTHER SPECIFIC INJURIES, NOC",
    60: "DUST DISEASE, NOC",
    61: "ASBESTOSIS",
    62: "BLACK LUNG",
    63: "BYSSINOSIS",
    64: "SILICOSIS",
    65: "RESPIRATORY DISORDERS",
    66: "POISONING - CHEMICAL, (OTHER THAN METALS)",
    67: "POISONING - METAL",
    68: "DERMATITIS",
    69: "MENTAL DISORDER",
    70: "RADIATION",
    71: "ALL OTHER OCCUPATIONAL DISEASE INJURY, NOC",
    72: "LOSS OF HEARING",
    73: "CONTAGIOUS DISEASE",
    74: "CANCER",
    75: "AIDS",
    76: "VDT - RELATED DISEASES",
    77: "MENTAL STRESS",
    78: "CARPAL TUNNEL SYNDROME",
    79: "HEPATITIS C",
    80: "ALL OTHER CUMULATIVE INJURY, NOC",
    83: "COVID-19",
    90: "MULTIPLE PHYSICAL INJURIES ONLY",
    91: "MULTIPLE INJURIES INCLUDING BOTH PHYSICAL AND MENTAL"
}

# WCIO Cause of Injury code -> description shown in the interface.
# The keys (in this order) are also the values offered by the
# "WCIO Cause of Injury Code" dropdown.
# NOTE(review): treating_incoherences folds codes 17 and 66 into 79 and code
# 94 into 97, yet here 17, 66 and 79 have three different descriptions, 94
# reads "TERRORISM" and 97 has no entry — verify these descriptions against
# the official WCIO cause-of-injury table.
wcio_cause_mapping = {
    1: "CHEMICALS",
    2: "HOT OBJECTS OR SUBSTANCES",
    3: "TEMPERATURE EXTREMES",
    4: "FIRE OR FLAME",
    5: "STEAM OR HOT FLUIDS",
    6: "DUST, GASES, FUMES OR VAPORS",
    7: "WELDING OPERATION",
    8: "RADIATION",
    9: "CONTACT WITH, NOC",
    10: "MACHINE OR MACHINERY",
    11: "COLD OBJECTS OR SUBSTANCES",
    12: "OBJECT HANDLED",
    13: "CAUGHT IN, UNDER OR BETWEEN, NOC",
    14: "ABNORMAL AIR PRESSURE",
    15: "BROKEN GLASS",
    16: "HAND TOOL, UTENSIL; NOT POWERED",
    17: "OBJECT BEING LIFTED OR HANDLED",
    18: "POWERED HAND TOOL, APPLIANCE",
    19: "CUT, PUNCTURE, SCRAPE, NOC",
    20: "COLLAPSING MATERIALS (SLIDES OF EARTH)",
    25: "FROM DIFFERENT LEVEL (ELEVATION)",
    26: "FROM LADDER OR SCAFFOLDING",
    27: "FROM LIQUID OR GREASE SPILLS",
    28: "INTO OPENINGS",
    29: "ON SAME LEVEL",
    30: "SLIP, OR TRIP, DID NOT FALL",
    31: "FALL, SLIP OR TRIP, NOC",
    32: "ON ICE OR SNOW",
    33: "ON STAIRS",
    40: "CRASH OF WATER VEHICLE",
    41: "CRASH OF RAIL VEHICLE",
    45: "COLLISION OR SIDESWIPE WITH ANOTHER VEHICLE",
    46: "COLLISION WITH A FIXED OBJECT",
    47: "CRASH OF AIRPLANE",
    48: "VEHICLE UPSET",
    50: "MOTOR VEHICLE, NOC",
    52: "CONTINUAL NOISE",
    53: "TWISTING",
    54: "JUMPING OR LEAPING",
    55: "HOLDING OR CARRYING",
    56: "LIFTING",
    57: "PUSHING OR PULLING",
    58: "REACHING",
    59: "USING TOOL OR MACHINERY",
    60: "STRAIN OR INJURY BY, NOC",
    61: "WIELDING OR THROWING",
    65: "MOVING PART OF MACHINE",
    66: "SANDING, SCRAPING, CLEANING OPERATION",
    67: "STATIONARY OBJECT",
    68: "STEPPING ON SHARP OBJECT",
    69: "STRIKING AGAINST OR STEPPING ON, NOC",
    70: "FELLOW WORKER, PATIENT OR OTHER PERSON",
    74: "FALLING OR FLYING OBJECT",
    75: "HAND TOOL OR MACHINE IN USE",
    76: "MOTOR VEHICLE",
    77: "MOVING PARTS OF MACHINE",
    78: "OBJECT HANDLED BY OTHERS",
    79: "STRUCK OR INJURED, NOC",
    80: "ABSORPTION, INGESTION OR INHALATION, NOC",
    81: "PANDEMIC",
    82: "ELECTRICAL CURRENT",
    83: "ANIMAL OR INSECT",
    84: "EXPLOSION OR FLARE BACK",
    85: "FOREIGN MATTER (BODY) IN EYE(S)",
    86: "NATURAL DISASTERS",
    87: "PERSON IN ACT OF A CRIME",
    88: "OTHER THAN PHYSICAL CAUSE OF INJURY",
    89: "MOLD",
    90: "GUNSHOT",
    91: "REPETITIVE MOTION",
    93: "RUBBED OR ABRADED, NOC",
    94: "TERRORISM",
    95: "CUMULATIVE, NOC",
    96: "OTHER - MISCELLANEOUS, NOC"
}
# WCIO Part of Body code -> description shown in the interface.
# The keys (in this insertion order) are also the values offered by the
# "WCIO Part of Body Code" dropdown.
# NOTE(review): treating_incoherences folds codes 22 -> 43, 25 -> 18,
# 47 -> 23 and -9 -> 10, yet here 22/43 and 25/18 carry unrelated
# descriptions and 47 / -9 have no entry — verify these descriptions against
# the official WCIO part-of-body table.
wcio_body_mapping = {
    62: "BUTTOCKS",
    38: "SHOULDER(S)",
    10: "MULTIPLE HEAD INJURY",
    36: "FINGER(S)",
    60: "LUNGS",
    14: "EYE(S)",
    55: "ANKLE",
    53: "KNEE",
    37: "THUMB",
    42: "LOWER BACK AREA",
    61: "ABDOMEN INCLUDING GROIN",
    54: "LOWER LEG",
    51: "HIP",
    52: "UPPER LEG",
    17: "MOUTH",
    34: "WRIST",
    23: "SPINAL CORD",
    35: "HAND",
    18: "SOFT TISSUE",
    31: "UPPER ARM",
    25: "FOOT",
    56: "ELBOW",
    32: "MULTIPLE UPPER EXTREMITIES",
    30: "MULTIPLE BODY PARTS (INCLUDING BODY)",
    90: "BODY SYSTEMS AND MULTIPLE BODY SYSTEMS",
    91: "MULTIPLE NECK INJURY",
    20: "CHEST",
    44: "WRIST (S) & HAND(S)",
    39: "EAR(S)",
    13: "MULTIPLE LOWER EXTREMITIES",
    50: "DISC",
    43: "LOWER ARM",
    33: "MULTIPLE",
    22: "UPPER BACK AREA",
    41: "SKULL",
    11: "TOES",
    57: "FACIAL BONES",
    19: "TEETH",
    16: "NO PHYSICAL INJURY",
    66: "MULTIPLE TRUNK",
    40: "WHOLE BODY",
    99: "INSUFFICIENT INFO TO PROPERLY IDENTIFY - UNCLASSIFIED",
    65: "PELVIS",
    46: "NOSE",
    15: "GREAT TOE",
    58: "INTERNAL ORGANS",
    48: "HEART",
    49: "VERTEBRAE",
    21: "LUMBAR & OR SACRAL VERTEBRAE (VERTEBRA)",
    63: "BRAIN",
    12: "SACRUM AND COCCYX",
    45: "ARTIFICIAL APPLIANCE",
    64: "LARYNX",
    24: "TRACHEA"
}

__`Step 5`__ Define functions to match each code to a description, apply the preprocessing and build the GUI.

In [85]:
def update_body_description(*args):
    """Refresh the Part of Body description after the body code changes.

    Any positional arguments are ignored. A code that is not an integer,
    or not a key of ``wcio_body_mapping``, yields the fallback text.
    """
    fallback = "Unknown Part of Body Code"
    try:
        selected = int(body_code.get())
        text = wcio_body_mapping.get(selected, fallback)
    except ValueError:
        text = fallback
    body_description.set(text)

def update_cause_description(*args):
    """Update WCIO Cause of Injury Description based on selected Cause Code.

    Any positional arguments are ignored. A code that is empty, not an
    integer, or not a key of ``wcio_cause_mapping`` yields the fallback text
    instead of raising, matching ``update_body_description``.
    """
    try:
        code = int(cause_code.get())
        cause_description.set(wcio_cause_mapping.get(code, "Unknown Cause Code"))
    except ValueError:
        # Empty or non-numeric text in the variable
        cause_description.set("Unknown Cause Code")

def update_wcio_description(*args):
    """Update WCIO Nature of Injury Description based on selected WCIO Code.

    Any positional arguments are ignored. A code that is empty, not numeric,
    or not a key of ``wcio_mapping`` yields the fallback text instead of
    raising, matching ``update_body_description``.
    """
    try:
        # float() is kept so that text such as "28" or "28.0" both resolve to key 28
        code = float(wcio_code.get())
        wcio_description.set(wcio_mapping.get(code, "Unknown WCIO Code"))
    except ValueError:
        # Empty or non-numeric text in the variable
        wcio_description.set("Unknown WCIO Code")

def update_industry_description(*args):
    """Update Industry Code Description based on selected Industry Code.

    Any positional arguments are ignored. A code that is empty, not an
    integer, or not a key of ``industry_mapping`` yields the fallback text
    instead of raising, matching ``update_body_description``.
    """
    try:
        code = int(industry_code.get())
        industry_description.set(industry_mapping.get(code, "Unknown Industry Code"))
    except ValueError:
        # Empty or non-numeric text in the variable
        industry_description.set("Unknown Industry Code")


def toggle_date_entry(entry_widget, checkbox_var):
    """Disable *entry_widget* while *checkbox_var* is set, enable it otherwise."""
    if checkbox_var.get():
        new_state = tk.DISABLED
    else:
        new_state = tk.NORMAL
    entry_widget.configure(state=new_state)

def submit():
    """Read the form, run the preprocessing pipeline and show the prediction.

    Reads the module-level widgets/variables, builds a one-row DataFrame
    indexed by 'Claim Identifier', derives 'Time to Assembly' and the C-2 /
    C-3 report statuses, applies the preprocessing functions, predicts with
    ``model`` and writes the decoded class to ``result_label``. When a
    numeric field cannot be parsed, the error is shown in ``result_label``
    and no prediction is made.
    """
    def read_date(widget, missing_flag=None):
        """Return the widget's date as a Timestamp, or NaT when flagged missing."""
        if missing_flag is not None and missing_flag.get():
            # The "NaN" checkbox is ticked: treat the date as not provided
            return pd.NaT
        # get_date() yields a date object, so parsing does not depend on the
        # locale-specific text shown in the entry.
        return pd.Timestamp(widget.get_date())

    def read_code(variable):
        """Return the selected code as a number, or NaN when nothing is selected."""
        text = variable.get().strip()
        # Numeric codes are needed for the integer-keyed replacements in
        # treating_incoherences to match; NaN lets the imputation step fill it.
        # NOTE(review): assumes the encoder/imputation dictionaries were
        # fitted on numeric codes — confirm against the training notebook.
        return float(text) if text else np.nan

    # Collect inputs (key order defines the DataFrame column order)
    try:
        inputs = {
            "Claim Identifier": claim_identifier.get(),
            "C-2 Date": read_date(c2_date, c2_missing),
            "C-3 Date": read_date(c3_date, c3_missing),
            "Industry Code Description": industry_description.get(),
            "WCIO Nature of Injury Description": wcio_description.get(),
            "WCIO Cause of Injury Description": cause_description.get(),
            "WCIO Part of Body Description": body_description.get(),
            "Accident Date": read_date(accident_date),
            "Alternative Dispute Resolution": adr.get(),
            "Assembly Date": read_date(assembly_date),
            "Attorney/Representative": attorney.get(),
            "Average Weekly Wage": float(avg_weekly_wage.get()),
            # Stored as float because the pipeline may write NaN into these columns
            "Age at Injury": float(int(age_at_injury.get())),
            "Birth Year": float(int(birth_year.get())),
            "Carrier Name": carrier_name.get(),
            "Carrier Type": carrier_type.get(),
            "County of Injury": county.get(),
            "COVID-19 Indicator": covid_indicator.get(),
            "District Name": district_name.get(),
            "First Hearing Date": read_date(first_hearing_date, first_hearing_missing),
            "Gender": gender.get(),
            "IME-4 Count": int(ime4_count.get()),
            "Industry Code": read_code(industry_code),
            "Medical Fee Region": medical_fee_region.get(),
            "WCIO Cause of Injury Code": read_code(cause_code),
            "WCIO Nature of Injury Code": read_code(wcio_code),
            "WCIO Part Of Body Code": read_code(body_code),
            "Zip Code": zip_code.get(),
            "Number of Dependents": int(dependents.get())
        }
    except ValueError as exc:
        # Empty or malformed numeric field: tell the user instead of failing
        # silently inside the Tk callback.
        result_label.config(text=f"Invalid input: {exc}")
        return

    df = pd.DataFrame([inputs])

    df.set_index('Claim Identifier', inplace=True)

    df['Time to Assembly'] = (df['Assembly Date'] - df['Accident Date']).dt.days

    def report_status(row, date_column):
        """Classify a report date relative to the assembly date."""
        if pd.isna(row[date_column]):
            return 'Not Received'
        if row[date_column] <= row['Assembly Date']:
            return 'Received on or before Assembly'
        return 'Received after Assembly'

    df['C-2 Report Status'] = df.apply(lambda row: report_status(row, 'C-2 Date'), axis=1)
    df['C-3 Report Status'] = df.apply(lambda row: report_status(row, 'C-3 Date'), axis=1)

    df = treating_incoherences(df)
    df = additional_row_wise_transformations(df)
    df = handle_missing_values_test(df, imputation_tasks, all_imputation_dicts)
    df = feature_engineering_test(df, count_encoder, var_encoding)

    # Drop the features named below before predicting (absent ones are ignored)
    C_remove_feature_selection = ['Number of Dependents', 'Time to Assembly', 'Accident Date', 'Birth Year']
    df.drop(columns=C_remove_feature_selection, inplace=True, errors='ignore')
    prediction = model.predict(df)
    claim_injury_type = label_encoder.inverse_transform(prediction)
    result_label.config(text=f"{claim_injury_type[0]}")

# Create main window (title and initial size in pixels)
root = tk.Tk()
root.title("Prediction Model Input Interface")
root.geometry("750x450")

# Frame for better organization: every widget below is gridded inside it
frame = ttk.Frame(root, padding="10")
frame.grid(row=0, column=0, sticky=(tk.W, tk.E))

# --- Left column (grid columns 1-2): claimant information ---
claimant_label = ttk.Label(frame, text="Claimant Information",font=("Arial", 12))
claimant_label.grid(row=1, column=1, columnspan=2, pady=5, padx=5)

# Free-text entries; their contents are read as strings in submit()
ttk.Label(frame, text="Zip Code:").grid(row=2, column=1, sticky=tk.W)
zip_code = ttk.Entry(frame)
zip_code.grid(row=2, column=2, sticky=tk.W)

ttk.Label(frame, text="Birth Year:").grid(row=3, column=1, sticky=tk.W)
birth_year = ttk.Entry(frame)
birth_year.grid(row=3, column=2, sticky=tk.W)

# Read-only dropdown: the user can only pick one of the listed values
ttk.Label(frame, text="Gender:").grid(row=4, column=1, sticky=tk.W)
gender = ttk.Combobox(frame, values=['M','F','U','X'], state="readonly")
gender.grid(row=4, column=2, sticky=tk.W)

ttk.Label(frame, text="Average Weekly Wage:").grid(row=5, column=1, sticky=tk.W)
avg_weekly_wage = ttk.Entry(frame)
avg_weekly_wage.grid(row=5, column=2, sticky=tk.W)

ttk.Label(frame, text="Number of Dependents:").grid(row=6, column=1, sticky=tk.W)
dependents = ttk.Entry(frame)
dependents.grid(row=6, column=2, sticky=tk.W)

ttk.Label(frame, text="Age at Injury:").grid(row=7, column=1, sticky=tk.W)
age_at_injury = ttk.Entry(frame)
age_at_injury.grid(row=7, column=2, sticky=tk.W)

# --- Left column (grid columns 1-2): healthcare information ---
healthcare_label = ttk.Label(frame, text="Healthcare Information",font=("Arial", 12))
healthcare_label.grid(row=9, column=1, columnspan=2, pady=5, padx=5)

# Calendar-backed date picker from tkcalendar
ttk.Label(frame, text="Accident Date:").grid(row=10, column=1, sticky=tk.W)
accident_date = DateEntry(frame)
accident_date.grid(row=10, column=2, sticky=tk.W)

ttk.Label(frame, text="Carrier Name:").grid(row=11, column=1, sticky=tk.W)
carrier_name = ttk.Entry(frame)
carrier_name.grid(row=11, column=2, sticky=tk.W)

# The two '5A.' / '5C.' special-fund options are later grouped as 'Other'
# by additional_row_wise_transformations
ttk.Label(frame, text="Carrier Type:").grid(row=12, column=1, sticky=tk.W)
carrier_type = ttk.Combobox(frame, values=['1A. PRIVATE','2A. SIF', '4A. SELF PRIVATE',
       '3A. SELF PUBLIC', 'UNKNOWN', '5D. SPECIAL FUND - UNKNOWN',
       '5A. SPECIAL FUND - CONS. COMM. (SECT. 25-A)',
       '5C. SPECIAL FUND - POI CARRIER WCB MENANDS'], state="readonly")
carrier_type.grid(row=12, column=2, sticky=tk.W)

ttk.Label(frame, text="IME-4 Count:").grid(row=13, column=1, sticky=tk.W)
ime4_count = ttk.Entry(frame)
ime4_count.grid(row=13, column=2, sticky=tk.W)

ttk.Label(frame, text="Medical Fee Region:").grid(row=14, column=1, sticky=tk.W)
medical_fee_region = ttk.Combobox(frame, values=['I','II','III', 'IV','UK'], state="readonly")
medical_fee_region.grid(row=14, column=2, sticky=tk.W)

ttk.Label(frame, text="COVID-19 Indicator:").grid(row=15, column=1, sticky=tk.W)
covid_indicator = ttk.Combobox(frame, values=['Y','N'], state="readonly")
covid_indicator.grid(row=15, column=2, sticky=tk.W)

ttk.Label(frame, text="County of Injury:").grid(row=16, column=1, sticky=tk.W)
county = ttk.Entry(frame)
county.grid(row=16, column=2, sticky=tk.W)

# --- Right column (grid columns 5-7): claim information ---
Claim_information_label = ttk.Label(frame, text="Claim Information",font=("Arial", 12))
Claim_information_label.grid(row=1, column=4, columnspan=2, pady=5, padx=5,sticky=tk.E)

# Becomes the index of the one-row DataFrame built in submit()
ttk.Label(frame, text="Claim Identifier:").grid(row=2, column=5, sticky=tk.E)
claim_identifier = ttk.Entry(frame)
claim_identifier.grid(row=2, column=6, sticky=tk.E+tk.W)


ttk.Label(frame, text="Assembly Date:").grid(row=3, column=5, sticky=tk.E)
assembly_date = DateEntry(frame)
assembly_date.grid(row=3, column=6, sticky=tk.E)

# Each optional date has a "NaN" checkbox; ticking it disables the matching
# date picker through toggle_date_entry.
c2_missing = tk.BooleanVar(value=False)
ttk.Label(frame, text="C-2 Date:").grid(row=4, column=5, sticky=tk.E)
c2_date = DateEntry(frame)
c2_date.grid(row=4, column=6, sticky=tk.E)
c2_checkbox = ttk.Checkbutton(frame, text="NaN", variable=c2_missing, 
                               command=lambda: toggle_date_entry(c2_date, c2_missing))
c2_checkbox.grid(row=4, column=7, sticky=tk.W)

c3_missing = tk.BooleanVar(value=False)
ttk.Label(frame, text="C-3 Date:").grid(row=5, column=5, sticky=tk.E)
c3_date = DateEntry(frame)
c3_date.grid(row=5, column=6, sticky=tk.E)
c3_checkbox = ttk.Checkbutton(frame, text="NaN", variable=c3_missing, 
                               command=lambda: toggle_date_entry(c3_date, c3_missing))
c3_checkbox.grid(row=5, column=7, sticky=tk.W)

first_hearing_missing = tk.BooleanVar(value=False)
ttk.Label(frame, text="First Hearing Date:").grid(row=6, column=5, sticky=tk.E)
first_hearing_date = DateEntry(frame)
first_hearing_date.grid(row=6, column=6, sticky=tk.E)
first_hearing_checkbox = ttk.Checkbutton(frame, text="NaN", variable=first_hearing_missing, 
                                          command=lambda: toggle_date_entry(first_hearing_date, first_hearing_missing))
first_hearing_checkbox.grid(row=6, column=7, sticky=tk.W)

# Read-only dropdowns for the Y/N style fields
# ('Alternative Dispute Resolution' also offers 'U', which
# additional_row_wise_transformations later maps to 'N')

ttk.Label(frame, text="Alternative Dispute Resolution:").grid(row=7, column=5, sticky=tk.E)
adr = ttk.Combobox(frame, values=['Y','N','U'], state="readonly")
adr.grid(row=7, column=6, sticky=tk.E)

ttk.Label(frame, text="Attorney/Representative:").grid(row=8, column=5, sticky=tk.E)
attorney = ttk.Combobox(frame, values=['Y','N'], state="readonly")
attorney.grid(row=8, column=6, sticky=tk.E)
# Read-only dropdown with the available district names

ttk.Label(frame, text="District Name:").grid(row=9, column=5, sticky=tk.E)
district_name = ttk.Combobox(frame, values=['SYRACUSE', 'ROCHESTER', 'ALBANY', 'HAUPPAUGE', 'NYC',
       'BUFFALO', 'BINGHAMTON', 'STATEWIDE'], state="readonly")
district_name.grid(row=9, column=6, sticky=tk.E)


# Each code dropdown below is bound to a StringVar whose "write" trace calls
# the matching update_*_description function; the description StringVar is
# shown in a label on the next row.

# Industry Code Dropdown (values are the keys of industry_mapping)
ttk.Label(frame, text="Industry Code:").grid(row=10, column=5, sticky=tk.E)
industry_code = tk.StringVar()
industry_code_dropdown = ttk.Combobox(frame, textvariable=industry_code, values=list(industry_mapping.keys()), state="readonly")
industry_code_dropdown.grid(row=10, column=6, sticky=tk.E)
industry_code.trace_add("write", update_industry_description)

# Industry Code Description
ttk.Label(frame, text="Industry Code Description:").grid(row=11, column=5, sticky=tk.E)
industry_description = tk.StringVar()
description_label = ttk.Label(frame, textvariable=industry_description)
description_label.grid(row=11, column=6, sticky=tk.E)

# WCIO Nature of Injury Code Dropdown (values are the keys of wcio_mapping)
ttk.Label(frame, text="WCIO Nature of Injury Code:").grid(row=12, column=5, sticky=tk.E)
wcio_code = tk.StringVar()
wcio_code_dropdown = ttk.Combobox(frame, textvariable=wcio_code, values=list(wcio_mapping.keys()), state="readonly")
wcio_code_dropdown.grid(row=12, column=6, sticky=tk.E)
wcio_code.trace_add("write", update_wcio_description)

# WCIO Nature of Injury Description
ttk.Label(frame, text="WCIO Nature of Injury Description:").grid(row=13, column=5, sticky=tk.E)
wcio_description = tk.StringVar()
wcio_description_label = ttk.Label(frame, textvariable=wcio_description)
wcio_description_label.grid(row=13, column=6, sticky=tk.E)

# WCIO Cause of Injury Code Dropdown (values are the keys of wcio_cause_mapping)
ttk.Label(frame, text="WCIO Cause of Injury Code:").grid(row=14, column=5, sticky=tk.E)
cause_code = tk.StringVar()
cause_code_dropdown = ttk.Combobox(frame, textvariable=cause_code, values=list(wcio_cause_mapping.keys()), state="readonly")
cause_code_dropdown.grid(row=14, column=6, sticky=tk.E)
cause_code.trace_add("write", update_cause_description)

# WCIO Cause of Injury Description
ttk.Label(frame, text="WCIO Cause of Injury Description:").grid(row=15, column=5, sticky=tk.E)
cause_description = tk.StringVar()
cause_description_label = ttk.Label(frame, textvariable=cause_description)
cause_description_label.grid(row=15, column=6, sticky=tk.E)

# WCIO Part of Body Code Dropdown (values are the keys of wcio_body_mapping)
ttk.Label(frame, text="WCIO Part of Body Code:").grid(row=16, column=5, sticky=tk.E)
body_code = tk.StringVar()
body_code_dropdown = ttk.Combobox(frame, textvariable=body_code, values=list(wcio_body_mapping.keys()), state="readonly")
body_code_dropdown.grid(row=16, column=6, sticky=tk.E)
body_code.trace_add("write", update_body_description)

# WCIO Part of Body Description
ttk.Label(frame, text="WCIO Part of Body Description:").grid(row=17, column=5, sticky=tk.E)
body_description = tk.StringVar()
body_description_label = ttk.Label(frame, textvariable=body_description)
body_description_label.grid(row=17, column=6, sticky=tk.E)

# Submit button: clicking it runs submit()
submit_button = ttk.Button(frame,width=25, text="Predict Claim Injury", command=submit)
submit_button.grid(row=30, column=3, columnspan=3, pady=10,padx=10,sticky=tk.W+tk.E)

# Label whose text submit() replaces with the predicted class
result_label = ttk.Label(frame, text="Prediction result will appear here.")
result_label.grid(row=30, column=6, columnspan=2, pady=10)
# Run the application (blocks until the window is closed)

root.mainloop()