In [8]:
import pandas as pd
import numpy as np

def has_outliers(series):
    """
    Report whether a numeric series contains outliers under the IQR rule.

    A value counts as an outlier when it lies strictly below
    ``Q1 - 1.5 * IQR`` or strictly above ``Q3 + 1.5 * IQR``, where Q1 and Q3
    are the 25th and 75th percentiles of the series and ``IQR = Q3 - Q1``.
    Returns the result of ``.any()`` over the resulting boolean mask.
    """
    first_quartile, third_quartile = (series.quantile(p) for p in (0.25, 0.75))
    spread = third_quartile - first_quartile
    margin = 1.5 * spread

    below_fence = series < first_quartile - margin
    above_fence = series > third_quartile + margin
    return (below_fence | above_fence).any()

def fill_missing_values(df):
    """
    Fill missing values in the dataframe based on the attribute type and presence of outliers.

    For every column that has at least one missing value:

    * non-numeric (and boolean) columns are filled with their most frequent
      value (the first mode);
    * numeric columns are filled with the median when ``has_outliers`` reports
      outliers, and with the mean otherwise.

    Columns without missing values, and columns with no observed values at
    all, are left untouched.

    The dataframe is modified in place and is also returned.
    """
    for column in df.columns:
        values = df[column]

        # Nothing to fill: skip the statistics entirely.
        if not values.isna().any():
            continue

        observed = values.dropna()
        if observed.empty:
            # No observed data to derive a fill value from; the mode would be
            # empty and indexing it would raise.
            continue

        is_numeric = (
            pd.api.types.is_numeric_dtype(values)
            and not pd.api.types.is_bool_dtype(values)
        )
        if is_numeric:
            # The median is robust to extreme values; the mean is used otherwise.
            fill_value = values.median() if has_outliers(values) else values.mean()
        else:
            fill_value = observed.mode().iloc[0]

        # Assign back instead of a chained ``fillna(inplace=True)``, which
        # operates on a temporary and does not update ``df`` under
        # Copy-on-Write.
        df[column] = values.fillna(fill_value)
    return df

def _coerce_numeric(column):
    """Return *column* converted to a numeric dtype, or unchanged if conversion fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # Not fully numeric: keep the column exactly as it was.
        return column


def main(
    file_path="C:/Users/year3/Downloads/Lab Session Data.xlsx",
    output_path='C:/Users/year3/Downloads/thyroid0387_UCI_filled.xlsx',
    sheet_name='thyroid0387_UCI',
):
    """
    Load a worksheet, impute its missing values, and save the result.

    Reads ``sheet_name`` from the Excel workbook at ``file_path``, treats
    ``'?'`` cells as missing, converts columns to numeric where possible,
    fills the missing values with ``fill_missing_values``, prints the first
    rows before and after filling, and writes the filled dataframe to
    ``output_path`` without the index.
    """
    # Read the Excel file
    df = pd.read_excel(file_path, sheet_name=sheet_name)

    # Replace '?' with NaN
    df = df.replace('?', np.nan)

    # Convert each column to numeric where every value allows it; columns that
    # cannot be converted are kept as they are.
    df = df.apply(_coerce_numeric)

    # Print the first few rows before filling missing values
    print("Before filling missing values:")
    print(df.head())

    # Fill missing values
    df = fill_missing_values(df)

    # Print the first few rows after filling missing values
    print("\nAfter filling missing values:")
    print(df.head())

    # Save the updated dataframe back to an Excel file
    df.to_excel(output_path, index=False)


if __name__ == "__main__":
    main()


Before filling missing values:
   Record ID  age sex on thyroxine query on thyroxine  \
0  840801013   29   F            f                  f   
1  840801014   29   F            f                  f   
2  840801042   41   F            f                  f   
3  840803046   36   F            f                  f   
4  840803047   32   F            f                  f   

  on antithyroid medication sick pregnant thyroid surgery I131 treatment  ...  \
0                         f    f        f               f              f  ...   
1                         f    f        f               f              f  ...   
2                         f    f        f               f              f  ...   
3                         f    f        f               f              f  ...   
4                         f    f        f               f              f  ...   

  TT4 measured    TT4 T4U measured T4U FTI measured FTI TBG measured   TBG  \
0            f    NaN            f NaN            f NaN      