In [4]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler
import numpy as np

def has_outliers(series):
    """
    Report whether any value lies outside the 1.5 * IQR fences.

    The fences sit 1.5 interquartile ranges below the first quartile and
    above the third quartile. The result is truthy as soon as one value
    falls strictly outside them.
    """
    first_quartile = series.quantile(0.25)
    third_quartile = series.quantile(0.75)
    whisker = 1.5 * (third_quartile - first_quartile)

    too_low = series < first_quartile - whisker
    too_high = series > third_quartile + whisker
    return (too_low | too_high).any()

def normalize_data(df, exclude=None):
    """
    Normalize the numeric columns of a DataFrame in place and return it.

    Every numeric column is rescaled independently. A column in which
    has_outliers() reports at least one IQR outlier is standardized with
    StandardScaler (z-score); any other numeric column is rescaled with
    MinMaxScaler. Non-numeric, boolean and complex columns are left
    untouched, as are columns that contain no observed values.

    Args:
        df: DataFrame to normalize. It is modified in place.
        exclude: Optional column label, or iterable of column labels, to
            leave unscaled (for example identifier columns). Defaults to
            None, which scales every eligible column.

    Returns:
        The same DataFrame object that was passed in.
    """
    if exclude is None:
        skipped = set()
    elif isinstance(exclude, str):
        # A bare string is one label, not an iterable of characters.
        skipped = {exclude}
    else:
        skipped = set(exclude)

    dtype_checks = pd.api.types

    for column in df.columns:
        if column in skipped:
            continue

        dtype = df[column].dtype
        # Accept every real-valued numeric dtype (int32, float32, unsigned,
        # nullable Int64/Float64, ...) instead of only 'int64'/'float64'.
        # Booleans and complex numbers count as "numeric" for pandas but
        # scaling them is not meaningful.
        if not dtype_checks.is_numeric_dtype(dtype):
            continue
        if dtype_checks.is_bool_dtype(dtype) or dtype_checks.is_complex_dtype(dtype):
            continue

        # Nothing to fit on when the column holds no observed value.
        if not df[column].notna().any():
            continue

        # Give the scaler a plain float (n, 1) array so nullable dtypes and
        # missing values reach it as ordinary NaN.
        values = df[column].to_numpy(dtype="float64", na_value=np.nan).reshape(-1, 1)

        # Z-score when IQR outliers are present, min-max otherwise.
        scaler = StandardScaler() if has_outliers(df[column]) else MinMaxScaler()

        # fit_transform returns an (n, 1) array; flatten it before storing it
        # back as a single column.
        df[column] = scaler.fit_transform(values).ravel()

    return df

def main(
    file_path="/content/Lab Session Data.xlsx",
    sheet_name='thyroid0387_UCI',
    output_path='/content/thyroid0387_UCI_normalized.xlsx',
):
    """
    Load a worksheet, normalize its numeric columns and save the result.

    Args:
        file_path: Path of the Excel workbook to read.
        sheet_name: Name of the worksheet to load from the workbook.
        output_path: Path of the Excel file the normalized data is written to.
    """
    # Treat '?' as a missing value while parsing. Replacing it after the load
    # can leave measurement columns with an object dtype, and
    # normalize_data() only processes columns whose dtype is numeric.
    df = pd.read_excel(file_path, sheet_name=sheet_name, na_values=['?'])

    # Normalize the data
    df = normalize_data(df)

    # Print the first few rows after normalization
    print("After normalization:")
    print(df.head())

    # Save the normalized dataframe back to an Excel file
    df.to_excel(output_path, index=False)


if __name__ == "__main__":
    main()


After normalization:
      Record ID       age sex on thyroxine query on thyroxine  \
0  0.000000e+00 -0.037634   F            f                  f   
1  3.410871e-08 -0.037634   F            f                  f   
2  9.891527e-07 -0.027499   F            f                  f   
3  6.934301e-05 -0.031722   F            f                  f   
4  6.937712e-05 -0.035100   F            f                  f   

  on antithyroid medication sick pregnant thyroid surgery I131 treatment  ...  \
0                         f    f        f               f              f  ...   
1                         f    f        f               f              f  ...   
2                         f    f        f               f              f  ...   
3                         f    f        f               f              f  ...   
4                         f    f        f               f              f  ...   

  TT4 measured       TT4 T4U measured T4U FTI measured FTI TBG measured  \
0            f       NaN  