#### ***ML_Modules.py***

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler


def check_correlation(input_df, threshold=0.75):
    """
    Returns boolean correlation matrix: True where |corr| >= threshold

    The pairwise correlation of the columns is computed with
    ``DataFrame.corr()`` and its absolute value is compared against
    ``threshold``, so strong negative correlations are flagged as well.

    Parameters
    ----------
    input_df : pandas.DataFrame
        Feature table whose columns are correlated against each other.
    threshold : float, optional
        Minimum absolute correlation that counts as "correlated".
        Defaults to 0.75, the cutoff this function always used.

    Returns
    -------
    pandas.DataFrame
        Square boolean frame labelled by the correlated columns on both
        axes. Undefined (NaN) correlations, e.g. for a constant column,
        compare as False.
    """
    abs_corr = input_df.corr().abs()
    return abs_corr >= threshold


def data_scale(input_df):
    """
    Returns a standardized copy of the input features (mean=0, std=1)

    All columns are passed through a freshly fitted ``StandardScaler``; the
    resulting array is wrapped back into a DataFrame that keeps the column
    labels and the row index of ``input_df``.
    """
    standardized = StandardScaler().fit_transform(input_df)
    return pd.DataFrame(
        standardized,
        index=input_df.index,
        columns=input_df.columns,
    )

#### ***Main.py***

In [None]:
import os
import sys
import warnings
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import ML_Modules as mm

# Silence all warnings for the rest of the run (presumably so that library
# warnings do not get mixed into the printed report).
warnings.simplefilter("ignore")
# NOTE(review): called without message/category/module arguments, this looks
# like it installs the same catch-all "ignore" filter as the line above —
# confirm whether both calls are really needed.
warnings.filterwarnings("ignore")


def main():
    """
    Runs the preprocessing pipeline on a CSV file named on standard input.

    Each step is announced with a section header on stdout:
      1. Read a file name from stdin and load it with ``pd.read_csv`` from
         the directory given by ``sys.path[0]``.
      2. Label-encode the "salary" and "Department" columns into
         "salary.enc" / "Department.enc", print the encoder classes and drop
         the original columns.
      3. Split the frame into the label column "left" and the remaining
         feature columns, printing both shapes.
      4. Print the boolean correlation matrix from ``mm.check_correlation``.
      5. Print the first five rows of the features scaled by
         ``mm.data_scale``.
      6. Split scaled features and labels 80/20 (``random_state=42``) and
         print the four resulting shapes.

    Exits with status 1, after printing an error message, when the file does
    not exist or when one of the required columns ("salary", "Department",
    "left") is missing.
    """
    filename = input().strip()
    # sys.path[0] is normally the directory of the script being run, so the
    # file is looked up next to the script rather than in the working dir.
    file_path = os.path.join(sys.path[0], filename)

    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found.")
        sys.exit(1)

    # === Label Encoding Categorical Columns ===
    print("=== Label Encoding Categorical Columns ===")
    encoded_classes = {}
    # Order matters: it fixes the order of the appended ".enc" columns.
    for column in ("salary", "Department"):
        # Fail with the same friendly message used for 'left' instead of
        # letting a missing column surface as a raw KeyError traceback.
        if column not in df.columns:
            print(f"Error: '{column}' column not found.")
            sys.exit(1)
        encoder = LabelEncoder()
        df[f"{column}.enc"] = encoder.fit_transform(df[column])
        encoded_classes[column] = list(encoder.classes_)

    print(f"Encoded salary classes: {encoded_classes['salary']}")
    print(f"Encoded Department classes: {encoded_classes['Department']}\n")

    # Drop original categorical columns
    df = df.drop(columns=["Department", "salary"])

    # === Separating Features and Label ===
    print("=== Separating Features and Label ===")
    if "left" not in df.columns:
        print("Error: 'left' column not found.")
        sys.exit(1)

    y = df["left"]
    X = df.drop(columns=["left"])

    print(f"Input Features Shape: {X.shape}")
    print(f"Label Shape: {y.shape}\n")

    # === Correlation Boolean Matrix (correlation >= 0.75) ===
    print("=== Correlation Boolean Matrix (correlation >= 0.75) ===")
    corr_bool = mm.check_correlation(X)
    print(corr_bool)

    # === Scaled Feature Sample (First 5 Rows) ===
    print("=== Scaled Feature Sample (First 5 Rows) ===")
    X_scaled = mm.data_scale(X)
    print(X_scaled.head())

    # === Splitting Data into Train (80%) and Test (20%) ===
    print("=== Splitting Data into Train (80%) and Test (20%) ===")
    # Fixed random_state keeps the split (and the printed shapes) reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42
    )

    print(f"Training Features Shape: {X_train.shape}")
    print(f"Training Labels Shape: {y_train.shape}")
    print(f"Testing Features Shape: {X_test.shape}")
    print(f"Testing Labels Shape: {y_test.shape}")


# Run the pipeline only when this file is executed directly (script or
# notebook cell), not when it is imported by another module.
if __name__ == "__main__":
    main()