#### ***Day2 Concept:3***

#### ***ML_Modules.py***

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    confusion_matrix,
    classification_report,
    accuracy_score,
    recall_score,
    f1_score,
    precision_score,
)


def treat_outliers(data):
    """
    Apply IQR winsorization (1.5 * IQR caps) column-wise and return a new DataFrame.

    For every column the first and third quartiles are computed, and values
    below ``Q1 - 1.5 * IQR`` or above ``Q3 + 1.5 * IQR`` are replaced by the
    corresponding bound. Missing values are left as they are.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose columns can all be cast to float. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Float-typed frame with the same index and columns as ``data`` and
        every column capped to its own IQR fences.
    """
    # astype() already hands back a new object, so a separate copy() is redundant.
    df = data.astype(float)

    q1 = df.quantile(0.25)
    q3 = df.quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr

    # The bounds are Series indexed by column label; axis=1 aligns them with
    # the frame's columns so each column is clipped by its own pair of fences.
    return df.clip(lower=lower, upper=upper, axis=1)


def data_scale(X):
    """
    Standardize every column of X with a default-configured StandardScaler.

    The scaler is fitted on X itself. The transformed values are wrapped in a
    new DataFrame that reuses X's column labels and index, so X must be a
    DataFrame (its ``columns`` and ``index`` attributes are read).
    """
    standardized = StandardScaler().fit_transform(X)
    result = pd.DataFrame(standardized, index=X.index, columns=X.columns)
    return result


def evaluate_classifier(y_test, y_pred):
    """
    Print a confusion matrix, a classification report and four summary scores.

    The scores (accuracy, recall, f1-score, precision) are computed by the
    scikit-learn metric functions called with their default arguments and are
    printed with three decimal places. Nothing is returned.

    Parameters
    ----------
    y_test : array-like
        True labels.
    y_pred : array-like
        Predicted labels for the same samples.
    """
    divider = "==================="

    # The matrix is computed before anything is printed.
    matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix")
    print(matrix)
    print(divider, end="\n\n")

    print("Classification Report:")
    print(classification_report(y_test, y_pred))
    print(divider)

    # Evaluate all four metrics first, then print them in insertion order.
    scores = {
        "accuracy": accuracy_score(y_test, y_pred),
        "recall": recall_score(y_test, y_pred),
        "f1-score": f1_score(y_test, y_pred),
        "precision": precision_score(y_test, y_pred),
    }
    for label, value in scores.items():
        print(f"{label}: {value:.3f}")

#### ***Main.py***

In [None]:
import os
import sys
import warnings
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
import ML_Modules as mm

# Silence every warning for the whole process so that only the evaluation
# output printed by main() reaches the console. Both calls install an "ignore"
# filter with no category or module restriction.
warnings.simplefilter("ignore")
warnings.filterwarnings("ignore")


def main():
    """
    Train an RBF-kernel SVM on a CSV file and print its test-set evaluation.

    Steps:
      1. Read a file name from standard input and load it as CSV from the
         directory given by ``sys.path[0]``.
      2. Exit with status 1 (after printing an error) if the file is not
         found or any required column is absent.
      3. Keep five feature columns plus the ``Outcome`` target and drop rows
         where ``Glucose`` or ``BMI`` equals zero.
      4. Winsorize and standardize the features via ``ML_Modules``.
      5. Split 80/20, grid-search ``C`` and ``gamma`` with 5-fold CV, and
         evaluate the best estimator on the held-out rows.

    NOTE(review): outlier caps and scaling are computed on all rows before the
    train/test split, so test rows influence the preprocessing. Confirm this
    is intended before reusing the reported metrics.
    """
    filename = input().strip()
    csv_path = os.path.join(sys.path[0], filename)

    try:
        frame = pd.read_csv(csv_path)
    except FileNotFoundError:
        print(f"Error: File '{filename}' not found.")
        sys.exit(1)

    # All of these must be present, even though only some are used as features.
    expected_columns = (
        "Glucose",
        "BloodPressure",
        "SkinThickness",
        "Insulin",
        "BMI",
        "DiabetesPedigreeFunction",
        "Age",
        "FamilyHistory",
        "HbA1c",
        "Outcome",
    )
    if any(name not in frame.columns for name in expected_columns):
        print("Error: Required columns missing.")
        sys.exit(1)

    feature_names = ["Glucose", "BMI", "Age", "FamilyHistory", "HbA1c"]

    # Rows with a zero Glucose or BMI reading are discarded from both the
    # feature matrix and the target.
    keep = (frame["Glucose"] != 0) & (frame["BMI"] != 0)
    X = frame.loc[keep, feature_names]
    y = frame.loc[keep, "Outcome"]

    # IQR winsorization followed by standardization.
    X_scaled = mm.data_scale(mm.treat_outliers(X))

    # 80/20 hold-out split with a fixed seed.
    X_train, X_test, y_train, y_test = train_test_split(
        X_scaled, y, test_size=0.2, random_state=42
    )

    # Search over C and gamma for the RBF kernel using 5-fold cross-validation.
    search_space = {
        "C": [0.1, 1, 10, 100],
        "gamma": ["scale", "auto", 0.01, 0.1, 1],
        "kernel": ["rbf"],
    }
    search = GridSearchCV(
        SVC(),
        param_grid=search_space,
        cv=5,
        scoring="accuracy",
        n_jobs=-1,
    )
    search.fit(X_train, y_train)

    # Score the best estimator on the held-out rows and print the report.
    predictions = search.best_estimator_.predict(X_test)
    mm.evaluate_classifier(y_test, predictions)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
