## Utils

In [1]:
import pandas as pd
from catboost import CatBoostClassifier

### Functions

In [2]:
def display_info(data, data_name):
    """Print the shape and the columns of ``data`` under the label ``data_name``.

    Args:
        data: Object exposing ``shape`` and ``columns`` attributes.
        data_name: Label inserted into the "Size Of ..." line.

    Returns:
        None. The summary is written to standard output.
    """
    size_line = f"Size Of {data_name}: {data.shape}"
    columns_line = f"Columns: {data.columns}"
    # A blank line separates the two parts of the report.
    print(size_line, columns_line, sep="\n\n")

## Code

In [3]:
# Load train.csv, using its "id" column as the row index.
original_data = pd.read_csv("train.csv", index_col="id")
# Show the first row as a quick check of the loaded columns.
original_data.head(1)

Unnamed: 0_level_0,Age,Sex,Chest pain type,BP,Cholesterol,FBS over 120,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
0,58,1,4,152,239,0,0,158,1,3.6,2,2,7,Presence


### Split Data

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
# Draw a 100-row subsample of the training data. The fixed seed makes the
# subsample -- and therefore every split and metric derived from it -- identical
# after a kernel restart.
df_copy = original_data.sample(n=100, random_state=42)
print(f"Shape Of Sample: {df_copy.shape}\n\nColumns: {df_copy.columns}")

Shape Of Sample: (100, 14)

Columns: Index(['Age', 'Sex', 'Chest pain type', 'BP', 'Cholesterol', 'FBS over 120',
       'EKG results', 'Max HR', 'Exercise angina', 'ST depression',
       'Slope of ST', 'Number of vessels fluro', 'Thallium', 'Heart Disease'],
      dtype='object')


In [6]:
# Feature matrix: every column of the sample except the "Heart Disease" label.
FEATURES = df_copy.drop(columns=["Heart Disease"])
display_info(FEATURES, "FEATURES")

Size Of FEATURES: (100, 13)

Columns: Index(['Age', 'Sex', 'Chest pain type', 'BP', 'Cholesterol', 'FBS over 120',
       'EKG results', 'Max HR', 'Exercise angina', 'ST depression',
       'Slope of ST', 'Number of vessels fluro', 'Thallium'],
      dtype='object')


In [7]:
# Label vector: the "Heart Disease" column of the same sample.
TARGET = df_copy["Heart Disease"]
print(f"Size: {TARGET.shape}")

Size: (100,)


In [8]:
X = FEATURES
y = TARGET

# Hold out 20% of the rows for evaluation.
# stratify=y keeps the label proportions the same in both folds; with a small
# sample this guards against a test fold that is skewed towards (or contains
# only) one class, which would distort or break ROC AUC scoring.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

print(f"Train Shape Rows x Colums :{X_train.shape, y_train.shape}\n\nTest Shape Rows x Colums:{X_test.shape, y_test.shape}")

Train Shape Rows x Colums :((80, 13), (80,))

Test Shape Rows x Colums:((20, 13), (20,))


### Train

In [9]:
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import roc_auc_score
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from sklearn.datasets import load_breast_cancer

In [10]:
# Candidate classifiers, keyed by the label used as the MLflow run name.
# NOTE(review): LogisticRegression, SVC and KNeighborsClassifier are sensitive to
# feature scale and the features are passed unscaled -- consider wrapping them in
# a scaling pipeline.
models = {
    "RandomForest":        RandomForestClassifier(n_estimators=100, random_state=42),
    "GradientBoosting":    GradientBoostingClassifier(random_state=42),
    "LogisticRegression":  LogisticRegression(max_iter=1000),
    # probability=True fits the probability estimates with an internal shuffled
    # cross-validation; seeding it keeps predict_proba (and the AUC) reproducible.
    "SVM":                 SVC(probability=True, random_state=42),
    "KNN":                 KNeighborsClassifier(n_neighbors=5),
}

### MLflow

In [11]:
import mlflow
import mlflow.sklearn

In [17]:
# Point the MLflow client at the tracking server on localhost and select the
# experiment that receives one run per candidate model.
mlflow.set_tracking_uri("http://127.0.0.1:5000")
mlflow.set_experiment("Experiments")

for model_name in models:
    model = models[model_name]

    with mlflow.start_run(run_name=model_name):
        model.fit(X_train, y_train)

        # Column 1 of predict_proba is used as the score for ROC AUC,
        # first on the held-out rows, then on the training rows.
        y_prob = model.predict_proba(X_test)[:, 1]
        auc = roc_auc_score(y_test, y_prob)

        y_pred_train = model.predict_proba(X_train)[:, 1]
        auc_train = roc_auc_score(y_train, y_pred_train)

        # Parameters: the run label plus the estimator's own settings.
        mlflow.log_param("model_type", model_name)
        mlflow.log_params(model.get_params())

        # Metrics: test AUC followed by train AUC.
        for metric_name, metric_value in (("roc_auc", auc), ("roc_auc_train", auc_train)):
            mlflow.log_metric(metric_name, metric_value)

        # Save the fitted estimator with the run under the "model" artifact path.
        mlflow.sklearn.log_model(model, artifact_path="model")


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


üèÉ View run RandomForest at: http://127.0.0.1:5000/#/experiments/333699712299571206/runs/4b136e8d06764d3ea35a48bd5c9953ed
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/333699712299571206


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


üèÉ View run GradientBoosting at: http://127.0.0.1:5000/#/experiments/333699712299571206/runs/50bbd88e3c1b427eaa90da2cdbc54b20
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/333699712299571206


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


üèÉ View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/333699712299571206/runs/6e461a6b58fa4d08babdb955adc87b8a
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/333699712299571206


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


üèÉ View run SVM at: http://127.0.0.1:5000/#/experiments/333699712299571206/runs/b95749fd6d734d3cad3df21cadc46ef2
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/333699712299571206


  flavor.save_model(path=local_path, mlflow_model=mlflow_model, **kwargs)


üèÉ View run KNN at: http://127.0.0.1:5000/#/experiments/333699712299571206/runs/84284c18aba84a64a5f15f88b3394e17
üß™ View experiment at: http://127.0.0.1:5000/#/experiments/333699712299571206


In [None]:
# Query the runs logged by this notebook. The name has to match the experiment
# selected with mlflow.set_experiment ("Experiments"); searching a different
# name does not return the runs recorded by the training loop.
runs = mlflow.search_runs(experiment_names=["Experiments"])
runs.head(1)