In [1]:
import wandb
import os
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import *
from sklearn.metrics import *
from sklearn.preprocessing import (
    StandardScaler,
    RobustScaler,
    MinMaxScaler,
    MaxAbsScaler,
    OneHotEncoder,
    LabelEncoder,
    Normalizer,
)
from sklearn.model_selection import cross_val_score,train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression,LogisticRegressionCV,RidgeClassifier,RidgeClassifierCV
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import GradientBoostingClassifier,AdaBoostClassifier,VotingClassifier,BaggingClassifier,RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier,XGBRFClassifier
from catboost import CatBoostClassifier,CatBoost
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import ExtraTreesClassifier
np.random.seed(42)

In [2]:
# Load the raw dataset from a CSV file located next to the notebook.
# The 'fetal_health' column is used as the prediction target in later cells.
data = pd.read_csv('./data.csv')

In [3]:
# Separate the target column from the feature columns.
y = data['fetal_health']
X = data.drop(columns=['fetal_health'])

In [4]:
# Hold out 25% of the rows for evaluation. No random_state is passed here, so
# reproducibility presumably relies on the np.random.seed(42) call in the
# import cell (confirm against the scikit-learn version in use).
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

In [5]:
# Weights & Biases project name; passed to wandb.init() by fit().
PROJECT_NAME = 'Fetal-Health-Clf'

In [6]:
def valid(model,X,y,val=False):
    """Score ``model`` on ``(X, y)`` and return the metrics as a dict.

    Accuracy plus macro-averaged precision, F1 and recall are computed from
    ``model.predict(X)``.

    Parameters
    ----------
    model : fitted estimator exposing ``predict``.
    X : features to predict on.
    y : true labels matching ``X``.
    val : when exactly ``True``, the keys carry a ``'Val '`` prefix
        (``'Val Accuracy'``, ``'Val Precision'``, ``'Val F1 Score'``,
        ``'Val Recall'``); any other value yields ``'Accuracy'``,
        ``'Macro Precision'``, ``'F1 Score'`` and ``'Recall'``.

    Returns
    -------
    dict mapping metric label to score.
    """
    y_hat = model.predict(X)

    # The scores are the same for both modes; only the labels differ.
    scores = (
        accuracy_score(y, y_hat),
        precision_score(y, y_hat, average='macro'),
        f1_score(y, y_hat, average='macro'),
        recall_score(y, y_hat, average='macro'),
    )

    # Identity check on purpose: only the literal True selects the 'Val' labels.
    if val is True:
        labels = ('Val Accuracy', 'Val Precision', 'Val F1 Score', 'Val Recall')
    else:
        labels = ('Accuracy', 'Macro Precision', 'F1 Score', 'Recall')
    return dict(zip(labels, scores))
def fit(model,X_train,X_test,y_train,y_test,name):
    """Train ``model``, log its metrics to Weights & Biases and return it.

    A W&B run named ``name`` is opened in ``PROJECT_NAME``. The model is
    fitted on the training data, the train metrics and the 'Val'-prefixed
    test metrics from ``valid`` are logged, and a calibration curve is
    plotted. The run is always closed, even if a step fails.

    Parameters
    ----------
    model : estimator class, zero-argument factory, or estimator instance.
        Classes and factories are called with no arguments to build the
        estimator; instances are used as given.
    X_train, y_train : training features and labels.
    X_test, y_test : held-out features and labels.
    name : W&B run name, also used as the calibration-plot label.

    Returns
    -------
    The fitted estimator.
    """
    wandb.init(project=PROJECT_NAME,name=name)
    try:
        # Instantiate only when given a class or factory. An explicit check
        # replaces the former bare `except:`, which hid constructor errors and
        # then failed later with a confusing message.
        if isinstance(model, type) or (callable(model) and not hasattr(model, 'fit')):
            model = model()
        model.fit(X_train,y_train)
        wandb.log(valid(model,X_train,y_train))
        wandb.log(valid(model,X_test,y_test,True))
        # Use the data passed to this function. The notebook-level X / y can be
        # out of sync with it (e.g. unscaled while X_train has been scaled).
        wandb.sklearn.plot_calibration_curve(model, X_train, y_train, name)
    finally:
        # Close the run even on failure so later runs do not attach to it.
        wandb.finish()
    return model

In [7]:
# fit(KNeighborsClassifier,X_train,X_test,y_train,y_test,'baseline')

In [8]:
def fe(data,col,lower=0.05,upper=0.95,inclusive=False):
    """Return the rows of ``data`` whose ``col`` value lies between two quantiles.

    Both quantile cut-offs are computed from the unfiltered column, then rows
    outside them are dropped. The input frame is not modified.

    Parameters
    ----------
    data : pandas.DataFrame to filter.
    col : name of the column whose tails are trimmed.
    lower : lower quantile in [0, 1]; defaults to 0.05.
    upper : upper quantile in [0, 1]; defaults to 0.95.
    inclusive : when False (default) rows equal to a cut-off are dropped,
        so a column with many values tied at a cut-off (or a constant
        column) loses all of those rows. Pass True to keep them.

    Returns
    -------
    pandas.DataFrame containing only the retained rows.

    Raises
    ------
    ValueError
        If the quantiles do not satisfy ``0 <= lower <= upper <= 1``.
    """
    if not 0 <= lower <= upper <= 1:
        raise ValueError(
            f"quantiles must satisfy 0 <= lower <= upper <= 1, got lower={lower}, upper={upper}"
        )

    values = data[col]
    low_cut = values.quantile(lower)
    high_cut = values.quantile(upper)

    # NaN compares False on both sides, so rows with missing values are dropped.
    if inclusive:
        keep = (values >= low_cut) & (values <= high_cut)
    else:
        keep = (values > low_cut) & (values < high_cut)
    return data[keep]

In [9]:
# Trim the tails of each listed column, then rebuild the features, the target
# and the train/test split from the filtered frame.
# NOTE: `data` is reassigned here, so re-running this cell filters it again.
for col in ['accelerations']:
    data = fe(data,col)
y = data['fetal_health']
X = data.drop(columns=['fetal_health'])
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

In [10]:
# fit(KNeighborsClassifier,X_train,X_test,y_train,y_test,'baseline')

In [11]:
# pres = [StandardScaler(),RobustScaler(),MinMaxScaler(),MaxAbsScaler(),OneHotEncoder(),Normalizer()]
# for pre in pres:
#     X = data.drop('fetal_health',axis=1)
#     y = data['fetal_health']
#     X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)
#     pre = pre
#     X_train = pre.fit_transform(X_train)
#     X_test = pre.transform(X_test)
#     fit(KNeighborsClassifier,X_train,X_test,y_train,y_test,f'{pre}-pre')

In [12]:
# Rebuild the features/target and draw a fresh 75/25 split.
X = data.drop(columns=['fetal_health'])
y = data['fetal_health']
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

# Fit the scaler on the training portion only, then apply the same
# transformation to the held-out portion.
pre = MinMaxScaler()
X_train = pre.fit_transform(X_train)
X_test = pre.transform(X_test)

In [13]:
# fit(KNeighborsClassifier,X_train,X_test,y_train,y_test,'baseline')

In [14]:
# Candidate classifiers as [label, estimator class] pairs.
# Each label is the class's own name.
models = [
    [estimator.__name__, estimator]
    for estimator in (
        KNeighborsClassifier,
        LogisticRegression,
        LogisticRegressionCV,
        RidgeClassifier,
        RidgeClassifierCV,
        GaussianNB,
        DecisionTreeClassifier,
        GradientBoostingClassifier,
        AdaBoostClassifier,
        RandomForestClassifier,
        BaggingClassifier,
        SVC,
        XGBClassifier,
        XGBRFClassifier,
        CatBoostClassifier,
    )
]

In [15]:
# fit(BaggingClassifier,X_train,X_test,y_train,y_test,'baseline')

In [16]:
# parm_grid = {
#     'n_estimators':[12,25,10,50,100,125,250,500,1000],
#     'oob_score':[False,True],
#     'warm_start':[False,True]
# }
# model = GridSearchCV(BaggingClassifier(),verbose=1,cv=2,param_grid=parm_grid)
# model = fit(model,X_train,X_test,y_train,y_test,'GridSearch')

In [17]:
# model.best_params_

In [18]:
import pickle

# Train the final model with default hyper-parameters and log it as 'Final'.
model = BaggingClassifier()
model = fit(model,X_train,X_test,y_train,y_test,'Final')

# Persist the fitted model. The context manager flushes and closes the file
# handle, which the previous bare open(...) call never did.
# NOTE: only load this pickle from a trusted location; unpickling runs code.
with open('./model.pkl','wb') as model_file:
    pickle.dump(model,model_file)

[34m[1mwandb[0m: Currently logged in as: [33mranuga-d[0m (use `wandb login --relogin` to force relogin)
[34m[1mwandb[0m: wandb version 0.11.2 is available!  To upgrade, please run:
[34m[1mwandb[0m:  $ pip install wandb --upgrade


VBox(children=(Label(value=' 0.03MB of 0.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Accuracy,0.99518
Macro Precision,0.99828
F1 Score,0.98009
Recall,0.96396
_runtime,10.0
_timestamp,1628484264.0
_step,2.0
Val Accuracy,0.96751
Val Precision,0.82463
Val F1 Score,0.71719


0,1
Accuracy,▁
Macro Precision,▁
F1 Score,▁
Recall,▁
_runtime,▁▁█
_timestamp,▁▁█
_step,▁▅█
Val Accuracy,▁
Val Precision,▁
Val F1 Score,▁
