In [33]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, jaccard_score, cohen_kappa_score
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from performance_measures import performance_measures

In [34]:
from sklearn.linear_model import LogisticRegression

In [35]:
# Load the raw dataset. The path is relative to the notebook's working
# directory; a forward slash is used because it is accepted on Windows, Linux
# and macOS, whereas a backslash separator only resolves on Windows.
FetalHealth = pd.read_csv("source/fetal_health.csv")

In [36]:
# Separate the predictors from the label: the 'fetal_health' column is the
# target, every remaining column is a feature.
y = FetalHealth["fetal_health"]
X = FetalHealth.drop(columns=["fetal_health"])

In [37]:
# Rescale the feature columns and keep the result as a labelled DataFrame.
# MinMaxScaler is the active choice; preprocessing.StandardScaler() and
# preprocessing.RobustScaler() are drop-in alternatives for s_scaler.
# NOTE(review): the scaler is fitted on every row of X, i.e. before the
# train/test split, so test-set statistics influence the scaling — consider
# fitting on the training rows only.
col_names = X.columns.tolist()
s_scaler = preprocessing.MinMaxScaler()

# fit_transform returns a plain array, so the column labels are re-attached.
X_df = pd.DataFrame(s_scaler.fit_transform(X), columns=col_names)

# Transposed summary: one row per feature.
X_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
baseline value,2126.0,0.505627,0.182238,0.0,0.37037,0.5,0.62963,1.0
accelerations,2126.0,0.167277,0.203452,0.0,0.0,0.105263,0.315789,1.0
fetal_movement,2126.0,0.01971,0.097018,0.0,0.0,0.0,0.006237,1.0
uterine_contractions,2126.0,0.291094,0.196405,0.0,0.133333,0.266667,0.466667,1.0
light_decelerations,2126.0,0.125964,0.197347,0.0,0.0,0.0,0.2,1.0
severe_decelerations,2126.0,0.003293,0.0573,0.0,0.0,0.0,0.0,1.0
prolongued_decelerations,2126.0,0.031703,0.11799,0.0,0.0,0.0,0.0,1.0
abnormal_short_term_variability,2126.0,0.466535,0.229238,0.0,0.266667,0.493333,0.653333,1.0
mean_value_of_short_term_variability,2126.0,0.166586,0.129888,0.0,0.073529,0.147059,0.220588,1.0
percentage_of_time_with_abnormal_long_term_variability,2126.0,0.108205,0.202164,0.0,0.0,0.0,0.120879,1.0


In [38]:
# Hold out 30% of the scaled rows for evaluation; the fixed random_state makes
# the partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y,
    test_size=0.3,
    random_state=42,
)

In [39]:
# Fit a logistic-regression classifier with default settings and predict the
# labels of the held-out rows.
lr = LogisticRegression()
lr.fit(X_train, y_train)
y_pred = lr.predict(X_test)

# Score the predictions; the per-class metrics are combined with
# average="weighted".
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average="weighted")
recall = recall_score(y_test, y_pred, average="weighted")
precision = precision_score(y_test, y_pred, average="weighted")
jaccard = jaccard_score(y_test, y_pred, average="weighted")
kappa = cohen_kappa_score(y_test, y_pred)

# Report one metric per line, in the order they were computed.
for metric_label, metric_value in (
    ("Accuracy :", accuracy),
    ("F1 :", f1),
    ("Recall :", recall),
    ("Precision :", precision),
    ("Jaccard :", jaccard),
    ("Kappa :", kappa),
):
    print(metric_label, metric_value)

Accuracy : 0.8949843260188087
F1 : 0.8917618842284725
Recall : 0.8949843260188087
Precision : 0.8904416797781926
Jaccard : 0.8171917295295531
Kappa : 0.699629684283014


In [40]:
# Same experiment as the previous cell, but the metric report is delegated to
# the shared performance_measures helper. fit() returns the estimator itself,
# so the fitted model can be bound in a single statement.
lr = LogisticRegression().fit(X_train, y_train)
performance_measures(lr, X_test, y_test)

Accuracy : 0.8949843260188087
F1 : 0.8917618842284725
Recall : 0.8949843260188087
Precision : 0.8904416797781926
Jaccard : 0.8171917295295531
Kappa : 0.699629684283014


def performance_measures(model, X_test, y_test):
    """Print a classification report for a fitted model.

    Predicts labels for ``X_test`` with ``model.predict`` and prints, one per
    line, the accuracy, F1, recall, precision, Jaccard and Cohen's kappa
    scores against ``y_test``. F1, recall, precision and Jaccard are computed
    with ``average="weighted"``.

    Parameters
    ----------
    model : object
        Fitted estimator exposing ``predict(X=...)``.
    X_test : array-like
        Feature rows to predict on.
    y_test : array-like
        True labels for ``X_test``.

    Returns
    -------
    None
        The scores are only printed.
    """
    predicted = model.predict(X=X_test)
    weighted = {"average": "weighted"}

    # Every score is computed before anything is printed, so a failing metric
    # leaves no partial report on screen.
    report = [
        ("Accuracy :", accuracy_score(y_test, predicted)),
        ("F1 :", f1_score(y_test, predicted, **weighted)),
        ("Recall :", recall_score(y_test, predicted, **weighted)),
        ("Precision :", precision_score(y_test, predicted, **weighted)),
        ("Jaccard :", jaccard_score(y_test, predicted, **weighted)),
        ("Kappa :", cohen_kappa_score(y_test, predicted)),
    ]

    for label, score in report:
        print(label, score)