In [None]:
import os
import timeit

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from imblearn.over_sampling import RandomOverSampler
from sklearn import preprocessing
from sklearn.model_selection import train_test_split

from performance_measures import performance_measures

In [None]:
from sklearn.linear_model import LogisticRegression # Logistic Regression
from sklearn.svm import SVC # Support Vector Machines
from sklearn.neighbors import KNeighborsClassifier # K-Nearest Neighbors
from sklearn.naive_bayes import GaussianNB # Naive Bayes
from sklearn.tree import DecisionTreeClassifier # Decision Tree
from sklearn.ensemble import RandomForestClassifier # Random Forest
from sklearn.ensemble import AdaBoostClassifier # Ada Boost
from sklearn.ensemble import GradientBoostingClassifier # Gradient Boost
from xgboost import XGBClassifier # XGBoost (imported, but not used by any cell visible here)

In [None]:
# Load the fetal-health dataset from the local "source" folder.
# A forward slash is used so the relative path resolves on Windows, Linux
# and macOS alike (a backslash is only a path separator on Windows).
FetalHealth = pd.read_csv("source/fetal_health.csv")

In [None]:
# Preview the first rows of the loaded dataset.
FetalHealth.head()

In [None]:
# Summary statistics, transposed so each column of the dataset becomes one row.
FetalHealth.describe().T

In [None]:
# Column dtypes and non-null counts.
FetalHealth.info()

Classes : 
1. Normal
2. Suspect
3. Pathological

Analysis consists of :

1. Count plot
2. Correlation heat map

In [None]:
# One colour per fetal_health class; `colours` is reused by later plotting cells.
colours = ["pink", "orange", "red"]

# Number of records in each fetal_health class.
sns.countplot(
    data=FetalHealth,
    x="fetal_health",
    palette=colours,
)

In [None]:
# Pairwise correlations between every pair of columns in the dataset.
corrmat = FetalHealth.corr()

# Diverging colour map for the heat map below (which is centred on zero).
cmap = sns.diverging_palette(250, 10, s=80, l=55, n=9, as_cmap=True)

# Large canvas so the annotated cells stay readable.
plt.figure(figsize=(20, 20))
sns.heatmap(corrmat, annot=True, cmap=cmap, center=0)

In [None]:
# One boxen plot per feature, grouped by fetal_health class.
# The target column itself is skipped: plotting fetal_health against
# fetal_health only produces a degenerate, uninformative figure.
cols = FetalHealth.columns
for feature in cols.drop("fetal_health"):
    sns.boxenplot(x=FetalHealth["fetal_health"], y=FetalHealth[feature], palette=colours)
    plt.show()  # render each feature as its own figure

In [None]:
# Colour cycle shared by the "all columns" boxen plots (also reused after scaling).
shades = ["#f7b2b0", "#c98ea6", "#8f7198", "#50587f", "#003f5c"]

# All unscaled columns side by side, to compare their value ranges.
plt.figure(figsize=(20, 10))
sns.boxenplot(data=FetalHealth, palette=shades)
plt.xticks(rotation=90)  # column names are long; rotate so they do not overlap
plt.show()

Data Preprocessing :

1. Scaling the features
2. Separating features (X) from the target (y), and splitting into train and test sets

In [None]:
# Features: every column except the target.
X = FetalHealth.drop(columns=['fetal_health'])
# Target: the fetal_health class label.
y = FetalHealth.loc[:, 'fetal_health']

In [None]:
# Remember the feature names: fit_transform returns a bare array without them.
col_names = list(X.columns)

# Candidate scalers. Exactly one is left active per run; keep `scaler_name`
# in sync with it, because that name labels the result files written later.
#s_scaler = preprocessing.StandardScaler()
#s_scaler = preprocessing.MinMaxScaler()
#s_scaler = preprocessing.RobustScaler()
s_scaler = preprocessing.MaxAbsScaler()
#s_scaler = preprocessing.Normalizer()
#s_scaler = preprocessing.QuantileTransformer()

scaler_name = "Max Abs Scaler"

# Set to True to bypass scaling and use the raw features.
noscale = False

# NOTE(review): the scaler is fit on the full feature matrix here, before the
# train/test split in a later cell, so test rows influence the fitted scaling
# statistics. Fitting on the training split only would avoid this - confirm
# whether that matters for the comparison being made.
X_df = X if noscale else s_scaler.fit_transform(X)

# Wrap the result in a DataFrame again so the column names are restored.
X_df = pd.DataFrame(X_df, columns=col_names)
X_df.describe().T

In [None]:
# Same all-columns boxen plot as before, now on the scaled features.
plt.figure(figsize=(20, 10))
sns.boxenplot(data=X_df, palette=shades)
plt.xticks(rotation=90)  # rotate the long feature names
plt.show()

In [None]:
# Hold out 30% of the rows for testing, with a fixed seed for reproducibility.
# stratify=y keeps the class proportions the same in the train and test sets;
# without it an imbalanced target can leave the minority classes
# under-represented in the test set and make the scores unstable.
X_train, X_test, y_train, y_test = train_test_split(
    X_df,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

In [None]:
# Balance the classes by randomly resampling the training split.
# Only the training data is resampled; X_test / y_test are left untouched.
ros = RandomOverSampler(random_state=42)
X_train, y_train = ros.fit_resample(X_train, y_train)

In [None]:
# Display the training features as currently defined (after the resampling cell above).
X_train

In [None]:
# Display the training labels as currently defined (after the resampling cell above).
y_train

In [None]:
# Empty results table; each classifier cell below appends one row to it.
performanceMeasuresDF = pd.DataFrame(
    columns=[
        'Classifier Model',
        'Accuracy',
        'F1-Score',
        'Recall',
        'Precision',
        'Jaccard-Score',
        'Kappa-Score',
    ]
)

In [None]:
lr = LogisticRegression(max_iter = 100000)
result = %timeit -o lr.fit(X=X_train, y=y_train) 
lr.fit(X = X_train, y = y_train)
print("Logistic Regression : ")
measures = performance_measures("Logistic Regression", lr, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
#performanceMeasuresDF = pd.concat((performanceMeasuresDF, pd.DataFrame(measures)))
exec_time = result.average

In [None]:
svmlinear = SVC(kernel = 'linear')
result = %timeit -o svmlinear.fit(X=X_train, y = y_train)
svmlinear.fit(X=X_train, y = y_train)
print("SVM Linear : ")
measures = performance_measures("Support Vector Machine(Linear)", svmlinear, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
svmrbf = SVC(kernel = 'rbf')
result = %timeit -o svmrbf.fit(X=X_train, y = y_train)
svmrbf.fit(X=X_train, y = y_train)
print("SVM RBF : ")
measures = performance_measures("Support Vector Machine(RBF)", svmrbf, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

Two SVM kernel variants (polynomial and sigmoid) which haven't been tried before

In [None]:
svmpoly = SVC(kernel = 'poly')
result = %timeit -o svmpoly.fit(X=X_train, y = y_train)
svmpoly.fit(X=X_train, y = y_train)
print("SVM Poly : ")
measures = performance_measures("Support Vector Machine(Poly)",svmpoly, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
svmsigmoid = SVC(kernel = 'sigmoid')
result = %timeit -o svmsigmoid.fit(X=X_train, y = y_train)
svmsigmoid.fit(X=X_train, y = y_train)
print("SVM Sigmoid : ")
measures = performance_measures("Support Vector Machine(Sigmoid)",svmsigmoid, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
knn = KNeighborsClassifier(n_neighbors=5)
result = %timeit -o knn.fit(X=X_train, y = y_train) 
knn.fit(X=X_train, y = y_train)
print("K-Nearest Neighbors : ")
measures = performance_measures("K-Nearest Neighbors",knn, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
gnb = GaussianNB()
result = %timeit -o gnb.fit(X_train, y_train)
gnb.fit(X_train, y_train)
print("Naive Bayes : ")
measures = performance_measures("Naive Bayes",gnb, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
dt = DecisionTreeClassifier()
result = %timeit -o dt.fit(X_train, y_train)
dt.fit(X_train, y_train)
print("Decision Trees : ")
measures = performance_measures("Decision Tree",dt, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
rfc = RandomForestClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o rfc.fit(X_train, y_train)
rfc.fit(X_train, y_train)
print("Random Forest Classifier : ")
measures = performance_measures("Random Forest",rfc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
abc = AdaBoostClassifier(n_estimators = 100, random_state = 42)
result = %timeit -o abc.fit(X_train, y_train)
abc.fit(X_train, y_train)
print("Ada Boost Classifier : ")
measures = performance_measures("Ada Boost",abc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
gbc = GradientBoostingClassifier(n_estimators = 100, random_state = 42, learning_rate = 1.0, max_depth = 1)
result = %timeit -o gbc.fit(X_train, y_train)
gbc.fit(X_train, y_train)
print("Gradient Boost Classifier : ")
measures = performance_measures("Gradient Boost",gbc, X_test, y_test, performanceMeasuresDF)
performanceMeasuresDF.loc[len(performanceMeasuresDF)] = measures
exec_time += result.average

In [None]:
# Display the results table with the rows appended by the classifier cells run so far.
performanceMeasuresDF

In [None]:
# Combined ranking score: the plain (unweighted) sum of the six metric columns.
performanceMeasuresDF["Weighted_Score"] = (
    performanceMeasuresDF["Accuracy"]
    + performanceMeasuresDF["F1-Score"]
    + performanceMeasuresDF["Recall"]
    + performanceMeasuresDF["Precision"]
    + performanceMeasuresDF["Jaccard-Score"]
    + performanceMeasuresDF["Kappa-Score"]
)

In [None]:
# Rank the models: highest combined score first.
performanceTable = performanceMeasuresDF.sort_values(
    "Weighted_Score",
    ascending=False,
)

In [None]:
# Write the ranked table to results/<scaler_name>.xlsx.
# os.path.join picks the right separator for the current OS (the previous
# hard-coded backslash only worked on Windows), and the directory is created
# up front so a fresh checkout does not fail on a missing "results" folder.
os.makedirs("results", exist_ok=True)
performanceTable.to_excel(os.path.join("results", scaler_name + ".xlsx"), index=False)

In [None]:
# Total of the mean fit times accumulated by the classifier cells.
print(exec_time)

# Append this run's total to results/<scaler_name> Timings.txt (one value per line).
# The directory is created if missing; plain append mode is enough because the
# file is only written to, never read back here.
os.makedirs("results", exist_ok=True)
with open(os.path.join("results", scaler_name + " Timings.txt"), "a", encoding="utf-8") as file:
    file.write(str(exec_time) + "\n")

In [None]:
# Audible "run finished" signal: two 2-second beeps, the second slightly higher.
# winsound only exists on Windows; on other platforms the import fails, so the
# beep is skipped instead of aborting a Restart & Run All.
try:
    import winsound
except ImportError:
    winsound = None

freq = 1000  # beep frequency in Hz
dur = 2000   # beep duration in milliseconds
if winsound is not None:
    winsound.Beep(freq, dur)
    winsound.Beep(freq + 100, dur)

In [None]:
#%reset