**IMPORTING BASIC LIBRARIES**

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

**READING THE DATASET**

In [2]:
df=pd.read_csv("data/CTG.csv")


**CLEANING THE DATA AND PREPARING INPUT AND OUTPUT FIELDS**

In [3]:
df=df.drop(["FileName","Date","SegFile","b","e"],axis=1)
df=df.dropna()
df.isnull().sum()
X=df[['LBE', 'LB', 'AC', 'FM', 'UC', 'DL',
       'DS', 'DP', 'DR']]
Y=df[["NSP"]]

**PERFORMING THE SCALING**

In [4]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler
Scaler=StandardScaler()
X=Scaler.fit_transform(X)

**SPLITTING THE DATASET INTO TRAINING SET AND TEST SET**

In [5]:

from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.3,random_state=42)

**SUPPORT VECTOR CLASSIFIER MODEL**

In [6]:
from sklearn.svm import SVC

svm_clf=SVC(kernel="poly",degree=6,coef0=5,gamma=0.1)
svm_clf=svm_clf.fit(X_train,y_train)
y_pred=svm_clf.predict(X_test)

**CALCULATING DIFFERENT METRICS**

In [7]:
from sklearn.metrics import accuracy_score, precision_score, f1_score, recall_score, confusion_matrix
print("CONFUSION MATRIX")
print(confusion_matrix(y_test,y_pred))
print("F1-score:",f1_score(y_test,y_pred,average='weighted'))
print("Accuracy-score:",accuracy_score(y_test,y_pred))
print("Precision-score:",precision_score(y_test,y_pred,average='weighted'))
print("Recall-score:",recall_score(y_test,y_pred,average="weighted"))

CONFUSION MATRIX
[[470  20   6]
 [ 29  62  10]
 [  8   5  28]]
F1-score: 0.8753514883881671
Accuracy-score: 0.877742946708464
Precision-score: 0.8744054575648664
Recall-score: 0.877742946708464


**SAVING THE SVM MODEL FOR FUTURE PURPOSES**

In [18]:
import joblib
joblib.dump(svm_clf, './models/SVC-model')
#model=joblib.load('./models/svc-model') for loading a model

['./models/SVC-model']

**DECISION TREE CLASSIFIER**

In [9]:
from sklearn.tree import DecisionTreeClassifier
tree_clf=DecisionTreeClassifier(min_samples_split=6, min_samples_leaf=4, max_depth=6, )
tree_clf=tree_clf.fit(X_train,y_train)
y_pred=tree_clf.predict(X_test)


**PRINTING THE METRICS OF DTC**

In [10]:
print("Accuracy-score:",accuracy_score(y_test,y_pred))
print("Recall-score:",recall_score(y_test,y_pred,average="weighted"))
print("Precision-score:",precision_score(y_test,y_pred,average='weighted'))

Accuracy-score: 0.8683385579937304
Recall-score: 0.8683385579937304
Precision-score: 0.8603398399877494


In [49]:
# from sklearn.tree import export_graphviz
# g=export_graphviz(
# tree_clf, out_file="tree.dot",
# feature_names=['LBE', 'LB', 'AC', 'FM', 'UC', 'DL',
#        'DS', 'DP', 'DR'],
# class_names=["NSP"],
# rounded=True,
# filled=True)
# graph = graphviz.Source(g)
# graph.render("decision_tree", format="png")
# from subprocess import check_call
# check_call(['dot','-Tpng','tree.dot','-o','tree.png'])
print(Y.shape)



(2126, 1)


**ENSEMBLING TECHNIQUE-VOTING CLASSIFIER**

In [14]:
from sklearn.ensemble import VotingClassifier, RandomForestClassifier
svm_clf=SVC(kernel="poly",degree=6,coef0=5,gamma=0.1,probability=True)
decision_tree=DecisionTreeClassifier(min_samples_split=6, min_samples_leaf=4, max_depth=6)
rnd_clf=RandomForestClassifier()
voting_clf=VotingClassifier(estimators=[("svm",svm_clf),('rf',rnd_clf),("decision_tree",decision_tree)],voting="hard")
voting_clf.fit(X_train,y_train)
joblib.dump(voting_clf, './models/VC-model')

['./models/VC-model']

**PRINTING THE METRICS OF VOTING CLASSIFIER**

In [12]:
for clf in (rnd_clf, svm_clf,decision_tree, voting_clf):
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print(clf.__class__.__name__, accuracy_score(y_test, y_pred))

RandomForestClassifier 0.8699059561128527
SVC 0.877742946708464
DecisionTreeClassifier 0.8683385579937304
VotingClassifier 0.8840125391849529


**ENSEMBLING TECHNIQUE-BAGGING CLASSIFIER**

In [15]:
from sklearn.ensemble import BaggingClassifier
#bagging classifier-has multiple base classifiers of same model which trains  on different random subsets
#voting classifier-has multiple base classifiers of different model that trains on same data.Each model produces diff output and final output is based on which output has max votes(hard voting)

**TRAINING AND PREDICTION**

In [17]:
bag_clf=BaggingClassifier(DecisionTreeClassifier(),n_estimators=500,n_jobs=-1,max_samples=100, bootstrap=True)
bag_clf.fit(X_train,y_train)
y_pred=bag_clf.predict(X_test)
joblib.dump(bag_clf, './models/BC-model')
print(accuracy_score(y_test,y_pred))

0.8714733542319749


**BOOSTING TECHNIQUES-ADA BOOST**

In [19]:
from sklearn.ensemble import AdaBoostClassifier
ada_clf=AdaBoostClassifier(DecisionTreeClassifier(max_depth=1),n_estimators=1000,learning_rate=0.1)

In [20]:
ada_clf.fit(X_train,y_train)
y_pred=ada_clf.predict(X_test)
joblib.dump(ada_clf, './models/ADA-model')
print(accuracy_score(y_test,y_pred))

0.8620689655172413


**BOOSTING TECHNIQUES-XG BOOST**

In [49]:
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
xgb_clf=XGBClassifier(n_estimators=100,learning_rate=200)
xgb_clf.fit(X_train,y_train_encoded)
y_pred=xgb_clf.predict(X_test)
joblib.dump(xgb_clf, './models/XGB-model')
print(accuracy_score(y_test,y_pred))

0.7115987460815048
