In [22]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy import loadtxt
from sklearn.metrics import accuracy_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

In [23]:
# Read the dataset (presumably already cleaned, going by the file name) and
# show its (rows, columns) shape as a quick sanity check.
data_path = 'clean_data.csv'
df = pd.read_csv(data_path)
df.shape

(534, 7)

In [24]:
# Separate the label column from the predictors:
#   y -> label values as a NumPy array
#   X -> every remaining column, still a DataFrame at this point
target_col = "PCOS (Y/N)"
y = df[target_col].values
X = df.drop(columns=[target_col])

In [25]:
# Show the feature frame (every column except the target) as a sanity check.
X

Unnamed: 0,Follicle No. (R),Follicle No. (L),Skin darkening (Y/N),hair growth(Y/N),Weight gain(Y/N),Cycle(R/I)
0,3,3,0,0,0,2
1,5,3,0,0,0,2
2,15,13,0,0,0,2
3,2,2,0,0,0,2
4,4,3,0,0,0,2
...,...,...,...,...,...,...
529,0,2,0,0,0,2
530,7,9,0,0,0,2
531,10,8,0,0,0,4
532,7,7,0,0,0,2


In [26]:
# Convert the feature frame to a plain NumPy array before splitting/training.
# np.asarray is used instead of `.values` so this cell can be re-run safely:
# an ndarray has no `.values` attribute (re-running would raise
# AttributeError), whereas np.asarray passes an existing array through as-is.
X = np.asarray(X)
X

array([[ 3,  3,  0,  0,  0,  2],
       [ 5,  3,  0,  0,  0,  2],
       [15, 13,  0,  0,  0,  2],
       ...,
       [10,  8,  0,  0,  0,  4],
       [ 7,  7,  0,  0,  0,  2],
       [10,  9,  1,  1,  1,  4]])

In [27]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
# NOTE(review): the split is not stratified on y -- consider whether
# `stratify=y` is wanted given the class balance.
seed = 13
test_size = 0.2
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=test_size,
    random_state=seed,
)

In [28]:
# Inspect the training features produced by the split (not yet scaled here).
X_train

array([[ 4,  6,  0,  0,  0,  2],
       [ 3,  5,  0,  0,  0,  2],
       [14, 16,  1,  0,  1,  2],
       ...,
       [ 3,  2,  0,  0,  0,  4],
       [14, 15,  1,  1,  1,  2],
       [10, 12,  1,  1,  1,  4]])

In [29]:
# Inspect the held-out test features produced by the split (not yet scaled here).
X_test

array([[12, 15,  1,  1,  0,  2],
       [ 2,  6,  0,  0,  0,  2],
       [ 2,  1,  0,  0,  0,  2],
       [ 6,  7,  0,  0,  1,  2],
       [11, 14,  1,  1,  1,  4],
       [ 3,  3,  0,  0,  0,  2],
       [10,  9,  0,  1,  0,  2],
       [ 9,  5,  0,  0,  1,  2],
       [ 1,  1,  0,  1,  0,  2],
       [10, 11,  1,  0,  1,  4],
       [15, 10,  1,  1,  0,  2],
       [ 6,  5,  0,  0,  0,  2],
       [12,  8,  1,  0,  1,  4],
       [ 1,  2,  0,  0,  0,  2],
       [ 4,  2,  0,  0,  0,  2],
       [ 4,  3,  0,  0,  0,  4],
       [ 2,  5,  0,  0,  0,  4],
       [12,  9,  1,  1,  1,  4],
       [ 8,  5,  1,  0,  0,  2],
       [ 4,  6,  0,  0,  0,  2],
       [ 7,  9,  0,  0,  1,  2],
       [ 2,  4,  1,  0,  1,  4],
       [12, 10,  0,  1,  1,  4],
       [16, 14,  1,  1,  1,  4],
       [ 4,  4,  0,  0,  1,  2],
       [ 3,  3,  0,  0,  0,  2],
       [ 1,  2,  0,  0,  0,  2],
       [13, 12,  1,  0,  0,  2],
       [ 6,  7,  0,  0,  0,  4],
       [ 5,  6,  0,  0,  0,  4],
       [ 1

In [30]:
# Feature Scaling
# The scaler is fitted on the training split only and then applied unchanged
# to the test split, so no statistics from the test data are used in fitting.
# NOTE: this cell overwrites X_train / X_test in place. Re-running it re-fits
# `scaler` on already-scaled data, which makes `scaler` unusable for raw input
# later on -- re-run the train/test split cell first if this must be re-run.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [31]:
# Gradient-boosted tree classifier for the binary target.
# `n_jobs` and `random_state` are the scikit-learn-API parameter names for the
# worker-thread count and the RNG seed; they replace the deprecated
# `nthread` / `seed` aliases while keeping the same values (4 and 27).
model = XGBClassifier(
    learning_rate=0.1,
    n_estimators=1000,
    max_depth=5,
    min_child_weight=1,
    gamma=0,
    subsample=0.8,
    colsample_bytree=0.8,
    objective='binary:logistic',
    n_jobs=4,
    scale_pos_weight=1,
    random_state=27,
)
# Fit on the (scaled) training split; as the last expression, the fitted
# estimator is also what the cell displays.
model.fit(X_train, y_train)

In [32]:
# Predict on the held-out test features.
# `y_pred` keeps the raw output of predict(); `predictions` is the same
# sequence with each element passed through round(), as a plain list.
# NOTE(review): predict() presumably already returns discrete class labels,
# in which case the rounding is a no-op -- confirm before removing it.
y_pred = model.predict(X_test)
predictions = list(map(round, y_pred))

In [33]:
# Fraction of test samples classified correctly, printed as a percentage
# with two decimals.
accuracy = accuracy_score(y_test, predictions)
accuracy_pct = accuracy * 100.0
print("Accuracy: %.2f%%" % accuracy_pct)

Accuracy: 90.65%


In [34]:
# Unpack the binary confusion matrix into its four cells.
# `labels=[0, 1]` pins the matrix to 2x2 even if one class happens to be
# absent from both y_test and y_pred; without it the matrix would shrink and
# the four-way unpacking below would raise ValueError.
tn, fp, fn, tp = confusion_matrix(y_test, y_pred, labels=[0, 1]).ravel()
print("True Negatives: ",tn)
print("False Positives: ",fp)
print("False Negatives: ",fn)
print("True Positives: ",tp)
# Specificity (true-negative rate): TN / (TN + FP).
specificity = tn / (tn+fp)
print(specificity)

True Negatives:  68
False Positives:  6
False Negatives:  4
True Positives:  29
0.918918918918919


In [35]:
# Sensitivity (true-positive rate / recall of the positive class):
# TP / (TP + FN), using the counts unpacked from the confusion matrix.
actual_positives = fn + tp
sensitivity = tp / actual_positives
print(sensitivity)

0.8787878787878788


In [36]:
# Per-class precision / recall / F1 and support for the test-set predictions.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.94      0.92      0.93        74
           1       0.83      0.88      0.85        33

    accuracy                           0.91       107
   macro avg       0.89      0.90      0.89       107
weighted avg       0.91      0.91      0.91       107



# PROCEDURE TO EVALUATE NEW USER INPUT DATA

In [37]:
# user input data should be in this form:
# a one-row frame holding the feature columns (target column removed).
# The first row of the dataset stands in for real user input here.
test_data = df.drop(columns=["PCOS (Y/N)"]).head(1)
test_data

Unnamed: 0,Follicle No. (R),Follicle No. (L),Skin darkening (Y/N),hair growth(Y/N),Weight gain(Y/N),Cycle(R/I)
0,3,3,0,0,0,2


In [38]:
# Convert the sample to a plain NumPy array, matching the array input the
# scaler and model were fitted on.
# np.asarray is used instead of `.values` so this cell can be re-run safely:
# an ndarray has no `.values` attribute (re-running would raise
# AttributeError), whereas np.asarray passes an existing array through as-is.
test_data = np.asarray(test_data)
test_data

array([[3, 3, 0, 0, 0, 2]])

In [39]:
test_data = scaler.transform(test_data)

In [40]:
y_pred = model.predict(test_data)
predictions = [round(value) for value in y_pred]
predictions

[0]