In [1]:
from sklearn.preprocessing import add_dummy_feature
from numpy import log,dot,e,shape
import numpy as np

In [2]:
class Logistic:
    def __init__(self):
        self.coeff=None
        self.history=[]
     
    def sigmoid(self,X):
        z=np.dot(X,self.coeff)
        sig=1/(1+e**(-z))
        return sig
    
    def cost(self,y,y_pred):
        m=y.shape[0]
        temp=np.dot((1-y).T,log(1-y_pred))+ np.dot(y.T,log(y_pred))
        return -1*temp/m
    
    def train(self,X,Y,lr,epochs):
        add_dummy_feature(X,value=1.0)
        m=X.shape[0]
        self.coeff=np.zeros((X.shape[1]))
        for i in range(epochs):
            y_pred=self.sigmoid(X)
            error=y_pred-Y
            c=self.cost(Y,y_pred)
            self.history.append(c)
            self.coeff=self.coeff-(lr/m)*np.dot(X.T,error)
#         print(self.coeff)
            
    def round_off(self,pred):
        m=pred.shape[0]
        for i in range(m):
            k=pred[i]
            if (k>0.5):
                pred[i]=1
            else :
                pred[i]=0
                
        return pred
            

In [3]:
import pandas as pd
pcos=pd.read_csv('PCOS.csv')
print(pcos.head())

   PCOS (Y/N)  Age (yrs)  Weight (Kg)  Height(Cm)    BMI  Blood Group  \
0           0         28         44.6       152.0  19.30           15   
1           0         36         65.0       161.5  24.92           15   
2           1         33         68.8       165.0  25.27           11   
3           0         37         65.0       148.0  29.67           13   
4           0         25         52.0       161.0  20.06           11   

   Pulse rate(bpm)  RR (breaths/min)  Hb(g/dl)  Cycle(R/I)  ...  Pimples(Y/N)  \
0               78                22     10.48           0  ...             0   
1               74                20     11.70           0  ...             0   
2               72                18     11.80           0  ...             1   
3               72                20     12.00           0  ...             0   
4               72                18     10.00           0  ...             0   

   Fast food (Y/N)  Reg.Exercise(Y/N)  BP _Systolic (mmHg)  \
0           

In [4]:
target=pcos.iloc[:,0]
print(target)
data=pcos.iloc[:,1:-1]
print(data)

0      0
1      0
2      1
3      0
4      0
      ..
533    0
534    0
535    0
536    0
537    1
Name: PCOS (Y/N), Length: 538, dtype: int64
     Age (yrs)  Weight (Kg)  Height(Cm)    BMI  Blood Group  Pulse rate(bpm)  \
0           28         44.6     152.000  19.30           15               78   
1           36         65.0     161.500  24.92           15               74   
2           33         68.8     165.000  25.27           11               72   
3           37         65.0     148.000  29.67           13               72   
4           25         52.0     161.000  20.06           11               72   
..         ...          ...         ...    ...          ...              ...   
533         35         50.0     164.592  18.46           17               72   
534         30         63.2     158.000  25.32           15               72   
535         36         54.0     152.000  23.37           13               74   
536         27         50.0     150.000  22.22           

In [5]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
norm_data=scaler.fit_transform(data)
print(norm_data)

[[-0.63232048 -1.36259231 -0.74215445 ... -0.82062633  0.83765843
   0.76854523]
 [ 0.84675959  0.4851069   0.831574   ... -0.37057845 -0.00406782
  -0.43637476]
 [ 0.29210457  0.82928617  1.41136869 ...  1.87966092  0.83765843
   1.37100522]
 ...
 [ 0.84675959 -0.5112015  -0.74215445 ... -1.49569814  0.83765843
  -1.94252475]
 [-0.81720548 -0.87349546 -1.0734657  ... -0.14555452  0.83765843
   0.16608523]
 [-1.55674552  2.02485625  1.41136869 ...  0.75454123  1.11823385
   0.76854523]]


# PREDICTING MODEL USING LOGISTIC REGRESSION

In [6]:
from sklearn.model_selection import train_test_split

model=Logistic()
X=norm_data
Y=target
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,random_state=0,test_size=0.3)

model.train(X_train,Y_train,0.2,100)
# print(X_test)
print("-----------------Y-TEST-----------------",Y_test)
# print(shape(Y_test))
# pred=model.sigmoid(X_test)
# print(shape(pred))

pred=model.sigmoid(X_test)
# print(shape(pred))
print('-------------PREDICTED-VALUES---------------')
print(pred)

predict=model.round_off(pred)
print('---------ROUND OFF PREDICTED VALUES----------')
print(predict)
# print(shape(predict))

from sklearn import metrics
print('----------------Testing Accuracy------------------------')
print(metrics.accuracy_score(Y_test,predict))

from sklearn.metrics import confusion_matrix
print('---------------------------Confusion matrix for Testing----------')
c1=confusion_matrix(Y_test,predict)
print(c1)

TN,FP,FN,TP = confusion_matrix(Y_test,predict).ravel()
Performance_measure=(TN+TP)/(TN+FP+FN+TP)
print('--------------PERFORMANCE MEASURE------- : ')
print(Performance_measure)

Error_rate=(FP+FN)/(TN+FP+FN+TP)
print('------------ERROR RATE--------------- : ')
print(Error_rate)


-----------------Y-TEST----------------- 172    1
468    1
196    0
416    1
534    0
      ..
438    0
361    0
301    0
243    1
252    0
Name: PCOS (Y/N), Length: 162, dtype: int64
-------------PREDICTED-VALUES---------------
[0.99886819 0.57628247 0.6063521  0.76067238 0.40849503 0.96785139
 0.07634597 0.51214028 0.98781159 0.10580595 0.71962014 0.10016031
 0.99404987 0.07921203 0.99793106 0.98900308 0.73301742 0.02989127
 0.62977132 0.00930181 0.20498536 0.33087834 0.04809536 0.22147309
 0.1286607  0.07943557 0.99097212 0.05663783 0.14376147 0.05241488
 0.98466472 0.55522971 0.39104508 0.16622512 0.01701471 0.9991141
 0.10564042 0.95268486 0.05094655 0.9794293  0.04817415 0.53993908
 0.99449339 0.38966957 0.27673032 0.85530153 0.62742672 0.98861329
 0.99777048 0.80137435 0.01286412 0.04605646 0.65434176 0.63968713
 0.4546431  0.9392318  0.06743781 0.02193438 0.01382506 0.87789211
 0.20527325 0.08965216 0.01609193 0.34525315 0.99652037 0.0384296
 0.02619823 0.81438801 0.01066458 0.

# PREDICTING MODEL USING KNN


In [7]:
X=norm_data
Y=target

from sklearn.model_selection import train_test_split
X_train, X_test,y_train, y_test = train_test_split(X,Y,random_state=0,test_size=0.3)

from sklearn.neighbors import KNeighborsClassifier
Knn=KNeighborsClassifier(n_neighbors=5)
Knn.fit(X_train,y_train)

from sklearn import metrics
y_pred=Knn.predict(X_test)
print('---------Predicted Values----------------')
print(y_pred)
print('-------------Testing Accuracy---------------')
print(metrics.accuracy_score(y_test,y_pred))

from sklearn.metrics import confusion_matrix
print('---------------------------Confusion matrix for Testing----------')
c=confusion_matrix(y_test,y_pred)
print(c)

TN,FP,FN,TP = confusion_matrix(y_test, y_pred).ravel()
Performance_measure=(TN+TP)/(TN+FP+FN+TP)
print('--------------PERFORMANCE MEASURE------- : ')
print(Performance_measure)

Error_rate=(FP+FN)/(TN+FP+FN+TP)
print('------------ERROR RATE--------------- : ')
print(Error_rate)

---------Predicted Values----------------
[1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0
 0 0 1 0 0 1 0 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 1 0 1 1
 0 0 0 0 1 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 0 0 1 1 1 0 1 1 0
 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 0 1 0 0 0 1 1 0 0 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 1 1 0 1 0 0 0 1 0]
-------------Testing Accuracy---------------
0.8518518518518519
---------------------------Confusion matrix for Testing----------
[[97  4]
 [20 41]]
--------------PERFORMANCE MEASURE------- : 
0.8518518518518519
------------ERROR RATE--------------- : 
0.14814814814814814


  mode, _ = stats.mode(_y[neigh_ind, k], axis=1)


# PREDICTING MODEL USING  NAIVE BYES

In [8]:
X=norm_data
Y=target

from sklearn.model_selection import train_test_split
X_train, X_test,Y_train, y_test = train_test_split(X,Y,random_state=0,test_size=0.3)

from sklearn.naive_bayes import GaussianNB  
model = GaussianNB()  
model.fit(X_train, Y_train)  
predict=model.predict(X_test)
print('--------------Predicted Values---------------------')
print(predict)

from sklearn import metrics
print('-------------Testing Accuracy---------------')
print(metrics.accuracy_score(y_test,predict))

from sklearn.metrics import confusion_matrix
print('---------------------------Confusion matrix for Testing----------')
c=confusion_matrix(y_test,predict)
print(c)

TN,FP,FN,TP = confusion_matrix(y_test, y_pred).ravel()
Performance_measure=(TN+TP)/(TN+FP+FN+TP)
print('--------------PERFORMANCE MEASURE------- : ')
print(Performance_measure)

Error_rate=(FP+FN)/(TN+FP+FN+TP)
print('------------ERROR RATE--------------- : ')
print(Error_rate)

--------------Predicted Values---------------------
[1 1 1 1 0 1 0 1 1 0 1 0 1 0 1 1 1 1 0 0 0 1 0 0 0 0 1 1 0 0 1 1 1 0 0 1 0
 1 0 1 0 0 1 0 0 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 0 0 0 1 1 0 0 1 0 0 1 1 1 1
 0 1 0 1 0 0 0 1 1 0 0 0 0 0 1 1 0 0 0 1 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 1 1
 1 1 0 0 0 0 1 1 1 0 0 1 0 0 0 1 1 0 1 0 1 0 0 0 1 1 0 0 1 1 0 1 1 1 0 0 0
 0 0 0 0 0 1 1 1 1 1 0 1 1 0]
-------------Testing Accuracy---------------
0.8395061728395061
---------------------------Confusion matrix for Testing----------
[[76 25]
 [ 1 60]]
--------------PERFORMANCE MEASURE------- : 
0.8518518518518519
------------ERROR RATE--------------- : 
0.14814814814814814
