In [1]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression

In [2]:
# Read CKD.csv from the working directory, keeping pandas' default integer
# row index (no column is promoted to the index), then display the frame.
dataset = pd.read_csv("CKD.csv")
dataset

Unnamed: 0,age,bp,sg,al,su,rbc,pc,pcc,ba,bgr,...,pcv,wc,rc,htn,dm,cad,appet,pe,ane,classification
0,2.000000,76.459948,c,3.0,0.0,normal,abnormal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,yes,no,yes
1,3.000000,76.459948,c,2.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,34.000000,12300.000000,4.705597,no,no,no,yes,poor,no,yes
2,4.000000,76.459948,a,1.0,0.0,normal,normal,notpresent,notpresent,99.000000,...,34.000000,8408.191126,4.705597,no,no,no,yes,poor,no,yes
3,5.000000,76.459948,d,1.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,38.868902,8408.191126,4.705597,no,no,no,yes,poor,yes,yes
4,5.000000,50.000000,c,0.0,0.0,normal,normal,notpresent,notpresent,148.112676,...,36.000000,12400.000000,4.705597,no,no,no,yes,poor,no,yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,219.000000,...,37.000000,9800.000000,4.400000,no,no,no,yes,poor,no,yes
395,51.492308,70.000000,c,0.0,2.0,normal,normal,notpresent,notpresent,220.000000,...,27.000000,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes
396,51.492308,70.000000,c,3.0,0.0,normal,normal,notpresent,notpresent,110.000000,...,26.000000,9200.000000,3.400000,yes,yes,no,poor,poor,no,yes
397,51.492308,90.000000,a,0.0,0.0,normal,normal,notpresent,notpresent,207.000000,...,38.868902,8408.191126,4.705597,yes,yes,no,yes,poor,yes,yes


In [3]:
# Bind a working name to the loaded frame. This is an alias, not a copy:
# `df` and `dataset` refer to the same object until `df` is reassigned.
df = dataset

In [4]:
# One-hot encode the non-numeric columns as 0/1 integers. drop_first=True
# drops the first level of each encoded column, so a column with k levels
# becomes k-1 indicator columns.
df = pd.get_dummies(df, drop_first=True, dtype=int)
df

Unnamed: 0,age,bp,al,su,bgr,bu,sc,sod,pot,hrmo,...,pc_normal,pcc_present,ba_present,htn_yes,dm_yes,cad_yes,appet_yes,pe_yes,ane_yes,classification_yes
0,2.000000,76.459948,3.0,0.0,148.112676,57.482105,3.077356,137.528754,4.627244,12.518156,...,0,0,0,0,0,0,1,1,0,1
1,3.000000,76.459948,2.0,0.0,148.112676,22.000000,0.700000,137.528754,4.627244,10.700000,...,1,0,0,0,0,0,1,0,0,1
2,4.000000,76.459948,1.0,0.0,99.000000,23.000000,0.600000,138.000000,4.400000,12.000000,...,1,0,0,0,0,0,1,0,0,1
3,5.000000,76.459948,1.0,0.0,148.112676,16.000000,0.700000,138.000000,3.200000,8.100000,...,1,0,0,0,0,0,1,0,1,1
4,5.000000,50.000000,0.0,0.0,148.112676,25.000000,0.600000,137.528754,4.627244,11.800000,...,1,0,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
394,51.492308,70.000000,0.0,0.0,219.000000,36.000000,1.300000,139.000000,3.700000,12.500000,...,1,0,0,0,0,0,1,0,0,1
395,51.492308,70.000000,0.0,2.0,220.000000,68.000000,2.800000,137.528754,4.627244,8.700000,...,1,0,0,1,1,0,1,0,1,1
396,51.492308,70.000000,3.0,0.0,110.000000,115.000000,6.000000,134.000000,2.700000,9.100000,...,1,0,0,1,1,0,0,0,0,1
397,51.492308,90.000000,0.0,0.0,207.000000,80.000000,6.800000,142.000000,5.500000,8.500000,...,1,0,0,1,1,0,1,0,1,1


In [5]:
# Target is the encoded class indicator; every remaining column is a feature.
dep_y = df['classification_yes']
indep_x = df.drop(columns=['classification_yes'])

In [6]:
def lda(x_train,y_train,x_test, n):
    """Fit a LinearDiscriminantAnalysis projection and apply it to both splits.

    Parameters
    ----------
    x_train : training features used (with ``y_train``) to fit the projection.
    y_train : training labels.
    x_test : test features; only transformed, never used for fitting.
    n : value passed to ``LinearDiscriminantAnalysis(n_components=...)``.

    Returns
    -------
    tuple
        ``(x_train_lda, x_test_lda, fitted_transformer)``.
    """
    # Fit on the training split only, then reuse the same fitted object for
    # both projections so the test split never influences the fit.
    reducer = LinearDiscriminantAnalysis(n_components=n).fit(x_train, y_train)
    projected_train = reducer.transform(x_train)
    projected_test = reducer.transform(x_test)
    return projected_train, projected_test, reducer

In [7]:
def split_scaler(indep_x,dep_y):
    """Split features/target into train and test sets and standardize the features.

    The split holds out 20% of the rows (``test_size=0.2``) with
    ``random_state=0``. The ``StandardScaler`` is fitted on the training
    features only and then applied to both splits.

    Returns
    -------
    tuple
        ``(x_train, x_test, y_train, y_test, scaler)`` where the two feature
        sets are the scaled versions and ``scaler`` is the fitted
        ``StandardScaler``.
    """
    raw_train, raw_test, y_train, y_test = train_test_split(
        indep_x, dep_y, test_size=0.2, random_state=0
    )
    scaler = StandardScaler().fit(raw_train)
    return (
        scaler.transform(raw_train),
        scaler.transform(raw_test),
        y_train,
        y_test,
        scaler,
    )

def cm_prediction(classifier,x_test_lda,y_test):
    """Predict on the test features and return the classifier with its accuracy.

    Parameters
    ----------
    classifier : fitted estimator exposing ``predict``.
    x_test_lda : test features passed to ``classifier.predict``.
    y_test : true labels compared against the predictions.

    Returns
    -------
    tuple
        ``(classifier, accuracy)`` where ``classifier`` is the object passed
        in and ``accuracy`` is ``accuracy_score(y_test, predictions)``.
    """
    y_pred = classifier.predict(x_test_lda)
    # Only the accuracy is returned to callers, so the confusion matrix and
    # classification report that used to be computed here and then discarded
    # are no longer calculated.
    accuracy = accuracy_score(y_test, y_pred)
    return classifier, accuracy

def logistic(x_train_lda,y_train,x_test_lda,y_test):
    """Fit a LogisticRegression (``random_state=0``) and score it on the test split.

    Returns whatever ``cm_prediction`` returns for the fitted model:
    the ``(classifier, accuracy)`` pair.
    """
    # `fit` returns the estimator itself, so construction and fitting chain.
    model = LogisticRegression(random_state=0).fit(x_train_lda, y_train)
    fitted_and_accuracy = cm_prediction(model, x_test_lda, y_test)
    return fitted_and_accuracy

In [8]:
def LDA_classification(acclog):
    """Build a one-row summary table of accuracy scores.

    Parameters
    ----------
    acclog : values assigned to the ``'Logistic'`` column; must be compatible
        with the single-row index ``['LDA']``.

    Returns
    -------
    pandas.DataFrame
        Frame indexed by ``'LDA'`` with one column, ``'Logistic'``.
    """
    model_column = 'Logistic'
    summary = pd.DataFrame(index=['LDA'], columns=[model_column])
    summary[model_column] = acclog
    return summary

# Holds the accuracy score(s) later passed to LDA_classification; filled once
# the logistic model has been evaluated.
acclog = []

In [9]:
# Split and scale once; `sc` is the fitted StandardScaler that is pickled
# further down and reused to scale the user-supplied sample.
x_train,x_test,y_train,y_test,sc = split_scaler(indep_x,dep_y)


In [10]:
# Project the scaled features onto a single LDA component (n=1).
# NOTE(review): this rebinds the name `lda` from the helper function to the
# fitted transformer it returns. Running this cell a second time without first
# re-running the cell that defines `lda` will try to call the transformer
# object. Later cells read `lda` as the fitted transformer, so any rename has
# to be applied to them as well.
x_train_lda, x_test_lda, lda = lda(x_train,y_train,x_test, 1)

# Start from an empty accuracy list before the model is evaluated.
acclog=[]

In [11]:
# Inspect the LDA-projected training features (one column, since n=1 above).
x_train_lda

array([[-1.19295746],
       [-0.9727614 ],
       [ 3.53581083],
       [ 3.63653091],
       [-2.32171708],
       [-1.87286893],
       [ 1.94035944],
       [ 0.18843777],
       [ 0.56767341],
       [-3.95288532],
       [-1.38869563],
       [-1.04749716],
       [ 1.92005085],
       [ 0.71359632],
       [-2.23100308],
       [-1.09516459],
       [-2.2295652 ],
       [ 0.19477264],
       [-0.25168344],
       [-0.3117815 ],
       [-0.45070106],
       [ 0.87208099],
       [ 2.17082786],
       [-2.63009251],
       [ 2.35424767],
       [-2.20507531],
       [ 1.6541583 ],
       [-1.90843509],
       [ 1.86481808],
       [-2.94165004],
       [-1.70819581],
       [ 1.99105078],
       [ 0.14866287],
       [ 2.63572395],
       [-1.99577494],
       [-2.36546185],
       [-0.02193525],
       [ 2.41474247],
       [ 2.15192524],
       [ 2.27072556],
       [-0.75297794],
       [ 1.9921241 ],
       [-3.53633098],
       [-0.98299517],
       [-0.53890428],
       [ 2

In [12]:
# Inspect the LDA-projected test features.
x_test_lda

array([[ 3.54420132e+00],
       [-4.55010779e-01],
       [ 2.26584201e+00],
       [-2.76595747e+00],
       [-1.87715751e+00],
       [-1.45200644e+00],
       [-3.63962319e+00],
       [-1.19878905e+00],
       [ 1.64608368e+00],
       [ 2.68143284e+00],
       [ 2.28425273e-01],
       [ 4.48146142e-01],
       [ 2.18209825e+00],
       [-1.26877344e+00],
       [ 2.12944976e+00],
       [ 2.37045521e+00],
       [-8.63262703e-01],
       [ 2.54067950e+00],
       [-2.79482107e+00],
       [-1.93478672e+00],
       [ 2.13989983e+00],
       [-1.47317008e+00],
       [-1.57242186e+00],
       [ 1.85880283e+00],
       [-7.24495508e+00],
       [ 1.93377799e+00],
       [ 3.26805984e+00],
       [ 3.21241482e+00],
       [-1.31908598e+00],
       [ 2.14664267e-01],
       [-3.52282047e-01],
       [-2.65293405e+00],
       [ 1.48890850e+00],
       [ 1.75317604e+00],
       [ 2.42725601e+00],
       [ 3.09569804e+00],
       [ 9.95346296e-01],
       [ 2.10106661e+00],
       [-1.9

In [13]:
# At this point `lda` is the fitted transformer returned by the helper, not the helper function.
lda

In [14]:
# Train the logistic model on the LDA-projected features and score it on the
# held-out split.
classifier, accuracy = logistic(x_train_lda, y_train, x_test_lda, y_test)
# Rebind rather than append: LDA_classification assigns this list to a
# one-row frame, so appending again on a re-run of this cell would leave two
# entries and make the summary cell fail with a length mismatch.
acclog = [accuracy]

In [15]:
# Build the one-row accuracy summary (row 'LDA', column 'Logistic').
result = LDA_classification(acclog)

In [16]:
# Display the accuracy summary table.
result

Unnamed: 0,Logistic
LDA,0.975


In [17]:
### Save the trained logistic-regression model

In [18]:
# Persist the trained classifier. The context manager guarantees the file is
# flushed and closed even if pickling raises.
filename = 'Finalized_Logistic_Model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

In [19]:
### Save the preprocessing step (the fitted StandardScaler)

In [20]:
# Persist the fitted scaler, then read it back so the cells below run against
# exactly what was written to disk. Both handles are closed by `with`.
filenameSC = 'sc.pkl'
with open(filenameSC, 'wb') as scaler_file:
    pickle.dump(sc, scaler_file)
with open(filenameSC, 'rb') as scaler_file:
    sc = pickle.load(scaler_file)

In [21]:
### Save the fitted LDA transformer

In [22]:
# Persist the fitted LDA transformer, then read it back so the cells below run
# against exactly what was written to disk. Both handles are closed by `with`.
filenameLDA = 'lda.pkl'
with open(filenameLDA, 'wb') as lda_file:
    pickle.dump(lda, lda_file)
with open(filenameLDA, 'rb') as lda_file:
    lda = pickle.load(lda_file)

In [23]:
# List the encoded column names. Apart from the target 'classification_yes',
# this is the feature order the scaler was fitted on and therefore the order
# a new sample must follow.
df.columns

Index(['age', 'bp', 'al', 'su', 'bgr', 'bu', 'sc', 'sod', 'pot', 'hrmo', 'pcv',
       'wc', 'rc', 'sg_b', 'sg_c', 'sg_d', 'sg_e', 'rbc_normal', 'pc_normal',
       'pcc_present', 'ba_present', 'htn_yes', 'dm_yes', 'cad_yes',
       'appet_yes', 'pe_yes', 'ane_yes', 'classification_yes'],
      dtype='object')

In [24]:
# Single sample to score, keyed by feature name so every value is visibly tied
# to the column it feeds. The values are unchanged from the original
# positional list; only the labels are new.
user_features = {
    'age': 5.0, 'bp': 76.58, 'al': 3.0, 'su': 0.0, 'bgr': 148.11, 'bu': 57.48,
    'sc': 6.25, 'sod': 154.3, 'pot': 5.25, 'hrmo': 12.56, 'pcv': 52.0, 'wc': 7700,
    # NOTE(review): once labelled, the positional list turns out to give `rc`
    # the value 0 (every rc shown in the dataset preview is around 3-5) and to
    # set both `sg_b` and `sg_d` to 1, although they are mutually exclusive
    # one-hot levels of `sg`. It looks like an `rc` measurement was left out
    # and the flags shifted by one position - confirm the intended sample.
    'rc': 0,
    'sg_b': 1, 'sg_c': 0, 'sg_d': 1, 'sg_e': 0,
    'rbc_normal': 1, 'pc_normal': 0, 'pcc_present': 1, 'ba_present': 0,
    'htn_yes': 1, 'dm_yes': 0, 'cad_yes': 1, 'appet_yes': 0, 'pe_yes': 1, 'ane_yes': 0,
}

# Fail loudly if the sample and the training features ever drift apart,
# instead of silently scaling a misaligned or incomplete row.
mismatched = set(indep_x.columns) ^ set(user_features)
if mismatched:
    raise ValueError(f"User input does not match training features: {sorted(mismatched)}")

# Build the row in the training column order. Passing a labelled frame also
# matches how the scaler was fitted (on a DataFrame), so scikit-learn does not
# warn about missing feature names.
User_input = pd.DataFrame([user_features], columns=indep_x.columns)

In [25]:
# Standardize the sample with the scaler fitted on the training split.
scaled_input = sc.transform(User_input)
scaled_input



array([[-2.89939466e+00, -3.59418284e-03,  1.70813011e+00,
        -3.65008855e-01, -1.53315608e-02, -5.08082167e-02,
         5.52477536e-01,  1.70735356e+00,  3.37552164e-01,
         4.29417851e-02,  1.61244852e+00, -2.40918855e-01,
        -5.61639544e+00,  2.05657682e+00, -5.15628758e-01,
         2.12132034e+00, -1.26188616e-01,  3.73210014e-01,
        -2.05657682e+00,  3.04800305e+00, -2.37258068e-01,
         1.31396184e+00, -7.40637353e-01,  3.28858857e+00,
        -1.97678838e+00,  1.93937981e+00, -4.36232172e-01]])

In [26]:
# Project the scaled sample with the fitted LDA transformer, exactly as the
# training and test features were projected before fitting the classifier.
lda_input = lda.transform(scaled_input)
lda_input

array([[-0.22496028]])

In [27]:
# Reload the classifier saved earlier in this notebook and predict the class
# of the projected sample. `with` closes the file handle after loading.
# Only unpickle files you created yourself: pickle.load can execute arbitrary
# code from an untrusted file.
with open('Finalized_Logistic_Model.sav', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
Prediction = loaded_model.predict(lda_input)

In [28]:
# Display the predicted label, encoded like the `classification_yes` target
# the model was trained on.
Prediction

array([1])