# Importing libraries

In [1]:
import numpy as np
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score 

import warnings #just to remove warning
warnings.filterwarnings("ignore")

# Loading dataset

In [2]:
df = pd.read_excel("Training_Data.xlsx")

In [3]:
df.head()

Unnamed: 0,PatientId,EncounterId,DischargeDisposision,Gender,Race,DiabetesMellitus,ChronicKidneyDisease,Anemia,Depression,ChronicObstructivePulmonaryDisease,...,BetaBlockers,Diuretics,TotalMedicine,CardiacTroponin,Hemoglobin,SerumSodium,SerumCreatinine,BNP,NT-proBNP,ReadmissionWithin_90Days
0,4200412,199171333,Home,Male,White,DM,,Anemia,,COPD,...,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,Yes
1,4055894,26704337,Home,Male,White,DM,CKD,Anemia,Depression,COPD,...,1,5,8,0.0,0.0,0.0,1.54,0.0,0.0,No
2,4867407,60388216,Home,Male,White,DM,CKD,Anemia,,COPD,...,1,1,2,0.0,10.2,0.0,0.0,0.0,0.0,No
3,4058064,274642265,Hospice - Home,Female,White,DM,,Anemia,,COPD,...,0,0,0,0.0,0.0,132.0,0.0,0.0,0.0,No
4,4150623,70000001557327,SNF,Female,White,,,Anemia,,COPD,...,0,0,0,0.0,7.26,0.0,0.0,0.0,0.0,No


# Preprocessing 

In [4]:
df.shape

(8481, 57)

In [5]:
df.isnull().sum()

PatientId                                0
EncounterId                              0
DischargeDisposision                     0
Gender                                   0
Race                                    93
DiabetesMellitus                      3857
ChronicKidneyDisease                  3906
Anemia                                3002
Depression                            5108
ChronicObstructivePulmonaryDisease    3954
Age                                      0
ChronicDiseaseCount                      0
LengthOfStay                             0
EmergencyVisit                           0
InpatientVisit                           0
OutpatientVisit                          0
TotalVisits                              0
BMIMin                                   0
BMIMax                                   0
BMIMedian                                0
BMIMean                                  0
BPDiastolicMin                           0
BPDiastolicMax                           0
BPDiastolic

## Age
Age contain 0 values. As age can't be 0 so fill 0 with mean of that column

In [6]:
df['Age'] = df['Age'].replace(0, int(df['Age'].mean())) 

In [7]:
df['Age'].unique()

array([ 58,  80,  63,  73,  85,  83,  57,  66,  59,  77,  76,  71,  82,
        70,  91,  79,  98,  64,  86,  88,  75,  96,  99,  53,  78,  55,
        67,  89,  69,  81,  62,  61,  72,  31,  45,  87,  68,  90,  74,
        47,  60,  43,  94,  65,  25,  54,  84,  44,  52,  30,  49,  48,
        92,  40,  33,  38,  34,  50,  39,  93,  56,  37,  41,  51,  95,
        46,  42,  97, 101,  21,  18,  35,  29, 100, 102,  27,  28,  36,
        32], dtype=int64)

## TotalVisits, InpatientVisit, OutpatientVisit
Total visit should be equal to sum of inpatient and outpatient visit but 7309 out of 8481 values of totalvisit are not equal to inpatient_vist + out_patient_visit so make these value equal to sum of in and outpatient

In [8]:
count = 0
for i in range(0, len(df)):
    if (df['TotalVisits'][i] != df['InpatientVisit'][i] + df['OutpatientVisit'][i]):
        count += 1
print(count)

7309


In [9]:
for i in range(0, len(df)):
    if (df['TotalVisits'][i] != df['InpatientVisit'][i] + df['OutpatientVisit'][i]):
        df['TotalVisits'][i] = df['InpatientVisit'][i] + df['OutpatientVisit'][i]

## BPDiastolicMin, BPDiastolicMax, BPDiastolicMean, BPDiastolicMedian
3748 rows out of 8481 are same for BPDiastolicMin, BPDiastolicMax, BPDiastolicMean, BPDiastolicMedian

## BPSystolicMin, BPSystolicMax, BPSystolicMean, BPSystolicMedian
3748 rows out of 8481 are same for BPSystolicMin, BPSystolicMax, BPSystolicMean, BPSystolicMedian

In [10]:
count = 0
for i in range(0, len(df)):
    if (df['BPDiastolicMin'][i] == df['BPDiastolicMax'][i] == df['BPDiastolicMean'][i] == df['BPDiastolicMedian'][i] ):
        count += 1
print("Value same for Diastolic", count)

count = 0
for i in range(0, len(df)):
    if (df['BPSystolicMin'][i] == df['BPSystolicMax'][i] == df['BPSystolicMean'][i] == df['BPSystolicMedian'][i] ):
        count += 1
print("Value same for Systolic", count)

Value same for Diastolic 3748
Value same for Systolic 3748


In [1]:
# import matplotlib.pyplot as plt

# fig, axs = plt.subplots(2, 2,figsize=(8,8))


# axs[0, 0].scatter(df['PatientId'], df['BPDiastolicMin'],  s = 5)
# axs[0, 0].set_title('Min')

# axs[0, 1].scatter(df['PatientId'], df['BPDiastolicMax'], s = 5)
# axs[0, 1].set_title('Max')

# axs[1, 0].scatter(df['PatientId'], df['BPDiastolicMean'], s = 5)
# axs[1, 0].set_title('Mean')

# axs[1, 1].scatter(df['PatientId'], df['BPDiastolicMedian'], s = 5)
# axs[1, 1].set_title('Median')

# plt.suptitle('Diastolic',fontsize=20)

# fig, axs = plt.subplots(2, 2,figsize=(8,8))

# axs[0, 0].scatter(df['PatientId'], df['BPSystolicMin'],  s = 5)
# axs[0, 0].set_title('Min')

# axs[0, 1].scatter(df['PatientId'], df['BPSystolicMax'], s = 5)
# axs[0, 1].set_title('Max')

# axs[1, 0].scatter(df['PatientId'], df['BPSystolicMean'], s = 5)
# axs[1, 0].set_title('Mean')

# axs[1, 1].scatter(df['PatientId'], df['BPSystolicMedian'], s = 5)
# axs[1, 1].set_title('Median')

# plt.suptitle('Systolic',fontsize=20)

In [12]:
# Fill 0 vlues with mean of that columns

df['BPDiastolicMin'] = df['BPDiastolicMin'].replace(0, int(df['BPDiastolicMin'].mean())) 
df['BPDiastolicMax'] = df['BPDiastolicMax'].replace(0, int(df['BPDiastolicMax'].mean())) 
df['BPDiastolicMean'] = df['BPDiastolicMean'].replace(0, (df['BPDiastolicMean'].mean())) 
df['BPDiastolicMedian'] = df['BPDiastolicMedian'].replace(0, (df['BPDiastolicMedian'].mean())) 

df['BPSystolicMin'] = df['BPSystolicMin'].replace(0, int(df['BPSystolicMin'].mean())) 
df['BPSystolicMax'] = df['BPSystolicMax'].replace(0, int(df['BPSystolicMax'].mean())) 
df['BPSystolicMean'] = df['BPSystolicMean'].replace(0, (df['BPSystolicMean'].mean())) 
df['BPSystolicMedian'] = df['BPSystolicMedian'].replace(0, (df['BPSystolicMedian'].mean()))

## TemperatureMin, TemperatureMax, TemperatureMean, TemperatureMedian
4218 out of 8481 values of TemperatureMin, TemperatureMax, TemperatureMean, TemperatureMedian are same

In [13]:
# 4218 values of tempraturemin , max, mean, and median are same
count = 0
for i in range(0, len(df)):
    if (df['TemperatureMin'][i] == df['TemperatureMax'][i] == df['TemperatureMean'][i] == df['TemperatureMedian'][i] ):
        count += 1
print(count)

4218


In [14]:
# Fill 0 wiht mean

df['TemperatureMin'] = df['TemperatureMin'].replace(0, (df['TemperatureMin'].mean())) 
df['TemperatureMax'] = df['TemperatureMax'].replace(0, (df['TemperatureMax'].mean())) 
df['TemperatureMean'] = df['TemperatureMean'].replace(0, (df['TemperatureMean'].mean())) 
df['TemperatureMedian'] = df['TemperatureMedian'].replace(0, (df['TemperatureMedian'].mean())) 

## HeartRateMin, HeartRateMax, HeartRateMean, HeartRateMedian
6728 out of 8481 values of HeartRateMin, HeartRateMax, HeartRateMean, HeartRateMedian are same

In [15]:
count = 0
for i in range(0, len(df)):
    if (df['HeartRateMin'][i] == df['HeartRateMax'][i] == df['HeartRateMean'][i] == df['HeartRateMedian'][i] ):
        count += 1
print(count)

6728


In [16]:
# Fill 0 wiht mean
df['HeartRateMin'] = df['HeartRateMin'].replace(0, int(df['HeartRateMin'].mean())) 
df['HeartRateMax'] = df['HeartRateMax'].replace(0, int(df['HeartRateMax'].mean())) 
df['HeartRateMean'] = df['HeartRateMean'].replace(0, (df['HeartRateMean'].mean())) 
df['HeartRateMedian'] = df['HeartRateMedian'].replace(0, (df['HeartRateMedian'].mean())) 

## PulseRateMin, PulseRateMax, PulseRateMean, PulseRateMedian
6237 out of 8481 values of PulseRateMin, PulseRateMax, PulseRateMean, PulseRateMedian are same

## RespiratoryRateMin, RespiratoryRateMax, RespiratoryRateMean, RespiratoryRateMedian
3713 out of 8481 values of RespiratoryRateMin, RespiratoryRateMax, RespiratoryRateMean, RespiratoryRateMedian are same

In [17]:
count = 0
for i in range(0, len(df)):
    if (df['PulseRateMin'][i] == df['PulseRateMax'][i] == df['PulseRateMean'][i] == df['PulseRateMedian'][i] ):
        count += 1
print("PulseRate ",count)

count = 0
for i in range(0, len(df)):
    if (df['RespiratoryRateMin'][i] == df['RespiratoryRateMax'][i] == df['RespiratoryRateMean'][i] == df['RespiratoryRateMedian'][i] ):
        count += 1
print("RespiratoryRate ",count)

PulseRate  6237
RespiratoryRate  3713


In [18]:
# Fill 0 wiht mean
df['PulseRateMin'] = df['PulseRateMin'].replace(0, int(df['PulseRateMin'].mean())) 
df['PulseRateMax'] = df['PulseRateMax'].replace(0, int(df['PulseRateMax'].mean())) 
df['PulseRateMean'] = df['PulseRateMean'].replace(0, (df['PulseRateMean'].mean())) 
df['PulseRateMean'] = df['PulseRateMean'].replace(0, (df['PulseRateMean'].mean())) 

df['RespiratoryRateMin'] = df['RespiratoryRateMin'].replace(0, int(df['RespiratoryRateMin'].mean())) 
df['RespiratoryRateMax'] = df['RespiratoryRateMax'].replace(0, int(df['RespiratoryRateMax'].mean())) 
df['RespiratoryRateMean'] = df['RespiratoryRateMean'].replace(0, (df['RespiratoryRateMean'].mean())) 
df['RespiratoryRateMedian'] = df['RespiratoryRateMedian'].replace(0, (df['RespiratoryRateMedian'].mean())) 

In [19]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8481 entries, 0 to 8480
Data columns (total 57 columns):
 #   Column                              Non-Null Count  Dtype  
---  ------                              --------------  -----  
 0   PatientId                           8481 non-null   int64  
 1   EncounterId                         8481 non-null   object 
 2   DischargeDisposision                8481 non-null   object 
 3   Gender                              8481 non-null   object 
 4   Race                                8388 non-null   object 
 5   DiabetesMellitus                    4624 non-null   object 
 6   ChronicKidneyDisease                4575 non-null   object 
 7   Anemia                              5479 non-null   object 
 8   Depression                          3373 non-null   object 
 9   ChronicObstructivePulmonaryDisease  4527 non-null   object 
 10  Age                                 8481 non-null   int64  
 11  ChronicDiseaseCount                 8481 no

In [20]:
df.columns

Index(['PatientId', 'EncounterId', 'DischargeDisposision', 'Gender', 'Race',
       'DiabetesMellitus', 'ChronicKidneyDisease', 'Anemia', 'Depression ',
       'ChronicObstructivePulmonaryDisease', 'Age', 'ChronicDiseaseCount',
       'LengthOfStay', 'EmergencyVisit', 'InpatientVisit', 'OutpatientVisit',
       'TotalVisits', 'BMIMin', 'BMIMax', 'BMIMedian', 'BMIMean',
       'BPDiastolicMin', 'BPDiastolicMax', 'BPDiastolicMedian',
       'BPDiastolicMean', 'BPSystolicMin', 'BPSystolicMax', 'BPSystolicMedian',
       'BPSystolicMean', 'TemperatureMin', 'TemperatureMax',
       'TemperatureMedian', 'TemperatureMean', 'HeartRateMin', 'HeartRateMax',
       'HeartRateMedian', 'HeartRateMean', 'PulseRateMin', 'PulseRateMax',
       'PulseRateMedian', 'PulseRateMean', 'RespiratoryRateMin',
       'RespiratoryRateMax', 'RespiratoryRateMedian', 'RespiratoryRateMean',
       'ACEInhibitors', 'ARBs', 'BetaBlockers', 'Diuretics', 'TotalMedicine',
       'CardiacTroponin', 'Hemoglobin', 'SerumSod

## Race
Create a new class nemed UnKnown na replace Nan and other race wiht UnKnown 

In [21]:
df['Race'].unique()

array(['White', 'Black or African American', 'Other Race', nan, 'Asian',
       'UnKnown', 'American Indian or Alaska Native',
       'Native Hawaiian or Other Pacific Islander'], dtype=object)

In [22]:
df['Race'].fillna("UnKnown", inplace = True)

In [23]:
df['Race'].replace('Other Race', 'UnKnown', inplace = True)

## DiabetesMellitus
As DiabetesMellitus only have two unique values i.e 'DM', nan.
Create new class UnKnown and replace Nan with it

In [24]:
df['DiabetesMellitus'].unique()

array(['DM', nan], dtype=object)

In [25]:
df['DiabetesMellitus'].value_counts()

DM    4624
Name: DiabetesMellitus, dtype: int64

In [26]:
df['DiabetesMellitus'].fillna("UnKnown", inplace = True)

## ChronicKidneyDisease
As ChronicKidneyDisease only have two unique values Nan and 'CKD'. 
Replace Nan with UnKnown

In [27]:
df['ChronicKidneyDisease'].unique()

array([nan, 'CKD'], dtype=object)

In [28]:
df['ChronicKidneyDisease'].value_counts()

CKD    4575
Name: ChronicKidneyDisease, dtype: int64

In [29]:
df['ChronicKidneyDisease'].fillna("UnKnown", inplace = True)

## Anemia
Anemia also have two unique values Nan and Anemia
Replace Nan with UnKnown

In [30]:
df['Anemia'].unique()

array(['Anemia', nan], dtype=object)

In [31]:
df['Anemia'].value_counts()

Anemia    5479
Name: Anemia, dtype: int64

In [32]:
df['Anemia'].fillna("UnKnown", inplace = True)

## Depression
Depression also have only two unique values Nan and Depression 
Replace Nan with Unknown

In [33]:
df['Depression '].unique()

array([nan, 'Depression '], dtype=object)

In [34]:
df['Depression '].value_counts()

Depression     3373
Name: Depression , dtype: int64

In [35]:
df['Depression '].fillna("UnKnown", inplace = True)

## ChronicObstructivePulmonaryDisease
ChronicObstructivePulmonaryDisease also have two unique values Nan and ChronicObstructivePulmonaryDisease 
Replace Nan with Unknown

In [36]:
df['ChronicObstructivePulmonaryDisease'].unique()

array(['COPD', nan], dtype=object)

In [37]:
df['ChronicObstructivePulmonaryDisease'].value_counts()

COPD    4527
Name: ChronicObstructivePulmonaryDisease, dtype: int64

In [38]:
df['ChronicObstructivePulmonaryDisease'].fillna("UnKnown", inplace = True)

In [39]:
# Store ReadmissionWithin_90Days as classes

In [40]:
classes = df[['ReadmissionWithin_90Days']]

In [41]:
df.drop("ReadmissionWithin_90Days", axis = 1, inplace = True)

In [42]:
df.head()

Unnamed: 0,PatientId,EncounterId,DischargeDisposision,Gender,Race,DiabetesMellitus,ChronicKidneyDisease,Anemia,Depression,ChronicObstructivePulmonaryDisease,...,ARBs,BetaBlockers,Diuretics,TotalMedicine,CardiacTroponin,Hemoglobin,SerumSodium,SerumCreatinine,BNP,NT-proBNP
0,4200412,199171333,Home,Male,White,DM,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,4055894,26704337,Home,Male,White,DM,CKD,Anemia,Depression,COPD,...,0,1,5,8,0.0,0.0,0.0,1.54,0.0,0.0
2,4867407,60388216,Home,Male,White,DM,CKD,Anemia,UnKnown,COPD,...,0,1,1,2,0.0,10.2,0.0,0.0,0.0,0.0
3,4058064,274642265,Hospice - Home,Female,White,DM,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,0.0,132.0,0.0,0.0,0.0
4,4150623,70000001557327,SNF,Female,White,UnKnown,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,7.26,0.0,0.0,0.0,0.0


In [43]:
#Store column of dataframe
col_list = df.columns.to_list()
print(col_list)
len(col_list)

['PatientId', 'EncounterId', 'DischargeDisposision', 'Gender', 'Race', 'DiabetesMellitus', 'ChronicKidneyDisease', 'Anemia', 'Depression ', 'ChronicObstructivePulmonaryDisease', 'Age', 'ChronicDiseaseCount', 'LengthOfStay', 'EmergencyVisit', 'InpatientVisit', 'OutpatientVisit', 'TotalVisits', 'BMIMin', 'BMIMax', 'BMIMedian', 'BMIMean', 'BPDiastolicMin', 'BPDiastolicMax', 'BPDiastolicMedian', 'BPDiastolicMean', 'BPSystolicMin', 'BPSystolicMax', 'BPSystolicMedian', 'BPSystolicMean', 'TemperatureMin', 'TemperatureMax', 'TemperatureMedian', 'TemperatureMean', 'HeartRateMin', 'HeartRateMax', 'HeartRateMedian', 'HeartRateMean', 'PulseRateMin', 'PulseRateMax', 'PulseRateMedian', 'PulseRateMean', 'RespiratoryRateMin', 'RespiratoryRateMax', 'RespiratoryRateMedian', 'RespiratoryRateMean', 'ACEInhibitors', 'ARBs', 'BetaBlockers', 'Diuretics', 'TotalMedicine', 'CardiacTroponin', 'Hemoglobin', 'SerumSodium', 'SerumCreatinine', 'BNP', 'NT-proBNP']


56

In [44]:
# Function to encode column having data type "Object"
from sklearn.preprocessing import LabelEncoder
def encoding(val_rec):
    
    for i in val_rec.columns:
        lb = LabelEncoder() 
        if(val_rec[i].dtypes == np.object):
            val_rec[i] = lb.fit_transform(val_rec[i].astype(str))
        
    return val_rec
        


In [45]:
# Function to calculate Fitness(accuracy)
def calculateFitness(x):
    x = pd.DataFrame(df, columns = x)
    x = encoding(x)
    classes_ = encoding(classes)
    
    X_train, X_test, y_train, y_test = train_test_split(x, classes_, random_state=1)
    
    logisticRegr = LogisticRegression()
    logisticRegr.fit(X_train, y_train)
    predictions = logisticRegr.predict(X_test)
#     score = logisticRegr.score(y_test, predictions)
    return (accuracy_score(y_test, predictions)*100)

In [46]:
# Function to generate Population
def generate_population():
    papulation = []
    fitness = []
    size = 30
    length_of_column = 56
    
    while(len(papulation)!=40):
        feature_list = []
        while(len(feature_list)!=size):
            random = np.random.randint(0, length_of_column)
            if col_list[random] not in feature_list:
                feature_list.append(col_list[random])
        if feature_list not in papulation:
            papulation.append(feature_list)
            fit = calculateFitness(feature_list)
            fitness.append(fit)
    return papulation,fitness

In [47]:
# roulette wheel
def selectOne( fit):
    max = sum([c for c in fit])
    selection_probs = [c/max for c in fit]
    return fit.index(fit[np.random.choice(len(fit), p=selection_probs)])

In [48]:
# Function to select rendomly two index
def selection(fitns):
    selection_list = []
    while (len(selection_list)!=2):
        temp_ = selectOne(fitns)
        if temp_ not in selection_list:
            selection_list.append(temp_)
#             print("Selected index ", temp_)
    return selection_list

In [49]:
# Cross over
def cross_over(p_0, p_1,s):
    while(True):
    #     print(len(np.unique(p_0)) , len(np.unique(p_1)))
        line = np.random.randint(0, s)
#         print("Break at ", line)

        c0 = p_0[0:line] + p_1[line:]
        c1 = p_1[0:line] + p_0[line:]
#         print(len(np.unique(c0)) , len(np.unique(c1)))
        if (len(np.unique(c0)) == s and len(np.unique(c1)) == s):
            break

    return c0,c1


In [50]:
# Mutation
def mutation(c0,c1,s):
    total_col = 56
    mutation_rate = np.random.randint(0, 100)
#     mutation_rate = 39
#     print(mutation_rate)
    fitness_thresold = 40
    if mutation_rate <= fitness_thresold:
        rand_index = np.random.randint(0, s)
#         print("index to change ", rand_index)
        while (True):
            c0[rand_index] = col_list[np.random.randint(0, total_col)]
            c1[rand_index] = col_list[np.random.randint(0, total_col)]
            if (len(np.unique(c0)) == s and len(np.unique(c1)) == s):
                break
#             print("No")
    return c0, c1

In [51]:
# Stoping condition (if the last num fitness(Accuracy are equal))
def stoping_condition(ftns,num):
    count = 1
    for i in range(2, num+1):
        if ftns[-1] == ftns[-i]:
            count += 1
    return count
#     print("Last index are same ",count)

In [52]:
def Genetic_Algorithms():
    # ----------- generate population
    size = 30
    epoch = 500
    length_of_last_index_toCheck = 5
    pop , ftnes = generate_population()
    max_acc = max(ftnes)
    print(" Max is ",max_acc)
    # print(ftnes,"\n\n")

    for i in range(0, epoch):

        # ----------- selection
        select = selection(ftnes)
        s0 = select[0]
        s1 = select[1]

        # print(s0, s1)

        # select parent

        p0 = pop[s0]
        p1 = pop[s1]
    #     print(p0,p1, "\n")

        p0_fitness = ftnes[s0]
        p1_fitness = ftnes[s1]
    #     print(p0_fitness,"\n",p1_fitness)

        # ----------- cross over

        c_0, c_1 = cross_over(p0,p1, size)

    #     print(c_0)
    #     print(c_1)
    #     print("\n\n")
        c_0_, c_1_ = mutation(c_0, c_1, size)
    #     print(c_0_)
    #     print(c_1_)

        c0_fitness = calculateFitness(c_0_)
        c1_fitness = calculateFitness(c_1_)
    #     print(c0_fitness, c1_fitness)


        if c0_fitness > p0_fitness and c_0_ not in pop:
            pop[s0] = c_0_
            ftnes[s0] = c0_fitness
    #             print("Yes")
    #         pop.append(c_0_)
    #         ftnes.append(c0_fitness)    

        if c1_fitness > p1_fitness and c_1_ not in pop:
            pop[s1] = c_1_
            ftnes[s1] = c1_fitness
    #             print("Yes")
    #         pop.append(c_1_)
    #         ftnes.append(c1_fitness)
        if max_acc < max(ftnes):
            max_acc = max(ftnes)
            print(" Max is ",max(ftnes))
        temp = stoping_condition(ftnes,length_of_last_index_toCheck)
        if (temp == 5 or max_acc >= 99):
            print("Existing due to same accuracy or reach max accuracy ")
            break 
    return pop[ftnes.index(max(ftnes))], ftnes,pop

In [53]:
temp , f, p= Genetic_Algorithms()
print("\n30 Best selected feature are \n\n",temp)

 Max is  73.64450730787364
 Max is  73.6916548797737
 Max is  73.92739273927393
 Max is  73.97454031117398
 Max is  74.06883545497406
 Max is  74.1159830268741

30 Best selected feature are 

 ['SerumSodium', 'BNP', 'Hemoglobin', 'BMIMean', 'DiabetesMellitus', 'TemperatureMedian', 'Gender', 'BPSystolicMean', 'BPSystolicMax', 'ARBs', 'BMIMin', 'TotalVisits', 'InpatientVisit', 'EmergencyVisit', 'PulseRateMin', 'BMIMax', 'BPDiastolicMedian', 'Anemia', 'TotalMedicine', 'RespiratoryRateMedian', 'LengthOfStay', 'HeartRateMin', 'ChronicDiseaseCount', 'BPSystolicMedian', 'TemperatureMin', 'RespiratoryRateMean', 'TemperatureMax', 'Diuretics', 'OutpatientVisit', 'NT-proBNP']


## Results without any Feature Selection

In [54]:
df.head()

Unnamed: 0,PatientId,EncounterId,DischargeDisposision,Gender,Race,DiabetesMellitus,ChronicKidneyDisease,Anemia,Depression,ChronicObstructivePulmonaryDisease,...,ARBs,BetaBlockers,Diuretics,TotalMedicine,CardiacTroponin,Hemoglobin,SerumSodium,SerumCreatinine,BNP,NT-proBNP
0,4200412,199171333,Home,Male,White,DM,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0
1,4055894,26704337,Home,Male,White,DM,CKD,Anemia,Depression,COPD,...,0,1,5,8,0.0,0.0,0.0,1.54,0.0,0.0
2,4867407,60388216,Home,Male,White,DM,CKD,Anemia,UnKnown,COPD,...,0,1,1,2,0.0,10.2,0.0,0.0,0.0,0.0
3,4058064,274642265,Hospice - Home,Female,White,DM,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,0.0,132.0,0.0,0.0,0.0
4,4150623,70000001557327,SNF,Female,White,UnKnown,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,7.26,0.0,0.0,0.0,0.0


In [55]:
x = df
x

Unnamed: 0,PatientId,EncounterId,DischargeDisposision,Gender,Race,DiabetesMellitus,ChronicKidneyDisease,Anemia,Depression,ChronicObstructivePulmonaryDisease,...,ARBs,BetaBlockers,Diuretics,TotalMedicine,CardiacTroponin,Hemoglobin,SerumSodium,SerumCreatinine,BNP,NT-proBNP
0,4200412,199171333,Home,Male,White,DM,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,0.00,0.0,0.000000,0.0,0.0
1,4055894,26704337,Home,Male,White,DM,CKD,Anemia,Depression,COPD,...,0,1,5,8,0.0,0.00,0.0,1.540000,0.0,0.0
2,4867407,60388216,Home,Male,White,DM,CKD,Anemia,UnKnown,COPD,...,0,1,1,2,0.0,10.20,0.0,0.000000,0.0,0.0
3,4058064,274642265,Hospice - Home,Female,White,DM,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,0.00,132.0,0.000000,0.0,0.0
4,4150623,70000001557327,SNF,Female,White,UnKnown,UnKnown,Anemia,UnKnown,COPD,...,0,0,0,0,0.0,7.26,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8476,4152524,40004415567,Home Health,Female,White,DM,CKD,Anemia,UnKnown,COPD,...,1,2,3,6,0.0,0.00,0.0,1.076667,0.0,0.0
8477,4042227,14347947026,SNF,Male,White,DM,CKD,UnKnown,Depression,COPD,...,0,0,0,0,0.0,0.00,0.0,0.000000,0.0,0.0
8478,4603405,67117733,Hospice,Female,White,UnKnown,UnKnown,UnKnown,UnKnown,UnKnown,...,0,1,0,1,0.0,0.00,0.0,0.000000,0.0,0.0
8479,4033677,68564389,Home,Female,White,DM,CKD,UnKnown,Depression,UnKnown,...,0,1,1,5,0.0,0.00,0.0,1.690000,0.0,0.0


In [56]:
from sklearn.preprocessing import LabelEncoder

listt = x.columns
for i in range(0, len(listt)):
    lb = LabelEncoder() 
    if x.dtypes[listt[i]] == np.object:
#         print("yes")
        x[listt[i]] = lb.fit_transform(x[listt[i]].astype(str))


In [57]:
X_train, X_test, y_train, y_test = train_test_split(x, classes, random_state=1)

In [58]:
from sklearn.linear_model import LogisticRegression
logisticRegr = LogisticRegression()
logisticRegr.fit(X_train, y_train)
predictions = logisticRegr.predict(X_test)
score = accuracy_score(y_test, predictions)
print(score)

0.7123998114097124


## Conclusion
As you can see that results with feature selection are better than results obtained wihtout any feature selection