## Importing Libraries 

In [1]:
import numpy as np
import pandas as pd
from random import randint

In [2]:
import warnings
# Silence every warning for the rest of the session.
# NOTE(review): a blanket "ignore" also hides deprecation warnings from
# numpy/pandas/scikit-learn; consider narrowing the filter to a category.
warnings.filterwarnings("ignore")

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier

In [4]:
# Display names for the baseline estimators; `acc` pairs them with `models`
# by position, so both lists must stay in the same order.
classifiers = ['DecisionTree', 'KNeighbors']

In [5]:
# Baseline estimator instances, listed in the same order as `classifiers`.
# `acc` fits these objects directly, so they are left in a fitted state.
models = [DecisionTreeClassifier(random_state=0), KNeighborsClassifier()]

## Creating Needed Functions for GA 

In [6]:
def preprocessing(df):
    """Drop identifier/label columns and encode categorical columns as floats.

    The frame is modified in place and is also returned, so both
    ``preprocessing(df)`` and ``df = preprocessing(df)`` leave the caller
    holding the processed data.

    Steps:
      1. Drop ``PatientId``, ``EncounterId`` and ``ReadmissionWithin_90Days``.
      2. Replace missing values with 0.
      3. Replace the known category labels of each categorical column with
         a numeric code and store the column as float.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw data containing every column referenced below.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in.

    Raises
    ------
    KeyError
        If one of the expected columns is missing (for example when the
        function is applied twice to the same frame).
    ValueError
        If a categorical column still holds a label that is not covered by
        the mappings below and therefore cannot be cast to float.
    """
    # The position of a label in these lists is the numeric code it receives.
    race_labels = ['Black or African American', 'Other Race', 'Asian',
                   'American Indian or Alaska Native', 'UnKnown',
                   'Native Hawaiian or Other Pacific Islander', 'White']
    disposition_labels = ['Home', 'SNF', 'Home Health', 'Expired', 'IRF',
                          'Hospice', 'LTCH', 'ACH', 'Hospice - Home', 'LAMA',
                          'Other', 'Custodial', 'ICF', 'Psych', 'IP Admit',
                          'CAH', 'Expired - Unknown', 'Expired - Facility',
                          'Still Patient', 'Swing Bed', 'FHCF',
                          'Home Health - IV']

    encodings = {
        'Race': {name: code for code, name in enumerate(race_labels)},
        'DischargeDisposision': {name: code for code, name in enumerate(disposition_labels)},
        'Gender': {'Male': 1, 'Female': 0},
        # Comorbidity flags: the label marks presence; absence was NaN and is
        # already 0 after fillna.
        'DiabetesMellitus': {'DM': 1},
        'ChronicKidneyDisease': {'CKD': 1},
        'Anemia': {'Anemia': 1},
        'ChronicObstructivePulmonaryDisease': {'COPD': 1},
        # The trailing space is part of both the column name and the label.
        'Depression ': {'Depression ': 1},
    }

    df.drop(["PatientId", "EncounterId", "ReadmissionWithin_90Days"], axis=1, inplace=True)
    df.fillna(0, inplace=True)

    for column, mapping in encodings.items():
        # Assign the result back to the frame: `astype` returns a new Series,
        # and an in-place `replace` on `df[column]` acts on a temporary object
        # that is not guaranteed to write through to `df`.
        df[column] = df[column].replace(mapping).astype(float)

    return df

In [7]:
def init(size, num):
    """Create the initial population of random feature masks.

    Each chromosome is a boolean array of length ``num`` in which
    ``int(0.3 * num)`` randomly placed entries are False (feature excluded)
    and the rest are True (feature included).

    Parameters
    ----------
    size : int
        Number of chromosomes to generate.
    num : int
        Number of genes (features) per chromosome.

    Returns
    -------
    list of numpy.ndarray
        ``size`` boolean arrays of shape ``(num,)``.
    """
    n_excluded = int(0.3 * num)
    population = []

    for _ in range(size):
        # Use the builtin `bool`: the `np.bool` alias was deprecated in
        # NumPy 1.20 and removed in 1.24, where it raises AttributeError.
        csm = np.ones(num, dtype=bool)
        csm[:n_excluded] = False
        # Shuffle in place so the excluded features land at random positions.
        np.random.shuffle(csm)
        population.append(csm)

    return population

In [8]:
def fitness_function(population):
    """Score each chromosome and return scores and chromosomes, best first.

    Every chromosome is used as a boolean column mask: the estimator is
    fitted on the selected columns of the training data and its accuracy on
    the same columns of the test data is the chromosome's score.

    Note: ``model``, ``X_train``, ``X_test``, ``Y_train`` and ``Y_test`` are
    not parameters; they are looked up in the enclosing (notebook) scope and
    must be defined before this function is called.

    Parameters
    ----------
    population : sequence of boolean arrays
        One mask per chromosome.

    Returns
    -------
    (list, list)
        Accuracy scores in descending order, and the chromosomes reordered
        to match.
    """
    accuracies = []

    for mask in population:
        model.fit(X_train.iloc[:, mask], Y_train)
        predictions = model.predict(X_test.iloc[:, mask])
        accuracies.append(accuracy_score(Y_test, predictions))

    accuracies = np.array(accuracies)
    masks = np.array(population)

    # argsort is ascending; reverse it so the highest accuracy comes first.
    best_first = np.argsort(accuracies)[::-1]

    return list(accuracies[best_first]), list(masks[best_first, :])

In [9]:
def crossover(updated_population):
    """Breed children from consecutive pairs of parents.

    Parents are paired as (0, 1), (2, 3), ... and each pair produces one
    child made of the first half of the first parent followed by the second
    half of the second parent. When the number of parents is odd, the last
    parent is paired with the first one.

    The input list is not modified; a new list is returned.

    Parameters
    ----------
    updated_population : list of numpy.ndarray
        Selected parent chromosomes.

    Returns
    -------
    list of numpy.ndarray
        The parents (same objects, same order) followed by the children.
    """
    # Work on a shallow copy so the caller's list does not grow as children
    # are produced.
    parents = list(updated_population)
    n_parents = len(parents)
    children = []

    for i in range(0, n_parents, 2):
        first = parents[i]
        # Wrap around so an odd-sized population pairs its last parent with
        # the first parent instead of indexing past the end of the parents.
        second = parents[(i + 1) % n_parents]
        cut = len(first) // 2
        children.append(np.concatenate((first[:cut], second[cut:])))

    return parents + children

In [10]:
def mutation(after_cross, mutation_rate, n_feat):
    """Return mutated copies of the chromosomes.

    For every chromosome, ``int(mutation_rate * n_feat)`` positions are
    drawn uniformly from ``0 .. n_feat - 1`` and the gene at each drawn
    position is flipped. Positions are drawn with replacement, so a gene
    drawn twice ends up back at its original value.

    The input chromosomes are left untouched; each one is copied before
    being mutated.

    Parameters
    ----------
    after_cross : list of numpy.ndarray
        Chromosomes to mutate.
    mutation_rate : float
        Fraction of ``n_feat`` that determines how many flips are applied.
    n_feat : int
        Number of genes per chromosome.

    Returns
    -------
    list of numpy.ndarray
        One mutated copy per input chromosome, in the same order.
    """
    m_range = int(mutation_rate * n_feat)
    new_gen = []

    for chromo in after_cross:
        # Copy first: the incoming arrays may be shared with the caller
        # (e.g. the ranked population), and flipping them in place would
        # silently alter those as well.
        mutated = chromo.copy()

        for _ in range(m_range):
            pos = randint(0, n_feat - 1)
            mutated[pos] = not mutated[pos]

        new_gen.append(mutated)

    return new_gen

In [11]:
def selection(after_fit, n_parents):
    """Keep the first ``n_parents`` chromosomes of a ranked population.

    Parameters
    ----------
    after_fit : sequence
        Chromosomes, expected to be ordered best first.
    n_parents : int
        How many chromosomes to keep; must not exceed ``len(after_fit)``.

    Returns
    -------
    list
        A new list holding the leading ``n_parents`` items.
    """
    return [after_fit[rank] for rank in range(n_parents)]

In [12]:
def n_gen(data, label, size, new_features, new_parents, m_rate, next_gen, X_train, X_test, Y_train, Y_test):
    """Run the genetic algorithm and collect the best chromosome per generation.

    Each generation is evaluated with ``fitness_function``, reduced to the
    top ``new_parents`` chromosomes with ``selection``, extended with
    children by ``crossover`` and then passed through ``mutation``.

    Parameters
    ----------
    data, label, X_train, X_test, Y_train, Y_test
        Not referenced in this function's body; ``fitness_function`` is
        called with the population only.
        NOTE(review): the train/test arguments are accepted but never
        forwarded - confirm whether that is intended.
    size : int
        Number of chromosomes in the initial population.
    new_features : int
        Number of genes per chromosome.
    new_parents : int
        Number of top chromosomes kept as parents each generation.
    m_rate : float
        Mutation rate passed to ``mutation``.
    next_gen : int
        Number of generations to run.

    Returns
    -------
    (list, list)
        The best chromosome of every generation and the matching scores,
        both in generation order.
    """
    b_csm = []
    b_score = []

    new_gen = init(size, new_features)

    for i in range(next_gen):
        scores, after_fit = fitness_function(new_gen)

        print('Best score in generation', i+1, ':', scores[:1])

        # Record the winner immediately, as an independent copy, so that
        # later in-place changes to the population cannot alter what is
        # stored for this generation.
        b_csm.append(np.array(after_fit[0], copy=True))
        b_score.append(scores[0])

        after_selection = selection(after_fit, new_parents)
        after_cross = crossover(after_selection)
        new_gen = mutation(after_cross, m_rate, new_features)

    return b_csm, b_score

In [13]:
def acc(data, label):
    """Fit each baseline estimator and tabulate its test accuracy.

    The data is divided with ``split``; every estimator in the module-level
    ``models`` list is fitted on the training part and scored on the test
    part. Names come from the module-level ``classifiers`` list, matched to
    ``models`` by position.

    Parameters
    ----------
    data : pandas.DataFrame
        Feature matrix.
    label : array-like
        Target values aligned with ``data``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Classifier`` and ``Accuracy``, sorted by accuracy in
        descending order with a fresh 0..n-1 index.
    """
    table = pd.DataFrame({"Classifier": classifiers})

    X_train, X_test, Y_train, Y_test = split(data, label)

    accuracies = []
    for estimator in models:
        estimator.fit(X_train, Y_train)
        predictions = estimator.predict(X_test)
        accuracies.append(accuracy_score(Y_test, predictions))

    table["Accuracy"] = accuracies

    ranked = table.sort_values(by="Accuracy", ascending=False)
    return ranked.reset_index(drop=True)

In [14]:
def split(data, label, test_size=0.25, random_state=42):
    """Split features and labels into train and test parts.

    Thin wrapper around ``train_test_split``. The defaults reproduce the
    previously hard-coded 75/25 split with a fixed seed, so existing
    two-argument calls behave exactly as before.

    Parameters
    ----------
    data : array-like or pandas.DataFrame
        Feature matrix.
    label : array-like
        Target values aligned with ``data``.
    test_size : float or int, default 0.25
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded to ``train_test_split``; a fixed value makes the split
        repeatable.

    Returns
    -------
    tuple
        ``(X_train, X_test, Y_train, Y_test)``.
    """
    X_train, X_test, Y_train, Y_test = train_test_split(
        data, label, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, Y_train, Y_test

## Importing & Preprocessing Data 

In [15]:
# Load the raw training data; the path is relative to the working directory.
df = pd.read_csv("Training_Data.csv")

In [16]:
# Pull out the target column before `preprocessing` drops it from the frame.
label = df["ReadmissionWithin_90Days"]

In [17]:
# Binarise the target: 'Yes' -> 1, every other value (including missing) -> 0.
label = np.where(label == 'Yes', 1, 0)

In [18]:
# `preprocessing` drops columns from the frame it is given, so re-running this
# cell without reloading the CSV raises KeyError on the already-removed columns.
df = preprocessing(df)

In [19]:
# Display the preprocessed frame.
df

Unnamed: 0,DischargeDisposision,Gender,Race,DiabetesMellitus,ChronicKidneyDisease,Anemia,Depression,ChronicObstructivePulmonaryDisease,Age,ChronicDiseaseCount,...,ARBs,BetaBlockers,Diuretics,TotalMedicine,CardiacTroponin,Hemoglobin,SerumSodium,SerumCreatinine,BNP,NT-proBNP
0,0,1,6,1,0,1,0,1,58,18,...,0,0,0,0,0.0,0.00,0.0,0.000000,0.0,0.0
1,0,1,6,1,1,1,1,1,80,24,...,0,1,5,8,0.0,0.00,0.0,1.540000,0.0,0.0
2,0,1,6,1,1,1,0,1,63,11,...,0,1,1,2,0.0,10.20,0.0,0.000000,0.0,0.0
3,8,0,6,1,0,1,0,1,73,8,...,0,0,0,0,0.0,0.00,132.0,0.000000,0.0,0.0
4,1,0,6,0,0,1,0,1,85,20,...,0,0,0,0,0.0,7.26,0.0,0.000000,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8476,2,0,6,1,1,1,0,1,59,14,...,1,2,3,6,0.0,0.00,0.0,1.076667,0.0,0.0
8477,1,1,6,1,1,0,1,1,86,13,...,0,0,0,0,0.0,0.00,0.0,0.000000,0.0,0.0
8478,5,0,6,0,0,0,0,0,94,2,...,0,1,0,1,0.0,0.00,0.0,0.000000,0.0,0.0
8479,0,0,6,1,1,0,1,0,77,19,...,0,1,1,5,0.0,0.00,0.0,1.690000,0.0,0.0


## Accuracy with Other Classifiers

In [20]:
# Baseline accuracies of the estimators in `models` on all features.
score = acc(df, label)
score

Unnamed: 0,Classifier,Accuracy
0,KNeighbors,0.671381
1,DecisionTree,0.652994


## Training & Running GA

In [21]:
# Estimator scored inside the GA; `fitness_function` reads this global name
# `model` directly, so it must be defined before `n_gen` is called.
model = GradientBoostingClassifier(random_state = 0)

In [22]:
# These four globals are the data `fitness_function` fits and scores on.
X_train, X_test, Y_train, Y_test = split(df,label)

In [23]:
# Run the GA: 80 chromosomes with one gene per column of `df`, 25 parents kept
# per generation, mutation rate 0.20, 5 generations.
chromosomes, scorez = n_gen(df, label, size = 80, new_features = df.shape[1], new_parents = 25, m_rate = 0.20, next_gen = 5 , X_train = X_train, X_test = X_test, Y_train = Y_train, Y_test = Y_test)

Best score in generation 1 : [0.7435172088637435]
Best score in generation 2 : [0.7355021216407355]
Best score in generation 3 : [0.7364450730787364]
Best score in generation 4 : [0.7378595002357379]
Best score in generation 5 : [0.7369165487977369]


In [24]:
 # `scorez` holds one best score per generation; report the highest of them
 # rather than the first generation's value.
 print('Genetic Algorithm Accuracy Score:', max(scorez))

Genetic Algorithm Accuracy Score: 0.7435172088637435


## Fin 