In [10]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import KFold
from sklearn import preprocessing


In [11]:
##################################################################################

def preprocess(dirty_df):
  dirty_df = dirty_df.drop(['patient_id','cohort_type'], axis = 1)
  target_map = {u'1': 1, u'0': 0}
  dirty_df['__target__'] = dirty_df['cohort_flag'].map(str).map(target_map)
  dirty_df = dirty_df.drop(['cohort_flag'], axis = 1)
  clean_X = dirty_df.drop('__target__', axis=1)
  clean_y = np.array(dirty_df['__target__'])
  return clean_X, clean_y

def preprocess_normalizationx(dirty_df):
  dirty_df = dirty_df.drop(['patient_id','cohort_type'], axis = 1)
  target_map = {u'1': 1, u'0': 0}
  dirty_df['__target__'] = dirty_df['cohort_flag'].map(str).map(target_map)
  dirty_df = dirty_df.drop(['cohort_flag'], axis = 1)
  clean_X = dirty_df.drop('__target__', axis=1)
  clean_X = clean_X.to_numpy()
  clean_X = preprocessing.normalize(clean_X, norm='l2')
  clean_y = np.array(dirty_df['__target__'])
  return clean_X, clean_y


def evaluateForest(ntrees, RFresults, X_train, y_train, X1_train, y1_train, X2_train, y2_train, X_test, y_test): 
  clf1 = RandomForestClassifier(n_estimators=ntrees,random_state=10000)
  clf2 = RandomForestClassifier(n_estimators=ntrees,random_state=10000)
  clf  = RandomForestClassifier(n_estimators=ntrees,random_state=10000)
  
  clf1.fit(X1_train, y1_train)
  accP1  = accuracy_score(y_test,clf1.predict(X_test))

  clf2.fit(X2_train, y2_train)
  accP2 = accuracy_score(y_test,clf2.predict(X_test))
    
  clf.fit(X_train, y_train)
  accALL = accuracy_score(y_test,clf.predict(X_test))
  
  # Merging of RF models  
  clf1.estimators_ += clf2.estimators_
  clf1.n_estimators = len(clf1.estimators_)
  accMERG = accuracy_score(y_test,clf1.predict(X_test))

  RFresults[i] = [accP1,accP2,accALL,accMERG]
  

In [33]:
##################################################################################

# Load the data
df1 = pd.read_csv('party_1.csv')
df2 = pd.read_csv('party_2.csv')

X1, y1 = preprocess_normalizationx(df1)
X2, y2 = preprocess_normalizationx(df2)

n_train=700
n_test=131
train_y1=y1[0:n_train]
train_X1=X1[0:n_train]
test_y1=y1[n_train:n_train+n_test]
test_x1=X1[n_train:n_train+n_test]    
    
train_y1=pd.DataFrame(train_y1)
train_X1=pd.DataFrame(train_X1)
test_y1=pd.DataFrame(test_y1)
test_x1=pd.DataFrame(test_x1)

n_times = 3

In [28]:
def generate_input_mpc(list_files, out_file_mpc, n_train, n_test):
    # part    
    train_y1=y1[0:n_train]
    train_X1=X1[0:n_train]
    test_y1=y1[n_train:n_train+n_test]
    test_x1=X1[n_train:n_train+n_test]    

    train_y1=pd.DataFrame(train_y1)
    train_X1=pd.DataFrame(train_X1)
    test_y1=pd.DataFrame(test_y1)
    test_x1=pd.DataFrame(test_x1)
    
    train_X1.to_csv(list_files[0], header=False, sep=' ', index=False)
    y1_aux = pd.DataFrame(train_y1)
    y1_aux.to_csv(list_files[1], header=False, sep=' ', index=False)
    test_x1.to_csv(list_files[2], header=False, sep=' ', index=False)
    y2_aux = pd.DataFrame(test_y1)
    y2_aux.to_csv(list_files[3], header=False, sep=' ', index=False)
    
    
    filenames = list_files
    with open(out_file_mpc, 'w') as outfile:
        for fname in filenames:
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)
                    
generate_input_mpc(['Player-Data/train_X1', 'Player-Data/train_y1', 'Player-Data/test_x1', 'Player-Data/test_y1'], 'Player-Data/Input-P0-0', n_train=700, n_test=131)

generate_input_mpc(['Player-Data/train_X2', 'Player-Data/train_y2', 'Player-Data/test_x2', 'Player-Data/test_y2'], 'Player-Data/Input-P1-0', n_train=700, n_test=131)

In [31]:
# date: July 22, 2021
# name: Martine De Cock
# description: Training ML models on IDASH2021, Track 3 data

# DP LR

import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from sklearn import preprocessing
from numpy import savetxt
import sys

rows = None
cols = None
num_of_folds = 2

##################################################################################

def preprocess(dirty_df):
    dirty_df = dirty_df.drop(['patient_id', 'cohort_type'], axis=1)
    target_map = {u'1': 1, u'0': 0}
    dirty_df['__target__'] = dirty_df['cohort_flag'].map(str).map(target_map)
    dirty_df = dirty_df.drop(['cohort_flag'], axis=1)
    clean_X = dirty_df.drop('__target__', axis=1)

    if cols is not None:
        clean_X = clean_X.iloc[:, :cols]

    clean_X = clean_X.to_numpy()
    clean_X = preprocessing.normalize(clean_X, norm='l2')
    clean_y = np.array(dirty_df['__target__'])

    if rows is not None:
        return clean_X[0:rows], clean_y[0:rows]

    return clean_X, clean_y


##################################################################################


# Load the data
df1 = pd.read_csv('data/alice_data.csv')
df2 = pd.read_csv('data/bob_data.csv')

print(df1.shape)

X1, y1 = preprocess(df1)
X2, y2 = preprocess(df2)

# This will hold 4 accuracy results for each of 5 folds
# (1) accuracy of model trained on data from P1
# (2) accuracy of model trained on data from P2
# (3) accuracy of model trained on data from P1 and from P2
# (4) accuracy of aggregation of model (1) and (2) from above
LRresults = np.zeros((5, 5))

kf1 = KFold(n_splits=num_of_folds, shuffle=True, random_state=42)
kf2 = KFold(n_splits=num_of_folds, shuffle=True, random_state=42)

epsilon = 1
mylambda = 0.5

fold = 5

for (train1_indices, test1_indices), (train2_indices, test2_indices) in zip(kf1.split(X1, y1), kf2.split(X2, y2)):

    print("Starting to process fold {n}".format(n=fold))

    AliceX_train, AliceX_test = X1[train1_indices, :].tolist(), X1[test1_indices, :].tolist()
    Alicey_train, Alicey_test = y1[train1_indices].tolist(), y1[test1_indices].tolist()
    BobX_train, BobX_test = X2[train2_indices, :].tolist(), X2[test2_indices, :].tolist()
    Boby_train, Boby_test = y2[train2_indices].tolist(), y2[test2_indices].tolist()

    # Get rid of scientific notation
    AliceX_train = [[str(f'{j:.10f}') for j in i] for i in AliceX_train]
    AliceX_test = [[str(f'{j:.10f}') for j in i] for i in AliceX_test]
    Alicey_train = [str(f'{i:.10f}') for i in Alicey_train]
    Alicey_test = [str(f'{i:.10f}') for i in Alicey_test]

    BobX_train = [[str(f'{j:.10f}') for j in i] for i in BobX_train]
    BobX_test = [[str(f'{j:.10f}') for j in i] for i in BobX_test]
    Boby_train = [str(f'{i:.10f}') for i in Boby_train]
    Boby_test = [str(f'{i:.10f}') for i in Boby_test]

    savetxt('data/Alice/train_X_fold{n}.csv'.format(n=fold), AliceX_train, delimiter=',', fmt='%s')
    savetxt('data/Alice/train_y_fold{n}.csv'.format(n=fold), Alicey_train, delimiter=',', fmt='%s')
    savetxt('data/Alice/test_X_fold{n}.csv'.format(n=fold), AliceX_test, delimiter=',', fmt='%s')
    savetxt('data/Alice/test_y_fold{n}.csv'.format(n=fold), Alicey_test, delimiter=',', fmt='%s')

    savetxt('data/Bob/train_X_fold{n}.csv'.format(n=fold), BobX_train, delimiter=',', fmt='%s')
    savetxt('data/Bob/train_y_fold{n}.csv'.format(n=fold), Boby_train, delimiter=',', fmt='%s')
    savetxt('data/Bob/test_X_fold{n}.csv'.format(n=fold), BobX_test, delimiter=',', fmt='%s')
    savetxt('data/Bob/test_y_fold{n}.csv'.format(n=fold), Boby_test, delimiter=',', fmt='%s')

    fold += 1


(831, 1877)
Starting to process fold 5
Starting to process fold 6


In [36]:
def generate_input_mpc1(list_files, out_file_mpc):
    filenames = list_files
    with open(out_file_mpc, 'w') as outfile:
        for fname in filenames:
            with open(fname) as infile:
                for line in infile:
                    outfile.write(line)
                    
generate_input_mpc1(['data/Alice/train_X_fold0.csv', 'data/Alice/train_y_fold0.csv', 'data/Alice/test_X_fold0.csv', 'data/Alice/test_y_fold0.csv'], 'data/Input-P0-0')

In [37]:
from sklearn.linear_model import Perceptron
from sklearn.metrics import classification_report, confusion_matrix

train_X1 = pd.read_csv('data/Alice/train_X_fold0.csv')
train_y1 = pd.read_csv('data/Alice/train_y_fold0.csv')
test_x1 = pd.read_csv('data/Alice/test_X_fold0.csv')
test_y1 = pd.read_csv('data/Alice/test_y_fold0.csv')

for i in range(n_times):
    perceptron_sklearn = Perceptron(max_iter=200)
    perceptron_sklearn.fit(train_X1,train_y1)

    resultado_predicao = perceptron_sklearn.predict(test_x1)

    print('confusion_matrix\n', confusion_matrix(resultado_predicao, test_y1))
    print('classification_report\n',classification_report(resultado_predicao, test_y1)) 

  return f(**kwargs)
  return f(**kwargs)


confusion_matrix
 [[58 13]
 [ 4 91]]
classification_report
               precision    recall  f1-score   support

         0.0       0.94      0.82      0.87        71
         1.0       0.88      0.96      0.91        95

    accuracy                           0.90       166
   macro avg       0.91      0.89      0.89       166
weighted avg       0.90      0.90      0.90       166

confusion_matrix
 [[58 13]
 [ 4 91]]
classification_report
               precision    recall  f1-score   support

         0.0       0.94      0.82      0.87        71
         1.0       0.88      0.96      0.91        95

    accuracy                           0.90       166
   macro avg       0.91      0.89      0.89       166
weighted avg       0.90      0.90      0.90       166

confusion_matrix
 [[58 13]
 [ 4 91]]
classification_report
               precision    recall  f1-score   support

         0.0       0.94      0.82      0.87        71
         1.0       0.88      0.96      0.91        95

  

  return f(**kwargs)


In [16]:
from sklearn.linear_model import LogisticRegression

for i in range(n_times):
    regr = LogisticRegression(max_iter=200)
    regr.fit(train_X1,train_y1)
    resultado_predicao = regr.predict(test_x1)
    print('confusion_matrix\n', confusion_matrix(resultado_predicao, test_y1))
    print('classification_report\n',classification_report(resultado_predicao, test_y1)) 

  return f(**kwargs)
  return f(**kwargs)


confusion_matrix
 [[33  6]
 [10 82]]
classification_report
               precision    recall  f1-score   support

           0       0.77      0.85      0.80        39
           1       0.93      0.89      0.91        92

    accuracy                           0.88       131
   macro avg       0.85      0.87      0.86       131
weighted avg       0.88      0.88      0.88       131

confusion_matrix
 [[33  6]
 [10 82]]
classification_report
               precision    recall  f1-score   support

           0       0.77      0.85      0.80        39
           1       0.93      0.89      0.91        92

    accuracy                           0.88       131
   macro avg       0.85      0.87      0.86       131
weighted avg       0.88      0.88      0.88       131

confusion_matrix
 [[33  6]
 [10 82]]
classification_report
               precision    recall  f1-score   support

           0       0.77      0.85      0.80        39
           1       0.93      0.89      0.91        92

  

  return f(**kwargs)


In [17]:
import tensorflow as tf
from sklearn.metrics import classification_report

for i in range(n_times):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(units=1, activation='sigmoid', input_shape=(1874, )))
    model.summary()
    model.compile(optimizer='Sgd', loss='binary_crossentropy', metrics = ['accuracy'])
    epochs_hist = model.fit(train_X1, train_y1, epochs = 200) 
    predict = model.predict(test_x1)
    y_predict = (predict > 0.5)
    print(classification_report(y_predict, test_y1)) 

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 1)                 1875      
Total params: 1,875
Trainable params: 1,875
Non-trainable params: 0
_________________________________________________________________
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch

Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200


Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         0
           1       1.00      0.67      0.80       131

    accuracy                           0.67       131
   macro avg       0.50      0.34      0.40       131
weighted avg       1.00      0.67      0.80       131

Model: "sequential_3"
_______________________________________________________________

  _warn_prf(average, modifier, msg_start, len(result))


Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200
Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155

Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200
              precision    recall  f1-score   support

       False       0.02      1.00      0.05         1
        True       1.00      0.68      0.81       130

    accuracy                           0.68       131
   macro avg       0.51      0.84      0.43       131
weighted avg       0.99      0.68      0.80       131

Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
d

Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200

Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200
Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 

Epoch 199/200
Epoch 200/200
              precision    recall  f1-score   support

       False       0.02      1.00      0.05         1
        True       1.00      0.68      0.81       130

    accuracy                           0.68       131
   macro avg       0.51      0.84      0.43       131
weighted avg       0.99      0.68      0.80       131



In [3]:
# For each method, these will hold 4 accuracy results for each of 5 folds
# (1) accuracy of model trained on data from P1
# (2) accuracy of model trained on data from P2
# (3) accuracy of model trained on data from P1 and from P2
# (4) accuracy of aggregation of model (1) and (2) from above
LRresults = np.zeros((5, 4))
RF50results = np.zeros((5, 4))
RF100results = np.zeros((5, 4))
RF200results = np.zeros((5, 4))
RF400results = np.zeros((5, 4))

kf1 = KFold(n_splits=5,shuffle = True,random_state = 42)
kf2 = KFold(n_splits=5,shuffle = True,random_state = 42)


i = 0
for result1,result2 in zip(kf1.split(X1,y1),kf2.split(X2,y2)):
  print("FOLD ", i+1)
  X1_train, X1_test = X1.iloc[result1[0]], X1.iloc[result1[1]]
  y1_train, y1_test = y1[result1[0]], y1[result1[1]]
  X2_train, X2_test = X2.iloc[result2[0]], X2.iloc[result2[1]]
  y2_train, y2_test = y2[result2[0]], y2[result2[1]]

  X_train = X1_train.append(X2_train)
  y_train = np.append(y1_train,y2_train)
  X_test = X1_test.append(X2_test)
  y_test = np.append(y1_test,y2_test)


  ########## Train and test logistic regression models #################

  clf1 = LogisticRegression(solver='liblinear',random_state=10000)
  clf2 = LogisticRegression(solver='liblinear',random_state=10000)
  clf = LogisticRegression(solver='liblinear',random_state=10000)
  
  clf1.fit(X1_train, y1_train)
  accP1  = accuracy_score(y_test,clf1.predict(X_test))
  
  clf2.fit(X2_train, y2_train)
  accP2 = accuracy_score(y_test,clf2.predict(X_test))
    
  clf.fit(X_train, y_train)
  accALL = accuracy_score(y_test,clf.predict(X_test))
  
  # Merging of LR models
  clf1.coef_ = (clf1.coef_ + clf2.coef_)/2
  clf1.intercept_ = (clf1.intercept_ + clf2.intercept_)/2
  accMERG = accuracy_score(y_test,clf1.predict(X_test))  

  LRresults[i] = [accP1,accP2,accALL,accMERG]
    

  ########## Train and test RF models #################################

  ntrees=50
  RFresults=RF50results
  evaluateForest(ntrees, RFresults, X_train, y_train, X1_train, y1_train, 
                         X2_train, y2_train, X_test, y_test) 
  
  ntrees=100
  RFresults=RF100results
  evaluateForest(ntrees, RFresults, X_train, y_train, X1_train, y1_train, 
                         X2_train, y2_train, X_test, y_test) 

  ntrees=200
  RFresults=RF200results
  evaluateForest(ntrees, RFresults, X_train, y_train, X1_train, y1_train, 
                         X2_train, y2_train, X_test, y_test) 

  ntrees=400
  RFresults=RF400results
  evaluateForest(ntrees, RFresults, X_train, y_train, X1_train, y1_train, 
                         X2_train, y2_train, X_test, y_test) 


  print("==========completed")
  i = i + 1

# Printing the averages over the 5 folds
print("          P1,   P2, All, P1&P2")
np.set_printoptions(precision=2)
print("LR:     ",np.mean(LRresults, axis=0))
print("RF-50:  ",np.mean(RF50results, axis=0))
print("RF-100: ",np.mean(RF100results, axis=0))
print("RF-200: ",np.mean(RF200results, axis=0))
print("RF-400: ",np.mean(RF400results, axis=0)) 

FOLD  1
FOLD  2
FOLD  3
FOLD  4
FOLD  5
          P1,   P2, All, P1&P2
LR:      [0.85 0.83 0.86 0.87]
RF-50:   [0.82 0.82 0.87 0.86]
RF-100:  [0.82 0.84 0.87 0.87]
RF-200:  [0.82 0.84 0.88 0.87]
RF-400:  [0.82 0.85 0.88 0.87]
