# Data Loading

In [1]:
import os
import numpy as np
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.linear_model import LogisticRegression

In [3]:
data_dir_path = '../data'
filename_train = 'train_data.txt'
filename_test = 'test_data.txt'
filename_labels_test = 'answer.txt'

In [4]:
data_train = np.loadtxt(os.path.join(data_dir_path,filename_train),delimiter = ',')
labels_train = data_train[:,-1]
data_train = data_train[:,:-1]
data_test = np.loadtxt(os.path.join(data_dir_path,filename_test),delimiter = ',')
labels_test = np.loadtxt(os.path.join(data_dir_path,filename_labels_test),delimiter=',')

In [5]:
def myload(file_path):
    with open(file_path,'r') as f:
        lines = f.readlines()
        res = []
        for line in lines:
            res.append(list(map(lambda x:float(x),line.strip().split(','))))
    return np.array(res)


In [6]:
X_train,X_val,Y_train,Y_val = train_test_split(data_train,
                                               labels_train,
                                               train_size=0.8,
                                               random_state=1)

In [14]:
Y_train.sum()/len(Y_train)

0.3665625

In [7]:
import torch
import torch.nn as nn
import torch.optim 
from torch.utils.data import Dataset, DataLoader

In [8]:
de = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [9]:
model = nn.Sequential(
    nn.Linear(1000,300),
    nn.ReLU(),
    nn.Linear(300,300),
    nn.ReLU(),
    nn.Linear(300,20),
    nn.ReLU(),
    nn.Dropout(p=0.2),
    nn.Linear(20,1),
    nn.Sigmoid()
).to(de)

In [10]:
num_epochs = 200
batch_size = 100
lossF = nn.BCELoss(reduction='sum')
optimizer = torch.optim.Adam(model.parameters(),lr=0.0001,)

In [11]:
class BankRiskDataSet(Dataset):
    def __init__(self,X,Y):
        self.X = X
        self.Y = Y
    def __len__(self):
        return len(self.X)
    def __getitem__(self,i):
        return {"X": torch.from_numpy(self.X[i,:]),
                "Y":torch.tensor(self.Y[i])}

In [12]:
dataset_train = BankRiskDataSet(X_train,Y_train)
dataset_val= BankRiskDataSet(X_val,Y_val)
dataloader_train = DataLoader(dataset_train,batch_size=batch_size,num_workers=4)
dataloader_val = DataLoader(dataset_val,batch_size=batch_size,num_workers=4)

In [13]:
total_avg_loss_train = []
total_avg_loss_val = []
accuracy_train = []
accuracy_val = []
for epoch in range(1,num_epochs+1):
    model.train()
    avg_loss = 0
    n_equal = 0
    for i, data in enumerate(dataloader_train):
        x = data['X'].to(de).float()
        y = data["Y"].to(de).unsqueeze(1)
        y_pre = model(x)
        loss = lossF(y_pre,y)
        n_equal += torch.sum(y_pre.round()==y).item()
        model.zero_grad()
        loss.backward()
        optimizer.step()
        avg_loss +=loss
        if i%20==0 :
            print("\tEpoch: {}/{}\t {}/{} \t loss: {}".format(
                epoch,num_epochs,i,len(dataloader_train),loss))
    avg_loss /= len(dataset_train)
    total_avg_loss_train.append(avg_loss)
    accuracy_train.append(n_equal/(i*batch_size))
    print("Epoch: {}/{}, average training loss: {} accuracy: {}".format(
        epoch,num_epochs,avg_loss,n_equal/(i*batch_size)))
    model.eval()
    avg_loss=0
    n_equal = 0
    for i, data in enumerate(dataloader_val):
        x = data['X'].to(de).float()
        y = data["Y"].to(de).unsqueeze(1)
        y_pre = model(x)
        loss = lossF(y_pre,y)
        n_equal += torch.sum(y_pre.round()==y).item()
        avg_loss +=loss
    avg_loss /= len(dataset_val)
    total_avg_loss_val.append(avg_loss)
    accuracy_val.append(n_equal/(i*batch_size))
    print("Epoch: {}/{}, average validation loss: {} accuracy: {}".format(
        epoch,num_epochs,avg_loss,n_equal/(64*batch_size)))
    print(''.join(['-']*100))

	Epoch: 1/200	 0/64 	 loss: 69.55006408691406
	Epoch: 1/200	 20/64 	 loss: 63.773841857910156
	Epoch: 1/200	 40/64 	 loss: 65.99728393554688
	Epoch: 1/200	 60/64 	 loss: 71.68710327148438
Epoch: 1/200, average training loss: 0.6664705276489258 accuracy: 0.6360317460317461
Epoch: 1/200, average validation loss: 0.6619478464126587 accuracy: 0.15578125
----------------------------------------------------------------------------------------------------
	Epoch: 2/200	 0/64 	 loss: 64.22123718261719
	Epoch: 2/200	 20/64 	 loss: 64.63844299316406
	Epoch: 2/200	 40/64 	 loss: 63.27885437011719
	Epoch: 2/200	 60/64 	 loss: 70.93885803222656
Epoch: 2/200, average training loss: 0.657530665397644 accuracy: 0.6436507936507937
Epoch: 2/200, average validation loss: 0.6605881452560425 accuracy: 0.15578125
----------------------------------------------------------------------------------------------------
	Epoch: 3/200	 0/64 	 loss: 63.21237564086914
	Epoch: 3/200	 20/64 	 loss: 63.44578170776367
	Ep

	Epoch: 19/200	 0/64 	 loss: 41.66135787963867
	Epoch: 19/200	 20/64 	 loss: 30.754152297973633
	Epoch: 19/200	 40/64 	 loss: 50.063720703125
	Epoch: 19/200	 60/64 	 loss: 32.32978820800781
Epoch: 19/200, average training loss: 0.39042988419532776 accuracy: 0.8580952380952381
Epoch: 19/200, average validation loss: 0.5836552381515503 accuracy: 0.1815625
----------------------------------------------------------------------------------------------------
	Epoch: 20/200	 0/64 	 loss: 41.204345703125
	Epoch: 20/200	 20/64 	 loss: 29.9614200592041
	Epoch: 20/200	 40/64 	 loss: 42.005088806152344
	Epoch: 20/200	 60/64 	 loss: 30.775989532470703
Epoch: 20/200, average training loss: 0.37695708870887756 accuracy: 0.8649206349206349
Epoch: 20/200, average validation loss: 0.6131319999694824 accuracy: 0.181875
----------------------------------------------------------------------------------------------------
	Epoch: 21/200	 0/64 	 loss: 43.90792465209961
	Epoch: 21/200	 20/64 	 loss: 29.1510200

	Epoch: 37/200	 0/64 	 loss: 28.866436004638672
	Epoch: 37/200	 20/64 	 loss: 22.875743865966797
	Epoch: 37/200	 40/64 	 loss: 20.94289779663086
	Epoch: 37/200	 60/64 	 loss: 18.362991333007812
Epoch: 37/200, average training loss: 0.24385422468185425 accuracy: 0.943968253968254
Epoch: 37/200, average validation loss: 0.6594459414482117 accuracy: 0.17859375
----------------------------------------------------------------------------------------------------
	Epoch: 38/200	 0/64 	 loss: 28.958024978637695
	Epoch: 38/200	 20/64 	 loss: 21.37632942199707
	Epoch: 38/200	 40/64 	 loss: 18.939861297607422
	Epoch: 38/200	 60/64 	 loss: 19.146472930908203
Epoch: 38/200, average training loss: 0.23554694652557373 accuracy: 0.9492063492063492
Epoch: 38/200, average validation loss: 0.6639131903648376 accuracy: 0.1784375
----------------------------------------------------------------------------------------------------
	Epoch: 39/200	 0/64 	 loss: 23.586124420166016
	Epoch: 39/200	 20/64 	 loss: 

	Epoch: 55/200	 0/64 	 loss: 38.80913162231445
	Epoch: 55/200	 20/64 	 loss: 24.755285263061523
	Epoch: 55/200	 40/64 	 loss: 16.79283332824707
	Epoch: 55/200	 60/64 	 loss: 26.704471588134766
Epoch: 55/200, average training loss: 0.21956920623779297 accuracy: 0.9322222222222222
Epoch: 55/200, average validation loss: 1.3090946674346924 accuracy: 0.1421875
----------------------------------------------------------------------------------------------------
	Epoch: 56/200	 0/64 	 loss: 29.345916748046875
	Epoch: 56/200	 20/64 	 loss: 32.53141403198242
	Epoch: 56/200	 40/64 	 loss: 31.881343841552734
	Epoch: 56/200	 60/64 	 loss: 34.04395294189453
Epoch: 56/200, average training loss: 0.245337575674057 accuracy: 0.9160317460317461
Epoch: 56/200, average validation loss: 1.1374211311340332 accuracy: 0.1509375
----------------------------------------------------------------------------------------------------
	Epoch: 57/200	 0/64 	 loss: 20.29239273071289
	Epoch: 57/200	 20/64 	 loss: 27.65

	Epoch: 73/200	 0/64 	 loss: 9.781824111938477
	Epoch: 73/200	 20/64 	 loss: 4.634573936462402
	Epoch: 73/200	 40/64 	 loss: 5.449886322021484
	Epoch: 73/200	 60/64 	 loss: 1.5483832359313965
Epoch: 73/200, average training loss: 0.07272098958492279 accuracy: 0.9993650793650793
Epoch: 73/200, average validation loss: 1.0433696508407593 accuracy: 0.1784375
----------------------------------------------------------------------------------------------------
	Epoch: 74/200	 0/64 	 loss: 7.565694808959961
	Epoch: 74/200	 20/64 	 loss: 3.1620874404907227
	Epoch: 74/200	 40/64 	 loss: 9.231230735778809
	Epoch: 74/200	 60/64 	 loss: 1.5573183298110962
Epoch: 74/200, average training loss: 0.06205952167510986 accuracy: 1.0031746031746032
Epoch: 74/200, average validation loss: 1.091023325920105 accuracy: 0.17671875
----------------------------------------------------------------------------------------------------
	Epoch: 75/200	 0/64 	 loss: 6.688286781311035
	Epoch: 75/200	 20/64 	 loss: 2.38

	Epoch: 91/200	 0/64 	 loss: 1.6523646116256714
	Epoch: 91/200	 20/64 	 loss: 6.298296928405762
	Epoch: 91/200	 40/64 	 loss: 1.7148432731628418
	Epoch: 91/200	 60/64 	 loss: 6.211189270019531
Epoch: 91/200, average training loss: 0.04869731888175011 accuracy: 1.003968253968254
Epoch: 91/200, average validation loss: 1.2215322256088257 accuracy: 0.17453125
----------------------------------------------------------------------------------------------------
	Epoch: 92/200	 0/64 	 loss: 1.325610876083374
	Epoch: 92/200	 20/64 	 loss: 5.806584358215332
	Epoch: 92/200	 40/64 	 loss: 1.667372465133667
	Epoch: 92/200	 60/64 	 loss: 22.040668487548828
Epoch: 92/200, average training loss: 0.05105359107255936 accuracy: 1.0015873015873016
Epoch: 92/200, average validation loss: 1.2850303649902344 accuracy: 0.1746875
----------------------------------------------------------------------------------------------------
	Epoch: 93/200	 0/64 	 loss: 1.8699820041656494
	Epoch: 93/200	 20/64 	 loss: 3.7

	Epoch: 109/200	 0/64 	 loss: 0.2630351483821869
	Epoch: 109/200	 20/64 	 loss: 0.18247486650943756
	Epoch: 109/200	 40/64 	 loss: 0.34598585963249207
	Epoch: 109/200	 60/64 	 loss: 0.31125396490097046
Epoch: 109/200, average training loss: 0.004226042423397303 accuracy: 1.0152380952380953
Epoch: 109/200, average validation loss: 1.3770567178726196 accuracy: 0.17828125
----------------------------------------------------------------------------------------------------
	Epoch: 110/200	 0/64 	 loss: 0.2576953172683716
	Epoch: 110/200	 20/64 	 loss: 0.11547815054655075
	Epoch: 110/200	 40/64 	 loss: 0.3049963712692261
	Epoch: 110/200	 60/64 	 loss: 0.16081932187080383
Epoch: 110/200, average training loss: 0.0025144254323095083 accuracy: 1.0158730158730158
Epoch: 110/200, average validation loss: 1.471667766571045 accuracy: 0.17734375
----------------------------------------------------------------------------------------------------
	Epoch: 111/200	 0/64 	 loss: 0.18962210416793823
	Epoc

	Epoch: 126/200	 60/64 	 loss: 0.07179802656173706
Epoch: 126/200, average training loss: 0.0008850910817272961 accuracy: 1.0158730158730158
Epoch: 126/200, average validation loss: 1.665224552154541 accuracy: 0.1790625
----------------------------------------------------------------------------------------------------
	Epoch: 127/200	 0/64 	 loss: 0.03952128440141678
	Epoch: 127/200	 20/64 	 loss: 0.09662816673517227
	Epoch: 127/200	 40/64 	 loss: 0.09024892747402191
	Epoch: 127/200	 60/64 	 loss: 0.024554427713155746
Epoch: 127/200, average training loss: 0.0011394324246793985 accuracy: 1.0158730158730158
Epoch: 127/200, average validation loss: 1.69119393825531 accuracy: 0.17921875
----------------------------------------------------------------------------------------------------
	Epoch: 128/200	 0/64 	 loss: 0.09276936948299408
	Epoch: 128/200	 20/64 	 loss: 0.09561511874198914
	Epoch: 128/200	 40/64 	 loss: 0.15945787727832794
	Epoch: 128/200	 60/64 	 loss: 0.12883351743221283
Ep

Epoch: 143/200, average validation loss: 1.7920366525650024 accuracy: 0.179375
----------------------------------------------------------------------------------------------------
	Epoch: 144/200	 0/64 	 loss: 0.029697611927986145
	Epoch: 144/200	 20/64 	 loss: 0.06691034138202667
	Epoch: 144/200	 40/64 	 loss: 0.04075854644179344
	Epoch: 144/200	 60/64 	 loss: 0.039157480001449585
Epoch: 144/200, average training loss: 0.0005339503404684365 accuracy: 1.0158730158730158
Epoch: 144/200, average validation loss: 1.8026798963546753 accuracy: 0.17875
----------------------------------------------------------------------------------------------------
	Epoch: 145/200	 0/64 	 loss: 0.016415253281593323
	Epoch: 145/200	 20/64 	 loss: 0.06239648163318634
	Epoch: 145/200	 40/64 	 loss: 0.03666207939386368
	Epoch: 145/200	 60/64 	 loss: 0.015363032929599285
Epoch: 145/200, average training loss: 0.0005156060215085745 accuracy: 1.0158730158730158
Epoch: 145/200, average validation loss: 1.80868959

	Epoch: 161/200	 0/64 	 loss: 0.14396268129348755
	Epoch: 161/200	 20/64 	 loss: 0.013023389503359795
	Epoch: 161/200	 40/64 	 loss: 0.013776701875030994
	Epoch: 161/200	 60/64 	 loss: 0.01018317136913538
Epoch: 161/200, average training loss: 0.0003108964883722365 accuracy: 1.0158730158730158
Epoch: 161/200, average validation loss: 1.973680019378662 accuracy: 0.1784375
----------------------------------------------------------------------------------------------------
	Epoch: 162/200	 0/64 	 loss: 0.016429508104920387
	Epoch: 162/200	 20/64 	 loss: 0.044177111238241196
	Epoch: 162/200	 40/64 	 loss: 0.032037679105997086
	Epoch: 162/200	 60/64 	 loss: 0.020436573773622513
Epoch: 162/200, average training loss: 0.00022396848362404853 accuracy: 1.0158730158730158
Epoch: 162/200, average validation loss: 1.9687750339508057 accuracy: 0.17796875
----------------------------------------------------------------------------------------------------
	Epoch: 163/200	 0/64 	 loss: 0.1144732981920

	Epoch: 178/200	 60/64 	 loss: 0.9552910327911377
Epoch: 178/200, average training loss: 0.004772160667926073 accuracy: 1.0152380952380953
Epoch: 178/200, average validation loss: 1.7572882175445557 accuracy: 0.1740625
----------------------------------------------------------------------------------------------------
	Epoch: 179/200	 0/64 	 loss: 0.1573781967163086
	Epoch: 179/200	 20/64 	 loss: 2.0850985050201416
	Epoch: 179/200	 40/64 	 loss: 0.6421090364456177
	Epoch: 179/200	 60/64 	 loss: 0.2360810935497284
Epoch: 179/200, average training loss: 0.0030850842595100403 accuracy: 1.015079365079365
Epoch: 179/200, average validation loss: 1.7182444334030151 accuracy: 0.17859375
----------------------------------------------------------------------------------------------------
	Epoch: 180/200	 0/64 	 loss: 0.07781560719013214
	Epoch: 180/200	 20/64 	 loss: 0.041494742035865784
	Epoch: 180/200	 40/64 	 loss: 0.17165575921535492
	Epoch: 180/200	 60/64 	 loss: 0.025657156482338905
Epoch

Epoch: 195/200, average validation loss: 2.207350492477417 accuracy: 0.179375
----------------------------------------------------------------------------------------------------
	Epoch: 196/200	 0/64 	 loss: 0.0014852762687951326
	Epoch: 196/200	 20/64 	 loss: 0.013714366592466831
	Epoch: 196/200	 40/64 	 loss: 0.013920262455940247
	Epoch: 196/200	 60/64 	 loss: 0.010049261152744293
Epoch: 196/200, average training loss: 0.00014162460865918547 accuracy: 1.0158730158730158
Epoch: 196/200, average validation loss: 2.2329118251800537 accuracy: 0.179375
----------------------------------------------------------------------------------------------------
	Epoch: 197/200	 0/64 	 loss: 0.007313902024179697
	Epoch: 197/200	 20/64 	 loss: 0.0024161292240023613
	Epoch: 197/200	 40/64 	 loss: 0.015003976412117481
	Epoch: 197/200	 60/64 	 loss: 0.004175066947937012
Epoch: 197/200, average training loss: 9.743701230036095e-05 accuracy: 1.0158730158730158
Epoch: 197/200, average validation loss: 2.2