In [None]:
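# Unrelated scratch (kept commented out): compares the custom edmonds_karp max-flow from EK_modified against networkx on a small graph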
# from EK_modified import edmonds_karp
#
# import networkx as nx
#
# G = nx.DiGraph()
# G.add_edge("x", "a", capacity=3.0)
# G.add_edge("x", "b", capacity=1.0)
# G.add_edge("a", "c", capacity=3.0)
# G.add_edge("b", "c", capacity=5.0)
# G.add_edge("b", "d", capacity=4.0)
# G.add_edge("d", "e", capacity=2.0)
# G.add_edge("c", "y", capacity=2.0)
# G.add_edge("e", "y", capacity=3.0)
#
# R = edmonds_karp(G, "x", "y")
# flow_value = nx.maximum_flow_value(G, "x", "y")
# print(flow_value)
#
# flow_value == R.graph["flow_value"]
#
# print(flow_value)


In [1]:
# importing libraries

import pandas as pd
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as torch_optim
import torch.nn as nn
import torch.nn.functional as F
from datetime import datetime

In [2]:
# Load the raw arbitration records and preview them.
csv_path = './data/data_v1.csv'
raw_data = pd.read_csv(csv_path)
print("Shape:", raw_data.shape)  # (rows, columns)
print(raw_data.head())           # first five records


Shape: (17, 7)
   Arbitrator_id  Arbitrator_seniority Arbitrator_background  \
0              1                    18     Quantity Surveyor   
1              1                    18     Quantity Surveyor   
2              1                    18     Quantity Surveyor   
3              1                    18     Quantity Surveyor   
4              2                    18              Engineer   

   Arbitrator_total_accepted_cases          Case_nature  Case_dispute_amount  \
0                                1    Construction_Case                12500   
1                                1    Construction_Case               100000   
2                                1    Construction_Case                72080   
3                                1  Building Management                50000   
4                                5    Construction_Case                 4500   

   Case_accepted  
0              1  
1              0  
2              0  
3              0  
4              1  


In [67]:
# One-hot encoding of the categorical columns.
# Arbitrator_id is numeric in the CSV, so it is cast to object first to be
# treated as a category rather than a magnitude.
mod1_data = raw_data.astype({'Arbitrator_id': 'object'})
mod2_data = mod1_data.copy()

# Re-assigning a column's values below never changes another column's dtype,
# so the categorical columns can be picked out once up front.
categorical_cols = [name for name in mod1_data.columns if mod1_data.dtypes[name] == "object"]

for name in categorical_cols:
    # mod1_data keeps the integer label codes (0 .. n_classes - 1).
    mod1_data[name] = LabelEncoder().fit_transform(mod1_data[name])
    n_classes = int(mod1_data[name].max()) + 1

    codes = torch.from_numpy(mod1_data[name].to_numpy())
    indicators = F.one_hot(codes, n_classes)       # shape: (rows, n_classes)
    per_class = indicators.transpose(0, 1).tolist()  # one 0/1 list per class

    # mod2_data swaps the original column for one indicator column per class,
    # appended at the end and named "<column>_<class index>".
    mod2_data = mod2_data.drop(columns=name)
    for class_idx, indicator in enumerate(per_class):
        mod2_data[name + "_" + str(class_idx)] = indicator

# print(raw_data.head())
# print(mod1_data.head())
mod2_data

Unnamed: 0,Arbitrator_seniority,Arbitrator_total_accepted_cases,Case_dispute_amount,Case_accepted,Arbitrator_id_0,Arbitrator_id_1,Arbitrator_id_2,Arbitrator_id_3,Arbitrator_id_4,Arbitrator_id_5,Arbitrator_id_6,Arbitrator_background_0,Arbitrator_background_1,Arbitrator_background_2,Arbitrator_background_3,Case_nature_0,Case_nature_1
0,18,1,12500,1,1,0,0,0,0,0,0,0,0,0,1,0,1
1,18,1,100000,0,1,0,0,0,0,0,0,0,0,0,1,0,1
2,18,1,72080,0,1,0,0,0,0,0,0,0,0,0,1,0,1
3,18,1,50000,0,1,0,0,0,0,0,0,0,0,0,1,1,0
4,18,5,4500,1,0,1,0,0,0,0,0,0,1,0,0,0,1
5,18,5,4500,1,0,1,0,0,0,0,0,0,1,0,0,0,1
6,18,5,12500,1,0,1,0,0,0,0,0,0,1,0,0,0,1
7,18,5,40000,1,0,1,0,0,0,0,0,0,1,0,0,0,1
8,18,5,4500,1,0,1,0,0,0,0,0,0,1,0,0,1,0
9,18,5,49066,0,0,1,0,0,0,0,0,0,1,0,0,0,1


In [68]:
# Sequential (unshuffled) train : validation : test split.
data_split = [0.6, 0.2, 0.2]
N = len(mod2_data)
train_size = int(N*data_split[0])
valid_size = int(N*data_split[1])
valid_end = train_size + valid_size

# Positional row slices; the test split takes whatever rows remain, so it
# absorbs the rounding left over from the two int() truncations above.
train = mod2_data.iloc[:train_size]
valid = mod2_data.iloc[train_size:valid_end]
test = mod2_data.iloc[valid_end:]

for label, part in (("train:", train), ("valid:", valid), ("test:", test)):
    print(label, part.shape)

# Separate the target column (Case_accepted) from the feature columns.
Xtrain, Ytrain = train.drop(columns='Case_accepted'), train['Case_accepted']
Xvalid, Yvalid = valid.drop(columns='Case_accepted'), valid['Case_accepted']
Xtest, Ytest = test.drop(columns='Case_accepted'), test['Case_accepted']

train: (10, 17)
valid: (3, 17)
test: (4, 17)


In [72]:
# Display the target labels (Case_accepted) of the test split.
Ytest

13    1
14    0
15    1
16    0
Name: Case_accepted, dtype: int64

In [73]:
# defining dataset 
class ArbitratorDataset(Dataset):
    """Map-style dataset pairing one feature row with its target label.

    Features and labels are converted to float32 tensors on construction so
    that ``__getitem__`` indexes strictly by *position*. Indexing the pandas
    objects directly does not work with a DataLoader: ``DataFrame[idx]`` is a
    column lookup and ``Series[idx]`` is a label lookup, so both raise
    ``KeyError`` for row slices that keep their original (non zero-based)
    index.

    Args:
        X: Features, one row per sample (DataFrame, ndarray, tensor or
            nested sequence of numbers).
        Y: Targets, one entry per sample (Series, ndarray, tensor or
            sequence of numbers).

    Raises:
        ValueError: If ``X`` and ``Y`` do not contain the same number of
            samples.
    """

    def __init__(self, X, Y):
        self.x = self._to_tensor(X)
        self.y = self._to_tensor(Y)
        if len(self.x) != len(self.y):
            raise ValueError(
                f"X and Y must have the same length, got {len(self.x)} and {len(self.y)}"
            )

    @staticmethod
    def _to_tensor(values):
        """Return ``values`` as a float32 tensor, discarding any pandas index."""
        if isinstance(values, torch.Tensor):
            return values.detach().to(torch.float32)
        if hasattr(values, "to_numpy"):
            # pandas DataFrame / Series -> plain ndarray (drops the index labels)
            values = values.to_numpy()
        return torch.as_tensor(np.asarray(values, dtype=np.float32))

    def __len__(self):
        """Number of samples."""
        return len(self.y)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for the sample at position ``idx``."""
        return self.x[idx], self.y[idx]

In [76]:
# creating train, valid and test datasets
train_ds = ArbitratorDataset(Xtrain, Ytrain)
valid_ds = ArbitratorDataset(Xvalid, Yvalid)
test_ds = ArbitratorDataset(Xtest, Ytest)

# creating dataloaders
# BatchNorm1d cannot compute batch statistics from a single sample and raises
# "Expected more than 1 value per channel when training", so training batches
# must hold at least two rows.
batch_size = 2
train_dl = DataLoader(
    train_ds,
    batch_size=batch_size,
    shuffle=True,    # rows are ordered by arbitrator; mix them for SGD
    drop_last=True,  # never hand a leftover 1-row batch to BatchNorm1d
    generator=torch.Generator().manual_seed(0),  # reproducible shuffling order
)
# Evaluation loaders keep the original order and every sample.
valid_dl = DataLoader(valid_ds, batch_size=batch_size, shuffle=False)
test_dl = DataLoader(test_ds, batch_size=batch_size, shuffle=False)


In [77]:
# defining model

class CaseArbitrationModel(nn.Module):
    """Feed-forward binary classifier for case acceptance.

    Layers: Linear(inp_size, 100) -> ReLU -> Dropout(0.3) -> BatchNorm1d(100)
    -> Linear(100, 50) -> ReLU -> Dropout(0.3) -> BatchNorm1d(50)
    -> Linear(50, 1) -> Sigmoid.

    Args:
        inp_size: Number of input features per sample.
    """

    def __init__(self, inp_size):
        super().__init__()
        self.lin1 = nn.Linear(inp_size, 100)
        self.bn1 = nn.BatchNorm1d(100)

        self.lin2 = nn.Linear(100, 50)
        self.bn2 = nn.BatchNorm1d(50)

        self.lin3 = nn.Linear(50, 1)
        # A softmax over a single output unit is always exactly 1.0; a sigmoid
        # is what maps one logit to a probability in (0, 1).
        self.sigmoid = nn.Sigmoid()

        self.drops = nn.Dropout(0.3)

    def forward(self, x_cat, x_cont=None):
        """Compute the acceptance probability for a batch.

        Args:
            x_cat: Feature tensor of shape (batch, features). When ``x_cont``
                is omitted this must already hold all ``inp_size`` features.
            x_cont: Optional second feature tensor of shape (batch, features);
                when given it is concatenated to ``x_cat`` along dim 1 and the
                combined width must equal ``inp_size``.

        Returns:
            Tensor of shape (batch, 1) with values in (0, 1).
        """
        x = x_cat if x_cont is None else torch.cat([x_cat, x_cont], dim=1)
        # Integer-encoded features (e.g. one-hot columns) must match the
        # floating dtype of the layer weights.
        x = x.to(self.lin1.weight.dtype)

        x = F.relu(self.lin1(x))
        x = self.drops(x)
        x = self.bn1(x)

        x = F.relu(self.lin2(x))
        x = self.drops(x)
        x = self.bn2(x)

        x = self.lin3(x)
        x = self.sigmoid(x)

        return x
       