In [15]:
# https://stanford.edu/~shervine/blog/pytorch-how-to-generate-data-parallel
import torch
from torch.utils import data
import pandas as pd

class TestTrainSplit(data.Dataset):
    'Characterizes a dataset for PyTorch'
    def __init__(self, dataframe_x, features, dataframe_y, labels):
        'Initialization'
        super(TestTrainSplit, self).__init__()
        self.dataframe_x = dataframe_x
        self.dataframe_y = dataframe_y
        self.labels = labels
        self.list_IDs = features

    def __len__(self):
        'Denotes the total number of samples'
        return self.dataframe_x.shape[0]

    def __getitem__(self, index):
        'Generates one sample of data'
        # Load data and get label
        X = self.dataframe_x[self.list_IDs].to_numpy()[index]
        y = self.dataframe_y.to_numpy()[index]

        return X, y

In [16]:
# Parameters
params = {'batch_size': 64,
          'shuffle': True,
          'num_workers': 2}

In [17]:
import numpy as np
df = pd.read_csv('HWA-Brovsi-WSA-Claims-Validated.csv', dtype={'RISK': np.float})
df = df.dropna()
df = df.drop(['Index'], axis=1)
df['ClaimTime'] = pd.DatetimeIndex(pd.to_datetime(df['CLM_DT'], format='%d/%m/%y') - pd.to_datetime(df['RCPT_DT'], format='%d/%m/%y')).day
df['DistriTime'] = pd.DatetimeIndex(pd.to_datetime(df['EA_DT'], format='%d/%m/%y') - pd.to_datetime(df['RCPT_DT'], format='%d/%m/%y')).day

  """
  


In [18]:
df.columns

Index(['EMP_ID', 'CLM_SYS', 'CLM_REF', 'WSA_TYP', 'FLEXBEN_TYPE', 'CLM_DT',
       'CLM_YR', 'CLM_AMT', 'RCPT_DT', 'RCPT_DAY', 'DAY_TAG', 'CLM.STAT',
       'REIMB_DT', 'REIMB_YR', 'EA_DT', 'RISK', 'ClaimTime', 'DistriTime'],
      dtype='object')

In [19]:
y_value = df['RISK']
x_value = df.drop(['RISK'], axis=1)

In [20]:
y_value

0        1.0
1        1.0
2        1.0
3        1.0
4        1.0
        ... 
31652    1.0
31654    0.0
31656    1.0
31660    1.0
31662    1.0
Name: RISK, Length: 17271, dtype: float64

In [21]:
x_value = pd.get_dummies(x_value)

In [22]:
feature_list = list(x_value.columns)
feature_list

['EMP_ID',
 'FLEXBEN_TYPE',
 'CLM_YR',
 'CLM_AMT',
 'CLM.STAT',
 'REIMB_YR',
 'ClaimTime',
 'DistriTime',
 'CLM_SYS_WSA',
 'CLM_REF_EFSA0087157',
 'CLM_REF_EFSA0087160',
 'CLM_REF_EFSA0087162',
 'CLM_REF_EFSA0087163',
 'CLM_REF_EFSA0087164',
 'CLM_REF_EFSA0087169',
 'CLM_REF_EFSA0087170',
 'CLM_REF_EFSA0087176',
 'CLM_REF_EFSA0087178',
 'CLM_REF_EFSA0087180',
 'CLM_REF_EFSA0087181',
 'CLM_REF_EFSA0087182',
 'CLM_REF_EFSA0087183',
 'CLM_REF_EFSA0087184',
 'CLM_REF_EFSA0087188',
 'CLM_REF_EFSA0087189',
 'CLM_REF_EFSA0087190',
 'CLM_REF_EFSA0087191',
 'CLM_REF_EFSA0087192',
 'CLM_REF_EFSA0087195',
 'CLM_REF_EFSA0087198',
 'CLM_REF_EFSA0087199',
 'CLM_REF_EFSA0087203',
 'CLM_REF_EFSA0087206',
 'CLM_REF_EFSA0087207',
 'CLM_REF_EFSA0087208',
 'CLM_REF_EFSA0087209',
 'CLM_REF_EFSA0087210',
 'CLM_REF_EFSA0087211',
 'CLM_REF_EFSA0087212',
 'CLM_REF_EFSA0087213',
 'CLM_REF_EFSA0087214',
 'CLM_REF_EFSA0087215',
 'CLM_REF_EFSA0087216',
 'CLM_REF_EFSA0087222',
 'CLM_REF_EFSA0087223',
 'CLM_REF_EFSA

In [23]:
'RISK' in list(x_value.columns)

False

In [24]:
training_set = TestTrainSplit(x_value,feature_list, y_value, ['RISK'])

In [25]:
training_generator = data.DataLoader(training_set, **params)

In [26]:
training_generator.dataset[1]

(array([8.965e+03, 4.000e+00, 2.016e+03, ..., 0.000e+00, 0.000e+00,
        1.000e+00]),
 1.0)

In [27]:
import torch.nn as nn
import torch.nn.functional as F

class ANN(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=64, out_features=17)
        self.fc2 = nn.Linear(in_features=17, out_features=6)
        self.output = nn.Linear(in_features=6, out_features=2)
    
    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.output(x)
        return x

In [28]:
model = ANN()
model

ANN(
  (fc1): Linear(in_features=64, out_features=17, bias=True)
  (fc2): Linear(in_features=17, out_features=6, bias=True)
  (output): Linear(in_features=6, out_features=2, bias=True)
)

In [29]:
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

In [30]:
training_generator.dataset[1]

(array([8.965e+03, 4.000e+00, 2.016e+03, ..., 0.000e+00, 0.000e+00,
        1.000e+00]),
 1.0)

In [31]:

# # epochs = 100
# loss_arr = []
# for i,j in training_generator:
#     y_hat = model.forward(i)
#     loss = criterion(y_hat, j)
#     loss_arr.append(loss)

# #     if i % 10 == 0:
# #        print(f'Epoch: i Loss: {loss}')

#     optimizer.zero_grad()
#     loss.backward()
#     optimizer.step()

In [37]:
y_hat = model.forward(torch.tensor(training_generator.dataset[0][0]))

RuntimeError: Expected object of scalar type Double but got scalar type Float for argument #2 'mat2' in call to _th_mm