In [1]:
from torch.utils.data import DataLoader
import torch

In [2]:
t = torch.arange(7, dtype=torch.float32)
data_loader = DataLoader(t)

In [3]:
for item in data_loader:
    print(item)

tensor([0.])
tensor([1.])
tensor([2.])
tensor([3.])
tensor([4.])
tensor([5.])
tensor([6.])


In [4]:
data_loader = DataLoader(t, batch_size=3, drop_last=False, shuffle=True)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([1., 2., 6.])
batch 2: tensor([3., 5., 4.])
batch 3: tensor([0.])


In [5]:
torch.manual_seed(23)
data_loader = DataLoader(t, batch_size=3, drop_last=True, shuffle=True)
for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([0., 2., 6.])
batch 2: tensor([4., 5., 3.])


In [6]:
from torch.utils.data import Dataset

class JointDataset(Dataset):
    def __init__(self, x, y):
        self.x = x
        self.y = y
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

In [7]:
torch.manual_seed(23)

t_x = torch.rand([4, 3], dtype=torch.float32)
t_y = torch.arange(4)
joint_dataset = JointDataset(t_x, t_y)

for example in joint_dataset:
    print('  x: ', example[0],
          '  y: ', example[1])

  x:  tensor([0.4283, 0.2889, 0.4224])   y:  tensor(0)
  x:  tensor([0.3571, 0.9577, 0.1100])   y:  tensor(1)
  x:  tensor([0.2933, 0.9205, 0.5876])   y:  tensor(2)
  x:  tensor([0.1299, 0.6729, 0.1028])   y:  tensor(3)


 Or use Class `TensorDataset` directly

In [8]:
torch.manual_seed(23)
from torch.utils.data import TensorDataset
joint_dataset = TensorDataset(t_x, t_y)

for example in joint_dataset:
    print('  x: ', example[0],
          '  y: ', example[1])

  x:  tensor([0.4283, 0.2889, 0.4224])   y:  tensor(0)
  x:  tensor([0.3571, 0.9577, 0.1100])   y:  tensor(1)
  x:  tensor([0.2933, 0.9205, 0.5876])   y:  tensor(2)
  x:  tensor([0.1299, 0.6729, 0.1028])   y:  tensor(3)


In [9]:
torch.manual_seed(1)
data_loader = DataLoader(dataset=joint_dataset, batch_size=2, shuffle=True)

for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0],
              '\n         y:', batch[1])

for epoch in range(2):
    print(f'epoch {epoch+1}')
    for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0],
              '\n         y:', batch[1])

batch 1: x: tensor([[0.2933, 0.9205, 0.5876],
        [0.4283, 0.2889, 0.4224]]) 
         y: tensor([2, 0])
batch 2: x: tensor([[0.3571, 0.9577, 0.1100],
        [0.1299, 0.6729, 0.1028]]) 
         y: tensor([1, 3])
epoch 1
batch 1: x: tensor([[0.4283, 0.2889, 0.4224],
        [0.2933, 0.9205, 0.5876]]) 
         y: tensor([0, 2])
batch 2: x: tensor([[0.3571, 0.9577, 0.1100],
        [0.1299, 0.6729, 0.1028]]) 
         y: tensor([1, 3])
epoch 2
batch 1: x: tensor([[0.1299, 0.6729, 0.1028],
        [0.2933, 0.9205, 0.5876]]) 
         y: tensor([3, 2])
batch 2: x: tensor([[0.4283, 0.2889, 0.4224],
        [0.3571, 0.9577, 0.1100]]) 
         y: tensor([0, 1])


In [10]:
import pandas as pd
import numpy as np
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler

In [11]:
df = pd.read_csv("C:/Training/Academy/Statistics (Python)/Cases/human-resources-analytics/HR_comma_sep.csv")
dum_df = pd.get_dummies(df,drop_first=True)
dum_df.head()

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,left,promotion_last_5years,Department_RandD,Department_accounting,Department_hr,Department_management,Department_marketing,Department_product_mng,Department_sales,Department_support,Department_technical,salary_low,salary_medium
0,0.38,0.53,2,157,3,0,1,0,False,False,False,False,False,False,True,False,False,True,False
1,0.8,0.86,5,262,6,0,1,0,False,False,False,False,False,False,True,False,False,False,True
2,0.1,0.77,6,247,4,0,1,0,False,False,False,False,False,False,True,False,False,True,False
3,0.92,0.85,5,259,5,0,1,0,False,False,False,False,False,False,True,False,False,True,False
4,0.89,1.0,5,224,5,0,1,0,False,False,False,False,False,False,True,False,False,True,False


In [12]:
X = dum_df.drop('left', axis=1)
scaler = MinMaxScaler()

y = dum_df['left'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, 
                                                    random_state=23,stratify=y)
X_scl_trn = scaler.fit_transform(X_train) 
X_scl_tst = scaler.transform(X_test) 

In [13]:
X_torch = torch.from_numpy(X_scl_trn)
y_torch = torch.from_numpy(y_train)
print(X_torch.size())
print(y_torch.size())

torch.Size([10496, 18])
torch.Size([10496])


In [14]:
data_loader = DataLoader(y_torch, batch_size=30, drop_last=False)

for i, batch in enumerate(data_loader, 1):
    print(f'batch {i}:', batch)

batch 1: tensor([0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1,
        0, 0, 1, 0, 1, 0])
batch 2: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 0, 1, 0, 0])
batch 3: tensor([1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
        1, 0, 1, 0, 0, 0])
batch 4: tensor([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 1])
batch 5: tensor([0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0,
        1, 0, 1, 0, 1, 0])
batch 6: tensor([0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        1, 1, 0, 0, 0, 0])
batch 7: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
        0, 0, 0, 1, 0, 0])
batch 8: tensor([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0])
batch 9: tensor([0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0

In [15]:
from torch.utils.data import TensorDataset
joint_dataset = TensorDataset(X_torch.float(), y_torch.float())

In [16]:
type(joint_dataset)

torch.utils.data.dataset.TensorDataset

In [17]:
torch.manual_seed(23)
data_loader = DataLoader(dataset=joint_dataset, batch_size=20, shuffle=True)

for i, batch in enumerate(data_loader, 1):
        print(f'batch {i}:', 'x:', batch[0].shape,
              '\n         y:', batch[1].shape)



batch 1: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 2: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 3: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 4: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 5: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 6: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 7: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 8: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 9: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 10: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 11: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 12: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 13: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 14: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 15: x: torch.Size([20, 18]) 
         y: torch.Size([20])
batch 16: x: torch.Size([20, 18]) 
         y: to

In [18]:
# Create a model
model = nn.Sequential(nn.Linear(in_features=X_scl_trn.shape[1], out_features=5),
                      nn.ReLU(),
                      nn.Linear(5, 3),
                      nn.ReLU(),
                      nn.Linear(3,1),
                      nn.Sigmoid())

In [19]:
criterion = torch.nn.BCELoss()
# Construct the optimizer (Adam in this case)
optimizer = torch.optim.SGD(model.parameters(), lr = 0.001)
optimizer

SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0
    nesterov: False
    weight_decay: 0
)

Prediction with Default Weights

In [20]:
y_pred = model(X_torch.float())
y_torch = y_torch.unsqueeze(1)
print(y_torch.shape)
print(y_pred.shape)

torch.Size([10496, 1])
torch.Size([10496, 1])


In [21]:
for epoch in range(2):
    print(f'epoch {epoch+1}')
    for i, batch in enumerate(data_loader, 1):
        #print(f'batch {i}:', 'x:', batch[0],
         #     '\n         y:', batch[1])
        print((batch[0].shape, batch[1].shape))

epoch 1
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.S

(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20, 18]), torch.Size([20]))
(torch.Size([20,

In [22]:
# Gradient Descent

for epoch in np.arange(0,100):
   for i, batch in enumerate(data_loader, 1):
      # Forward pass: Compute predicted y by passing x to the model
      y_pred_prob = model(batch[0].float())

      # Compute and print loss
      loss = criterion(y_pred_prob, batch[1].float().unsqueeze(1))

      # Zero gradients, perform a backward pass, and update the weights.
      optimizer.zero_grad()

      # perform a backward pass (backpropagation)
      loss.backward()

      # Update the parameters
      optimizer.step()
   print('epoch: ', epoch+1,' loss: ', loss.item())

#print('epoch: ', epoch+1,' loss: ', loss.item())

epoch:  1  loss:  0.749776303768158
epoch:  2  loss:  0.7082245945930481
epoch:  3  loss:  0.6784613132476807
epoch:  4  loss:  0.6552145481109619
epoch:  5  loss:  0.6279225945472717
epoch:  6  loss:  0.5549004673957825
epoch:  7  loss:  0.4831380248069763
epoch:  8  loss:  0.5239942669868469
epoch:  9  loss:  0.5702289342880249
epoch:  10  loss:  0.5584303140640259
epoch:  11  loss:  0.5609802007675171
epoch:  12  loss:  0.4287409484386444
epoch:  13  loss:  0.492969810962677
epoch:  14  loss:  0.491044819355011
epoch:  15  loss:  0.4139052629470825
epoch:  16  loss:  0.4018683135509491
epoch:  17  loss:  0.7446524500846863
epoch:  18  loss:  0.4021958112716675
epoch:  19  loss:  0.3927900791168213
epoch:  20  loss:  0.5515767931938171
epoch:  21  loss:  0.40436309576034546
epoch:  22  loss:  0.6360234022140503
epoch:  23  loss:  0.6268173456192017
epoch:  24  loss:  0.4906708598136902
epoch:  25  loss:  0.5622594952583313
epoch:  26  loss:  0.5722342729568481
epoch:  27  loss:  0.60

In [23]:
X_torch_tst = torch.from_numpy(X_scl_tst)
y_torch_tst = torch.from_numpy(y_test)
y_torch_tst = y_torch_tst.unsqueeze(1)
print(y_torch_tst.shape)

torch.Size([4499, 1])


Prediction with Final Weights

In [24]:
y_pred = model(X_torch_tst.float())
y_pred[:5]

tensor([[0.2393],
        [0.3174],
        [0.1889],
        [0.1498],
        [0.1900]], grad_fn=<SliceBackward0>)

In [25]:
y_pred.shape, y_test.shape

(torch.Size([4499, 1]), (4499,))

In [26]:
type(y_pred.detach().numpy())

numpy.ndarray

In [27]:
y_pred = y_pred.detach().numpy()
y_pred.shape

(4499, 1)

In [28]:
from sklearn.metrics import log_loss
log_loss(y_test, y_pred)

0.44266847567778744