In [8]:
import random

Below is the code to divide the data into training data and validation data. The validation will be 250 out of the total data set and training data set will be the rest.

In [9]:
filename = 'Employee.csv'

# Open the file and store it contents in a list not including the header
with open(filename, 'r') as file:
    lines = file.readlines()
    header = lines[0]
    lines = lines[1:]

# pick out a random 250 row from the lines and store it in a new file
# then with the unpicked row, store them in a file called training_data.csv
random.seed(0)
random.shuffle(lines)
test_data = lines[:250]
training_data = lines[250:]

with open('test_data.csv', 'w') as file:
    file.write(header)
    for line in test_data:
        file.write(line)
        
with open('training_data.csv', 'w') as file:
    file.write(header)
    for line in training_data:
        file.write(line)

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim

In our neural network model, it has 7 input and 2 output

### Inputs

1. edu - [Bachelors: 0, Masters: 0.5, Phd: 1] - ordinal
2. paymentTier - [1, 2, 3] - ordinal
3. age - max: 41, min: 22
4. isMale - 0 | 1
5. isFemale - 0 | 1
6. everBenched - 0 | 1
8. experience - max: 7, min: 0


### Output - [softmax]

1. Leave
2. Not Leave

In [3]:
# Below will be the code for the neural network model

class MyNetwork(nn.Module):
    def __init__(self):
        super(MyNetwork, self).__init__()
        self.hidden1 = nn.Linear(7, 4)
        self.hidden2 = nn.Linear(4, 4)
        self.output = nn.Linear(4, 2)

    def forward(self, x):
        x = torch.sigmoid(self.hidden1(x))
        x = torch.sigmoid(self.hidden2(x))
        x = self.output(x)
        return x

In [5]:
from enum import Enum

class EduLevel(Enum):
    BACHELORS = "Bachelors"
    MASTERS = "Masters"
    PHD = "PHD"

edu_normalize = {
    EduLevel.BACHELORS: 0,
    EduLevel.MASTERS: 0.5,
    EduLevel.PHD: 1
}

payment_tiers_normalize = {
    "1": 0,
    "2": 0.5,
    "3": 1
}

experience_normalize = lambda x : x / 7
age_min = 22
age_max = 41
age_normalize = lambda x : (x - age_min) / (age_max - age_min)

In [6]:
learning_rate = 1e-9
epoch = 50


In [7]:
model = MyNetwork()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [8]:
import csv
from typing import Dict, Literal, List, Iterator

DatasetHeaders = Literal['Education', 'JoiningYear', 'City', 'PaymentTier', 'Age', 'Gender', 'EverBenched', 'ExperienceInCurrentDomain', 'LeaveOrNot']
DatasetRow = Dict[DatasetHeaders, List[str]]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
criterion.to(device)

for epoch in range(epoch):
    with open('training_data.csv', 'r', newline='') as training_file:
        training_reader: Iterator[DatasetRow] = csv.DictReader(training_file)
        for i, row in enumerate(training_reader):
            training_row = torch.tensor([
                edu_normalize[EduLevel(row['Education'])],
                payment_tiers_normalize[row['PaymentTier']],
                age_normalize(int(row['Age'])),
                1 if row['Gender'] == 'Male' else 0,
                1 if row['Gender'] == 'Female' else 0,
                1 if row['EverBenched'] == 'Yes' else 0,
                experience_normalize(int(row['ExperienceInCurrentDomain']))
            ]).float().to(device)
            target = torch.tensor([
                1 if row['LeaveOrNot'] == '1' else 0,
                1 if row['LeaveOrNot'] == '0' else 0
            ]).float().to(device)
            
            # unsqueeze
            training_row = training_row.unsqueeze(0)
            target = target.unsqueeze(0)
            
            optimizer.zero_grad()
            
            outputs = model(training_row)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()
            
            if i % 10 == 9:  # Print every 10 mini-batches
                print(f'Epoch: {epoch + 1}, Row {i + 1}, Loss: {loss.item() / 10:.10f}')
        else:
            print(f'Epoch: {epoch + 1}, Row {i + 1}, Loss: {loss.item() / 10:.10f}')

model_path = './model,epoch50,lr1e-9.bin'
torch.save(model.state_dict(), model_path)



Epoch: 1, Row 10, Loss: 0.0462563396
Epoch: 1, Row 20, Loss: 0.0461076796
Epoch: 1, Row 30, Loss: 0.0997534573
Epoch: 1, Row 40, Loss: 0.0459609300
Epoch: 1, Row 50, Loss: 0.0459336519
Epoch: 1, Row 60, Loss: 0.0460383296
Epoch: 1, Row 70, Loss: 0.0461973518
Epoch: 1, Row 80, Loss: 0.1002083778
Epoch: 1, Row 90, Loss: 0.0461644650
Epoch: 1, Row 100, Loss: 0.0459303677
Epoch: 1, Row 110, Loss: 0.0998581052
Epoch: 1, Row 120, Loss: 0.0996595204
Epoch: 1, Row 130, Loss: 0.0999551177
Epoch: 1, Row 140, Loss: 0.0459724098
Epoch: 1, Row 150, Loss: 0.0462563396
Epoch: 1, Row 160, Loss: 0.0464786798
Epoch: 1, Row 170, Loss: 0.0997115433
Epoch: 1, Row 180, Loss: 0.0458862871
Epoch: 1, Row 190, Loss: 0.0995886564
Epoch: 1, Row 200, Loss: 0.0462195128
Epoch: 1, Row 210, Loss: 0.0460697919
Epoch: 1, Row 220, Loss: 0.0461973518
Epoch: 1, Row 230, Loss: 0.1000232697
Epoch: 1, Row 240, Loss: 0.0992536306
Epoch: 1, Row 250, Loss: 0.0997071803
Epoch: 1, Row 260, Loss: 0.0460231185
Epoch: 1, Row 270, Lo