## Model definition

In [None]:
#!pip install -q numpy torch --index-url https://download.pytorch.org/whl/cpu
!pip install -q scikit-learn pandas pip install torch-directml

: 

In [None]:
import torch

def _select_device() -> torch.device:
    """Return the DirectML device when torch-directml is usable, otherwise the CPU.

    Stock PyTorch builds do not define ``torch.has_dml``, so probing that attribute
    raises AttributeError. The torch-directml package exposes its device through
    the ``torch_directml`` module instead.
    """
    try:
        # Optional dependency: only present when torch-directml is installed.
        import torch_directml
    except ImportError:
        return torch.device("cpu")

    if torch_directml.is_available():
        return torch_directml.device()
    return torch.device("cpu")


device = _select_device()

### Define a model

In [152]:
import torch.nn as nn

class GenderClassifier(nn.Module):
    """Fully connected classifier with two ReLU hidden layers and a linear output layer.

    ``forward`` returns raw, unnormalised logits. ``nn.CrossEntropyLoss`` applies
    log-softmax internally, so feeding it softmax probabilities would normalise
    twice and flatten the gradients. Use ``predict_proba`` when class
    probabilities are needed; ``argmax`` gives the same class on either output.

    Args:
        input_size: Number of input features per sample.
        hidden1_size: Width of the first hidden layer.
        hidden2_size: Width of the second hidden layer.
        output_size: Number of classes (one logit per class).
    """

    def __init__(self, input_size: int, hidden1_size: int, hidden2_size: int, output_size: int):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden1_size)
        self.fc2 = nn.Linear(hidden1_size, hidden2_size)
        self.fc3 = nn.Linear(hidden2_size, output_size)

        self.relu = nn.ReLU()
        # Parameter-free; only used by predict_proba, never inside forward.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a ``(batch, input_size)`` tensor to ``(batch, output_size)`` logits."""
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        # No softmax here: the loss function expects logits.
        return self.fc3(x)

    def predict_proba(self, x):
        """Return per-class probabilities (each row sums to 1) for ``x``."""
        return self.softmax(self.forward(x))


### Load and prepare dataset

In [153]:
from torch.utils.data import DataLoader, TensorDataset, random_split
import torch
import pandas as pd

# Load the prepared table; every column except the label is used as a feature.
df = pd.read_csv("data/hr_data_gold.csv")

label = 'Gender_Male'
features = [c for c in df.columns if c != label]

# create tensors
# to_numpy(dtype=...) yields a homogeneous numeric array even when the frame mixes
# bool and numeric columns, and avoids handing a pandas Series to torch.tensor.
# Tensors are moved with .to(device); there is no Tensor.to_device method.
X = torch.tensor(df[features].to_numpy(dtype="float32")).to(device)
y = torch.tensor(df[label].to_numpy(dtype="int64")).to(device)

# wrap features and labels in a dataset
ds = TensorDataset(X, y)

# split data 80 % / 20 %; the seeded generator makes the split reproducible
train_size = int(0.8 * len(ds))
test_size = len(ds) - train_size
ds_train, ds_test = random_split(
    ds,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(42),
)

# create loaders
all_loader = DataLoader(ds, batch_size=1, shuffle=False)
train_loader = DataLoader(ds_train, batch_size=8, shuffle=True)
test_loader = DataLoader(ds_test, batch_size=1, shuffle=True)

### Train the model (hidden layers: 48 × 24)

In [None]:
import torch.optim as optim
from sklearn.metrics import accuracy_score

# Build the network and move its parameters to the selected device.
# Modules are moved with .to(device); nn.Module has no to_device method.
model = GenderClassifier(len(features), 48, 24, 2)
model = model.to(device)

train_accuracy_log = []
test_accuracy_log = []

optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
epochs = 1000

# The split does not change between epochs, so materialise both subsets once
# instead of slicing them twice per epoch.
train_features, train_labels = ds_train[:]
test_features, test_labels = ds_test[:]
# scikit-learn works on host arrays, so keep CPU copies of the labels.
train_labels_np = train_labels.cpu().numpy()
test_labels_np = test_labels.cpu().numpy()

for epoch in range(epochs):
    # train
    model.train()
    for X_batch, y_batch in train_loader:
        optimizer.zero_grad()
        output = model(X_batch)
        loss = criterion(output, y_batch)
        loss.backward()
        optimizer.step()

    # calculate train_ds and test_ds accuracy
    model.eval()
    with torch.no_grad():
        # Process all features at once; predictions are copied to the CPU for sklearn.
        train_pred = model(train_features).argmax(dim=1)
        train_accuracy = accuracy_score(train_labels_np, train_pred.cpu().numpy())
        train_accuracy_log.append(train_accuracy)

        test_pred = model(test_features).argmax(dim=1)
        test_accuracy = accuracy_score(test_labels_np, test_pred.cpu().numpy())
        test_accuracy_log.append(test_accuracy)

    # epoch is zero-based; +1 so that the final epoch reports 100.0 %.
    print(f"Progress: {(epoch + 1) / epochs * 100:.1f} %")


### Learning curve

In [141]:
!pip install -q matplotlib seaborn

In [None]:
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns
# One x position per logged epoch; derived from the log itself so the plot still
# works when training stopped before `epochs` iterations.
x = list(range(1, len(train_accuracy_log) + 1))

plt.plot(x, train_accuracy_log, label="Train accuracy")
plt.plot(x, test_accuracy_log, label="Test accuracy")
plt.xlabel("Epoch")
# The logged values are accuracies, not losses.
plt.ylabel("Accuracy")
plt.title("Learning Curve")
# Without legend() the label= arguments above are never displayed.
plt.legend()
plt.grid(axis='y')
plt.show()

### Validation
- Accuracy
- Precision
- Recall
- Confusion Matrix

In [None]:
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    # NOTE(review): all_loader iterates the full dataset, training rows included,
    # so these metrics are not a held-out estimate — confirm this is intended.
    # Loop variables are named *_batch so they do not overwrite the dataset
    # tensors X and y.
    for X_batch, y_batch in all_loader:
        outputs = model(X_batch)
        preds = torch.argmax(outputs, dim=1)
        # Collect plain Python ints on the host; sklearn cannot consume a list of
        # (possibly device-resident) one-element tensors.
        all_labels.extend(y_batch.cpu().tolist())
        all_preds.extend(preds.cpu().tolist())

accuracy = accuracy_score(all_labels, all_preds)
precision = precision_score(all_labels, all_preds)
recall = recall_score(all_labels, all_preds)
f1 = f1_score(all_labels, all_preds)
# labels=[0, 1] keeps the matrix 2x2 (matching the tick labels below) even when
# one class never occurs among the labels or predictions.
conf_matrix = confusion_matrix(all_labels, all_preds, labels=[0, 1])

sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=["Female", "Male"], yticklabels=["Female", "Male"])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')

plt.show()
print(f"Accuracy:  {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall:    {recall:.4f}")
print(f"F1 Score:  {f1:.4f}")

In [129]:
# Persist the trained module object itself (not only its state_dict).
checkpoint_path = "model_24_48_b8_e1000.pt"
torch.save(model, checkpoint_path)

In [124]:
# Load a previously saved model object from disk.
# NOTE(review): weights_only=False lets torch.load unpickle arbitrary objects;
# only use it on files from a trusted source.
# NOTE(review): this reads "model.pt", whereas the save cell writes
# "model_24_48_b8_e1000.pt" — confirm which file is intended.
model1 = torch.load("model.pt", weights_only=False)