In [1]:
import pandas as pd

train_path = r"C:\Users\paulw\Documents\graduation_competition\data\train.csv"
test_path = r"C:\Users\paulw\Documents\graduation_competition\data\test.csv"

train = pd.read_csv(train_path)

In [2]:
target = "Target"

In [3]:
train[target].value_counts(normalize=True)

Target
Graduate    0.474163
Dropout     0.330589
Enrolled    0.195248
Name: proportion, dtype: float64

In [4]:
categoricals = [
    "Marital status",
    "Application mode",
    "Application order",
    "Course",
    "Daytime/evening attendance",
    "Previous qualification",
    "Nacionality",
    "Mother's qualification",
    "Father's qualification",
    "Mother's occupation",
    "Father's occupation",
    "Displaced",
    "Educational special needs",
    "Debtor",
    "Tuition fees up to date",
    "Gender",
    "Scholarship holder",
    "International"
]

numericals = [
    "Previous qualification (grade)",
    "Admission grade",
    "Age at enrollment",
    "Curricular units 1st sem (credited)",
    "Curricular units 1st sem (enrolled)",
    "Curricular units 1st sem (evaluations)",
    "Curricular units 1st sem (approved)",
    "Curricular units 1st sem (grade)",
    "Curricular units 1st sem (without evaluations)",
    "Curricular units 2nd sem (credited)",
    "Curricular units 2nd sem (enrolled)",
    "Curricular units 2nd sem (evaluations)",
    "Curricular units 2nd sem (approved)",
    "Curricular units 2nd sem (grade)",
    "Curricular units 2nd sem (without evaluations)",
    "Unemployment rate",
    "Inflation rate",
    "GDP"
]
features = categoricals + numericals
train[[x for x in train.columns if x not in features]]

Unnamed: 0,id,Target
0,0,Graduate
1,1,Dropout
2,2,Dropout
3,3,Enrolled
4,4,Graduate
...,...,...
76513,76513,Graduate
76514,76514,Graduate
76515,76515,Enrolled
76516,76516,Dropout


In [5]:
train[categoricals].nunique()

Marital status                 6
Application mode              22
Application order              8
Course                        19
Daytime/evening attendance     2
Previous qualification        21
Nacionality                   18
Mother's qualification        35
Father's qualification        39
Mother's occupation           40
Father's occupation           56
Displaced                      2
Educational special needs      2
Debtor                         2
Tuition fees up to date        2
Gender                         2
Scholarship holder             2
International                  2
dtype: int64

In [6]:
train[numericals].nunique()

Previous qualification (grade)                     110
Admission grade                                    668
Age at enrollment                                   46
Curricular units 1st sem (credited)                 21
Curricular units 1st sem (enrolled)                 24
Curricular units 1st sem (evaluations)              36
Curricular units 1st sem (approved)                 23
Curricular units 1st sem (grade)                  1206
Curricular units 1st sem (without evaluations)      12
Curricular units 2nd sem (credited)                 20
Curricular units 2nd sem (enrolled)                 22
Curricular units 2nd sem (evaluations)              31
Curricular units 2nd sem (approved)                 21
Curricular units 2nd sem (grade)                  1234
Curricular units 2nd sem (without evaluations)      11
Unemployment rate                                   11
Inflation rate                                      13
GDP                                                 11
dtype: int

In [7]:
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np
from pydantic import BaseModel
from typing import Optional

# set seed
np.random.seed(42)

class MinShifter:

    def __init__(self):
        self.min: int = 0

    def fit(self, X: np.ndarray):
        self.min = X.min()

    def transform(self, X: np.ndarray):
        return X - self.min

class DatasetModel(BaseModel):

    numerical_feature_names: list[str]
    categorical_feature_names: list[str]
    numerical_features: np.ndarray
    categorical_features: np.ndarray
    target: Optional[np.ndarray] = None

    class Config:
        arbitrary_types_allowed = True

class PreProcessingPipeline:

    def __init__(
            self,
            dataset: pd.DataFrame,
            target_name: str,
            categorical_features: list[str],
            numerical_features: list[str],
            numerical_scaler: StandardScaler = StandardScaler,
            one_hot_encoder: OneHotEncoder = OneHotEncoder,
            val_split: float = 0.2,
            stratify: bool = True,
    ):
        self.dataset: pd.DataFrame = dataset.copy()
        self.target_name: str = target_name
        self.categorical_features: list[str] = categorical_features
        self.numerical_features: list[str] = numerical_features
        self.numerical_scaler: StandardScaler = numerical_scaler()
        self.one_hot_encoder: OneHotEncoder = one_hot_encoder()
        self.val_split: float = val_split
        self.stratify: bool = stratify
        self.categorical_transformers: dict[str, MinShifter] = {
            feature: MinShifter() for feature in self.categorical_features
        }

    def split(self):
        X = self.dataset.drop(columns=[self.target_name])
        y = self.dataset[self.target_name]

        X_train, X_val, y_train, y_val = train_test_split(
            X, y, test_size=self.val_split, stratify=y if self.stratify else None
        )

        return X_train, X_val, y_train, y_val
    
    def transform_target(self, y: np.ndarray, fit:bool = False)->np.ndarray:
        if fit:
            self.one_hot_encoder.fit(y.reshape(-1, 1))
        return self.one_hot_encoder.transform(y.reshape(-1, 1))
    
    def scale_numericals(self, X: np.ndarray, fit:bool = False)->np.ndarray:
        if fit:
            self.numerical_scaler.fit(X)
        return self.numerical_scaler.transform(X)
    
    def prepare_categoricals(self, X: np.ndarray, fit:bool = False)->np.ndarray:
        for k, feature in enumerate(self.categorical_features):
            if fit:
                self.categorical_transformers[feature].fit(X[:, k])
            X[:, k] = self.categorical_transformers[feature].transform(X[:, k])
        return X
    
    def _run_pipeline(self, dataset: DatasetModel, fit: bool)->DatasetModel:
        if dataset.target is not None:
            dataset.target = self.transform_target(dataset.target, fit)
        dataset.numerical_features = self.scale_numericals(dataset.numerical_features, fit)
        dataset.categorical_features = self.prepare_categoricals(dataset.categorical_features, fit)
        return dataset
    
    def dataset_pipeline(self)->tuple[DatasetModel, DatasetModel]:
        X_train, X_val, y_train, y_val = self.split()
        
        self.train = DatasetModel(
            numerical_feature_names=self.numerical_features,
            categorical_feature_names=self.categorical_features,
            numerical_features=X_train[self.numerical_features].values,
            categorical_features=X_train[self.categorical_features].values,
            target=y_train.values
        )

        self._run_pipeline(self.train, True)

        self.val = DatasetModel(
            numerical_feature_names=self.numerical_features,
            categorical_feature_names=self.categorical_features,
            numerical_features=X_val[self.numerical_features].values,
            categorical_features=X_val[self.categorical_features].values,
            target=y_val.values
        )

        self._run_pipeline(self.val, False)

        return self.train, self.val
    
    def run_pipeline_new_data(self, data: pd.DataFrame)->DatasetModel:
        dataset = DatasetModel(
            numerical_feature_names=self.numerical_features,
            categorical_feature_names=self.categorical_features,
            numerical_features=data[self.numerical_features].values,
            categorical_features=data[self.categorical_features].values,
        )

        return self._run_pipeline(dataset, False)
    
    def get_class_labels(self, predictions: np.ndarray)->np.ndarray:
        output_classes = self.one_hot_encoder.categories_[0].shape[0]
        predictions_ints = np.argmax(predictions, axis=1)
        predictions_ohe = np.zeros((len(predictions_ints), output_classes))
        predictions_ohe[np.arange(len(predictions_ints)), predictions_ints] = 1
        labels = self.one_hot_encoder.inverse_transform(predictions_ohe)
        return labels

In [8]:
preprocessing = PreProcessingPipeline(
    dataset=train,
    target_name=target,
    categorical_features=categoricals,
    numerical_features=numericals,
    val_split=0.2
)

train_data, val_data = preprocessing.dataset_pipeline()

In [9]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
import json
import os

# set seed
torch.manual_seed(42)

class CategoryEmbedding(nn.Module):

    def __init__(
            self,
            num_embeddings: int,
            dropout: float = 0.1,
            init_mode: str = "fan_out",
            init_nonlinearity: str = "relu"
    ):
        super(CategoryEmbedding, self).__init__()
        self.embedding_dim = num_embeddings // 2
        self.num_embeddings = num_embeddings
        self.embedding = nn.Embedding(
            num_embeddings=self.num_embeddings,
            embedding_dim=self.embedding_dim
        )
        nn.init.kaiming_normal_(self.embedding.weight, mode=init_mode, nonlinearity=init_nonlinearity)
        self.dropout = nn.Dropout(dropout)
        self.relu = nn.ReLU()

    def forward(self, x: torch.Tensor):
        x = torch.where(x < self.num_embeddings, x, torch.tensor(self.num_embeddings - 1, dtype=torch.long))
        x = torch.where(x < 0, x, torch.tensor(0, dtype=torch.long))
        # return self.dropout(self.relu(self.embedding(x)))
        return self.dropout(self.embedding(x))

    def trainable(self, trainable: bool):
        self.embedding.weight.requires_grad = trainable

class FFUnit(nn.Module):

    def __init__(
            self,
            in_features: int,
            out_features: int,
            dropout: float = 0.1,
            init_mode: str = "fan_in",
            init_nonlinearity: str = "relu"
    ):
        super(FFUnit, self).__init__()
        self.linear = nn.Linear(in_features, out_features)
        nn.init.kaiming_normal_(self.linear.weight, mode=init_mode, nonlinearity=init_nonlinearity)
        self.batch_norm = nn.BatchNorm1d(out_features)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor):
        return self.dropout(self.activation(self.batch_norm(self.linear(x))))

class ANN(nn.Module):

    def __init__(
            self,
            numerical_features: int,
            categorical_max_values: list[int],
            output_classes: int,
            hidden_units: list[int],
            dropout: float = 0.05
    ):
        super(ANN, self).__init__()
        self.categorical_embeddings = nn.ModuleList([
            CategoryEmbedding(max_val + 1, dropout) for max_val in categorical_max_values
        ])
        self.numerical_features = numerical_features
        # self.numerical_ff_unit = FFUnit(
        #     in_features=numerical_features,
        #     out_features=numerical_features,
        #     dropout=dropout,
        #     init_mode="fan_out",
        # )
        self.categorical_features = sum(embedding.embedding_dim for embedding in self.categorical_embeddings)
        # self.categorical_ff_unit = FFUnit(
        #     in_features=self.categorical_features,
        #     out_features=self.categorical_features,
        #     dropout=dropout
        # )
        self.hidden_units = hidden_units
        self.ff_units = nn.ModuleList([
            FFUnit(
                in_features=self.numerical_features + self.categorical_features,
                out_features=hidden_units[0],
                dropout=dropout,
                init_mode="fan_in",
            )
        ])
        for i in range(1, len(hidden_units)):
            self.ff_units.append(
                FFUnit(
                    in_features=hidden_units[i - 1],
                    out_features=hidden_units[i],
                    dropout=dropout,
                    init_mode="fan_in",
                )
            )
        self.output = nn.Linear(hidden_units[-1], output_classes)
        nn.init.kaiming_normal_(self.output.weight, mode="fan_in")
        self.output_activation = nn.Softmax(dim=1)

    def forward(self, numerical: torch.Tensor, categorical: torch.Tensor):
        assert categorical.shape[1] == len(self.categorical_embeddings), "Mismatch between input data and model embeddings: {} != {}".format(categorical.shape[1], len(self.categorical_embeddings))
        embeddings = [self.categorical_embeddings[i](categorical[:, i]) for i in range(len(self.categorical_embeddings))]
        embeddings = torch.cat(embeddings, dim=1)
        # embeddings = self.categorical_ff_unit(embeddings)
        # numerical = self.numerical_ff_unit(numerical)
        x = torch.cat([numerical, embeddings], dim=1)
        for unit in self.ff_units:
            x = unit(x)
        logits = self.output(x)
        return logits

    def predict(self, numerical: torch.Tensor, categorical: torch.Tensor):
        return self.output_activation(self(numerical, categorical))
    
    def trainable_embeddings(self, trainable: bool):
        for embedding in self.categorical_embeddings:
            embedding.trainable(trainable)

    def _create_config(self) -> dict:
        categorical_max_values = [int(emb.num_embeddings - 1) for emb in self.categorical_embeddings]
        return {
            "numerical_features": self.numerical_features,
            "categorical_max_values": categorical_max_values,
            "hidden_units": self.hidden_units,
            "output_classes": self.output.out_features
        }

    def save_pretrained(self, model_save_path: str):
        # Create directory if it doesn't exist
        os.makedirs(model_save_path, exist_ok=True)

        # Save model state
        torch.save(self.state_dict(), f"{model_save_path}/model.pth")

        # Save config
        config_path = f"{model_save_path}/config.json"
        with open(config_path, 'w') as f:
            json.dump(self._create_config(), f)

    @classmethod
    def load_pretrained(cls, model_save_path: str):
        # Load config
        config_path = f"{model_save_path}/config.json"
        with open(config_path, 'r') as f:
            config = json.load(f)

        # Create model
        model = cls(
            numerical_features=config["numerical_features"],
            categorical_max_values=config["categorical_max_values"],
            hidden_units=config["hidden_units"],
            output_classes=config["output_classes"]
        )

        # Load model state
        model_path = f"{model_save_path}/model.pth"
        map_location = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model.load_state_dict(torch.load(model_path, map_location=map_location))

        return model
    

class Dataset(Dataset):
    
        def __init__(
                self,
                numerical_features: np.ndarray,
                categorical_features: np.ndarray,
                target: Optional[np.ndarray] = None
        ):
            self.numerical_features = numerical_features
            self.categorical_features = categorical_features
            self.target = target
    
        def __len__(self):
            return self.numerical_features.shape[0]
    
        def __getitem__(self, idx):
            if self.target is not None:
                return {
                    "numerical": torch.tensor(self.numerical_features[idx], dtype=torch.float32),
                    "categorical": torch.tensor(self.categorical_features[idx], dtype=torch.long),
                    "target": torch.tensor(self.target.toarray()[idx], dtype=torch.float32)
                }
            else:
                return {
                    "numerical": torch.tensor(self.numerical_features[idx], dtype=torch.float32),
                    "categorical": torch.tensor(self.categorical_features[idx], dtype=torch.long)
                }
        
train_dataset = Dataset(
    numerical_features=train_data.numerical_features,
    categorical_features=train_data.categorical_features,
    target=train_data.target
)

val_dataset = Dataset(
    numerical_features=val_data.numerical_features,
    categorical_features=val_data.categorical_features,
    target=val_data.target
)

In [11]:
from sklearn.metrics import accuracy_score, f1_score
from tqdm.auto import tqdm
import os

class Trainer:

    def __init__(
            self,
            model: nn.Module,
            optimizer: torch.optim.Optimizer,
            criterion: nn.Module,
            train_dataset: Dataset,
            val_dataset: Dataset,
            model_save_path: str,
            batch_size: int = 32,
            n_epochs: int = 100,
            gradient_accumulation_steps: int = 1,
            eval_steps: int|float = 10,
            early_stopping_patience: int = 3
    ):
        self.model = model
        self.optimizer = optimizer
        self.criterion = criterion
        self.train_dataset = train_dataset
        self.val_dataset = val_dataset
        self.train_loader: DataLoader = None
        self.val_loader: DataLoader = None
        self.batch_size = batch_size
        self.n_epochs = n_epochs
        self.gradient_accumulation_steps = gradient_accumulation_steps
        self.eval_steps = eval_steps
        self.early_stopping_patience = early_stopping_patience
        self.best_loss: float = float("inf")
        self.model_save_path = model_save_path
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        if not os.path.exists(self.model_save_path):
            os.makedirs(self.model_save_path)

    def _process_batch(
            self,
            batch: dict, 
            step: int
    ):
        numerical = batch["numerical"]
        categorical = batch["categorical"]
        target = batch["target"]
        
        numerical = numerical.to(self.device)
        categorical = categorical.to(self.device)
        target = target.to(self.device)

        self.optimizer.zero_grad()
        output = self.model(numerical, categorical)
        loss = self.criterion(output, target)
        loss.backward()
        if step % self.gradient_accumulation_steps == 0:
            self.optimizer.step()
        return loss.item()
    
    def _evaluate(
            self
    ):
        self.model.eval()
        targets = []
        predictions = []
        total_loss = 0
        progress_bar = tqdm(enumerate(self.val_loader), total=len(self.val_loader), desc=f"Validating...", leave=False, position=1)
        with torch.no_grad():
            for batch in self.val_loader:
                progress_bar.update(1)
                numerical = batch["numerical"]
                categorical = batch["categorical"]
                target = batch["target"]
                numerical = numerical.to(self.device)
                categorical = categorical.to(self.device)
                target = target.to(self.device)
                output = self.model(numerical, categorical)
                output = output.cpu()
                target = target.cpu()
                loss = self.criterion(output, target)
                targets.extend(target.numpy())
                predictions.extend(output.numpy())
                total_loss += loss.item()
        return targets, predictions, total_loss / len(self.val_loader)
    
    def _run_epoch(
            self,
            epoch: int,
            step: int,
            patience: int
    ):
        self.model.train()
        self.model.to(self.device)
        total_loss = 0
        progress_bar = tqdm(enumerate(self.train_loader), total=len(self.train_loader), desc=f"Epoch {epoch+1}", leave=False, position=0)
        for batch in self.train_loader:
            progress_bar.update(1)
            loss = self._process_batch(batch, step)
            total_loss += loss
            if step % self.eval_steps == 0 and step != 0:
                targets, predictions, val_loss = self._evaluate()
                # convert prediction logits to class
                predictions = np.argmax(predictions, axis=1)
                targets = np.argmax(targets, axis=1)
                accuracy = accuracy_score(targets, predictions)
                f1 = f1_score(targets, predictions, average="macro")
                print("="*50)
                print("="*50)
                print(f"Epoch: {epoch+1}")
                print(f"Training Loss: {total_loss / self.eval_steps}")
                print(f"Validation Loss: {val_loss}, Accuracy: {accuracy}, F1: {f1}")
                print("="*50)
                print("="*50)
                if val_loss < self.best_loss:
                    print("** NEW BEST MODEL FOUND! **")
                    self.best_loss = val_loss
                    patience = 0
                    self.model.save_pretrained(self.model_save_path)
                else:
                    patience += 1
                if patience == self.early_stopping_patience:
                    break
                total_loss = 0
            step += 1
        return step, patience
    
    def train(
            self
    ):
        step = 0
        self.best_loss = float("inf")
        patience = 0
        early_stopped = False
        self.train_loader = DataLoader(self.train_dataset, batch_size=self.batch_size, shuffle=True)
        self.val_loader = DataLoader(self.val_dataset, batch_size=self.batch_size, shuffle=False)
        if isinstance(self.eval_steps, float):
            self.eval_steps = int((len(train_dataset) / self.batch_size) * self.eval_steps)
        for epoch in range(self.n_epochs):
            step, patience = self._run_epoch(epoch, step, patience)
            if epoch == 1:
                self.model.trainable_embeddings(False)
            if patience == self.early_stopping_patience:
                early_stopped = True
                break
        if early_stopped:
            print("** EARLY STOPPING... **")
            self.model = ANN.load_pretrained(self.model_save_path)
        else:
            self.model.save_pretrained(self.model_save_path)
        return self.model

numerical_features = train_data.numerical_features.shape[1]
categorical_features = train_data.categorical_features.shape[1]
output_classes = train_data.target.shape[1]
hidden_units = [int(128), int(64), int(32)]

model = ANN(
    numerical_features=numerical_features,
    categorical_max_values=[
        train_data.categorical_features[:, i].max() 
        for i in range(categorical_features)
    ],
    output_classes=output_classes,
    hidden_units=hidden_units
)

optimizer = torch.optim.Adam(
    model.parameters(),
    lr=1e-4,
    weight_decay=0.01
)

criterion = nn.CrossEntropyLoss()

trainer = Trainer(
    model=model,
    optimizer=optimizer,
    criterion=criterion,
    train_dataset=train_dataset,
    val_dataset=val_dataset,
    model_save_path="./model",
    gradient_accumulation_steps=1,
    eval_steps=0.2,
    early_stopping_patience=5,
    batch_size=32,
)

  from .autonotebook import tqdm as notebook_tqdm


In [12]:
model = trainer.train()

Epoch 1:  20%|██        | 383/1913 [03:19<13:23,  1.91it/s]

Epoch: 1
Training Loss: 0.8275176863239698
Validation Loss: 0.6670208794214532, Accuracy: 0.770321484579195, F1: 0.7406429906611834
** NEW BEST MODEL FOUND! **


Epoch 1:  40%|███▉      | 765/1913 [06:43<10:04,  1.90it/s]

Epoch: 1
Training Loss: 0.6371946043837133
Validation Loss: 0.6035874925227155, Accuracy: 0.7763983272347099, F1: 0.7484631799212819
** NEW BEST MODEL FOUND! **


Epoch 1:  60%|█████▉    | 1147/1913 [10:06<06:40,  1.91it/s]

Epoch: 1
Training Loss: 0.5987304458053324
Validation Loss: 0.5971181910321708, Accuracy: 0.7658782017773131, F1: 0.7444814010582189
** NEW BEST MODEL FOUND! **


Epoch 1:  80%|███████▉  | 1530/1913 [13:33<07:52,  1.23s/it]

Epoch: 1
Training Loss: 0.5878485025769753
Validation Loss: 0.5981724078072884, Accuracy: 0.7802535284892839, F1: 0.7520908103429672


Epoch 1: 100%|█████████▉| 1911/1913 [16:57<00:01,  1.88it/s]

Epoch: 1
Training Loss: 0.6012261218893591
Validation Loss: 0.5960061316201483, Accuracy: 0.7960010454783063, F1: 0.7595544746369384
** NEW BEST MODEL FOUND! **


Epoch 2:  20%|█▉        | 381/1913 [03:26<31:33,  1.24s/it] 

Epoch: 2
Training Loss: 0.6567351346084584
Validation Loss: 0.6062277268641677, Accuracy: 0.77946941975954, F1: 0.7536386568969133


Epoch 2:  40%|███▉      | 763/1913 [06:54<23:45,  1.24s/it]

Epoch: 2
Training Loss: 0.5950847332078125
Validation Loss: 0.6063687797628017, Accuracy: 0.7417668583376895, F1: 0.7292720509409539


Epoch 2:  60%|█████▉    | 1144/1913 [10:19<06:49,  1.88it/s]

Epoch: 2
Training Loss: 0.5770867892116777
Validation Loss: 0.5826391320138983, Accuracy: 0.7735232618923157, F1: 0.7516618844513158
** NEW BEST MODEL FOUND! **


Epoch 2:  80%|███████▉  | 1526/1913 [13:46<03:25,  1.88it/s]

Epoch: 2
Training Loss: 0.581522385955481
Validation Loss: 0.5791259238142559, Accuracy: 0.7805802404600104, F1: 0.7566842375489321
** NEW BEST MODEL FOUND! **


Epoch 2: 100%|█████████▉| 1909/1913 [17:17<00:04,  1.22s/it]

Epoch: 2
Training Loss: 0.5751915307257188
Validation Loss: 0.5835602588344965, Accuracy: 0.7863303711447988, F1: 0.7582500350108181


Epoch 3:  20%|█▉        | 382/1913 [00:14<03:52,  6.58it/s] 

Epoch: 3
Training Loss: 0.6220131209851559
Validation Loss: 0.5922149558530222, Accuracy: 0.78254051228437, F1: 0.7570481626743102


Epoch 3:  40%|████      | 766/1913 [00:27<02:54,  6.57it/s]

Epoch: 3
Training Loss: 0.5854725807671147
Validation Loss: 0.5840131774824696, Accuracy: 0.7965237846314689, F1: 0.7626204820568044


Epoch 3:  59%|█████▉    | 1138/1913 [00:38<00:22, 34.99it/s]

Epoch: 3
Training Loss: 0.5873049345862179
Validation Loss: 0.5769317035013052, Accuracy: 0.7846968112911658, F1: 0.7594784795425534
** NEW BEST MODEL FOUND! **


Epoch 3:  80%|███████▉  | 1521/1913 [00:52<00:11, 34.57it/s]

Epoch: 3
Training Loss: 0.5656037951639186
Validation Loss: 0.5752002145153994, Accuracy: 0.7899242028227914, F1: 0.7621432082856682
** NEW BEST MODEL FOUND! **


Epoch 3: 100%|█████████▉| 1904/1913 [01:06<00:00, 34.46it/s]

Epoch: 3
Training Loss: 0.5752021444093495
Validation Loss: 0.5751263611874152, Accuracy: 0.7837820177731312, F1: 0.7588371153603898
** NEW BEST MODEL FOUND! **


Epoch 4:  20%|█▉        | 379/1913 [00:13<04:02,  6.32it/s] 

Epoch: 4
Training Loss: 0.5976190166791696
Validation Loss: 0.5889303746452411, Accuracy: 0.7856116048092002, F1: 0.759475539688666


Epoch 4:  40%|███▉      | 760/1913 [00:28<04:10,  4.61it/s]

Epoch: 4
Training Loss: 0.5802397942511823
Validation Loss: 0.5829754978853875, Accuracy: 0.7976346053319394, F1: 0.7645719811903314


Epoch 4:  60%|█████▉    | 1143/1913 [00:42<01:57,  6.53it/s]

Epoch: 4
Training Loss: 0.5812896348497006
Validation Loss: 0.5839403085021734, Accuracy: 0.7971772085729221, F1: 0.763825851490694


Epoch 4:  80%|███████▉  | 1527/1913 [00:55<00:57,  6.68it/s]

Epoch: 4
Training Loss: 0.5700094401836395
Validation Loss: 0.5767281928366061, Accuracy: 0.78659174072138, F1: 0.7589958314118693


Epoch 4:  99%|█████████▉| 1899/1913 [01:06<00:00, 34.80it/s]

Epoch: 4
Training Loss: 0.5815176256196037
Validation Loss: 0.5742836272293441, Accuracy: 0.792472556194459, F1: 0.764381010435668
** NEW BEST MODEL FOUND! **


Epoch 5:  20%|█▉        | 374/1913 [00:13<05:15,  4.87it/s] 

Epoch: 5
Training Loss: 0.5887955896829435
Validation Loss: 0.5885224261463061, Accuracy: 0.7807109252483011, F1: 0.7567257115592372


Epoch 5:  40%|███▉      | 757/1913 [00:27<04:25,  4.35it/s]

Epoch: 5
Training Loss: 0.5817614945519657
Validation Loss: 0.5833222254457454, Accuracy: 0.7762676424464192, F1: 0.7521235049422194


Epoch 5:  60%|█████▉    | 1141/1913 [00:41<02:21,  5.45it/s]

Epoch: 5
Training Loss: 0.5799596693503295
Validation Loss: 0.5805773297655309, Accuracy: 0.7702561421850497, F1: 0.750554198732612


Epoch 5:  80%|███████▉  | 1525/1913 [00:55<00:58,  6.66it/s]

Epoch: 5
Training Loss: 0.574587869511537
Validation Loss: 0.579198559788921, Accuracy: 0.7729351803450079, F1: 0.7528466694807268


                                                            

Epoch: 5
Training Loss: 0.5708548680181903
Validation Loss: 0.5788962571630896, Accuracy: 0.7837820177731312, F1: 0.757680464882115
** EARLY STOPPING... **


In [18]:
# model = ANN.load_pretrained(r"C:\Users\paulw\Documents\graduation_competition\model")

# use model on val_dataset
model.eval()
model.to(torch.device("cpu"))
targets = []
predictions = []
total_loss = 0
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
progress_bar = tqdm(enumerate(val_loader), total=len(val_loader), desc=f"Validating...", leave=False, position=1)
with torch.no_grad():
    for batch in val_loader:
        progress_bar.update(1)
        numerical = batch["numerical"]
        categorical = batch["categorical"]
        target = batch["target"]
        output = model(numerical, categorical)
        output = output.cpu()
        target = target.cpu()
        loss = criterion(output, target)
        targets.extend(target.numpy())
        predictions.extend(output.numpy())
        total_loss += loss.item()

# convert prediction logits to class
predictions = np.argmax(predictions, axis=1)
targets = np.argmax(targets, axis=1)
accuracy = accuracy_score(targets, predictions)
f1 = f1_score(targets, predictions, average="macro")
print("="*50)
print("="*50)
print(f"Validation Loss: {total_loss / len(val_loader)}")
print(f"Accuracy: {accuracy}")
print(f"F1: {f1}")
print("="*50)
print("="*50)



Validation Loss: 0.5742836272293441
Accuracy: 0.792472556194459
F1: 0.764381010435668




In [14]:
test = pd.read_csv(test_path)

test_data = preprocessing.run_pipeline_new_data(test)

test_dataset = Dataset(
    numerical_features=test_data.numerical_features,
    categorical_features=test_data.categorical_features,
    target=test_data.target
)

test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

predictions = []
model.eval()
model.to(torch.device("cpu"))
with torch.no_grad():
    for batch in test_loader:
        numerical = batch["numerical"]
        categorical = batch["categorical"]
        output = model.predict(numerical, categorical)
        predictions.extend(output.numpy())

In [15]:
labels = preprocessing.get_class_labels(predictions)

In [16]:
test["Target"] = labels.reshape(-1)

In [17]:
test[["id", "Target"]].to_csv("submission.csv", index=False)

