In [1]:
import sys

# Location of the project's source tree, relative to this notebook.
src_dir = '../src'

# Register it on the import path once, so re-running this cell does not
# append duplicate entries.
if sys.path.count(src_dir) == 0:
    sys.path.append(src_dir)

## Dataloaders

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader

from classifier.dataloader.dataloader import create_dataloaders
from classifier.models.simple_net import NeuralNetwork

In [3]:
# Fix PyTorch's global RNG seed. Only torch is seeded here.
torch.manual_seed(seed=0)

<torch._C.Generator at 0x7b410f5aebf0>

In [4]:
# Dataset location and loader settings for the TF-IDF experiments.
data_raw_path = '../data/data_raw.csv'
batch_size = 32
# A feature count, so it is kept as an int rather than the float 1e4.
# Cells that still wrap it in int(...) keep working unchanged.
max_features = 10000
train_ratio = .7
# NOTE(review): presumably the remaining 0.15 of the data forms the test
# split - confirm against create_dataloaders.
val_ratio = .15
shuffle = True

# Build the three loaders from the raw CSV using TF-IDF vectorization.
# NOTE(review): val_loader is not consumed by any cell visible in this notebook.
train_loader, val_loader, test_loader = create_dataloaders(
    csv_path=data_raw_path,
    vectorization='tf-idf',
    batch_size=batch_size,
    max_features=max_features,
    train_ratio=train_ratio,
    val_ratio=val_ratio,
    shuffle=shuffle,
)

## Training a simple dense network

In [5]:
# Optimisation settings, read by the optimizer and training cells.
epochs = 5
learning_rate = 0.001

# Network dimensions: the input width follows the vectorizer's feature count.
hidden_sizes = [128, 64]
input_size = int(max_features)

model = NeuralNetwork(input_size=input_size, hidden_sizes=hidden_sizes)

In [6]:
# Adam over the current model's parameters, paired with a cross-entropy loss.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [3]:
from classifier.training_loop import train, evaluate

In [8]:
# Run the training loop on the current `model` over train_loader for `epochs` passes.
train(model=model, criterion=criterion, optimizer=optimizer,
      data_loader=train_loader, epochs=epochs)

Epoch 1/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 2/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 3/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 4/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 5/5:   0%|          | 0/219 [00:00<?, ?batch/s]

In [9]:
# Score the current `model` on test_loader and unpack the two values
# evaluate() returns.
average_loss, accuracy = evaluate(model=model, criterion=criterion, data_loader=test_loader)

Average Loss: 1.8722, Accuracy: 35.00%


## Convolutional network

In [3]:
from classifier.models.conv_net import ConvolutionalNetwork

In [29]:
# Optimisation settings for the convolutional run.
epochs = 5
learning_rate = 0.001

# The network is sized from the vectorizer's feature count.
input_size = int(max_features)

# Rebinds `model`: from here on it refers to the convolutional network.
model = ConvolutionalNetwork(input_features=input_size)

In [30]:
# Fresh optimizer bound to the current model's parameters, plus the loss.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [31]:
# Run the training loop on the current `model` over train_loader for `epochs` passes.
train(model=model, criterion=criterion, optimizer=optimizer,
      data_loader=train_loader, epochs=epochs)

Epoch 1/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 2/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 3/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 4/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 5/5:   0%|          | 0/219 [00:00<?, ?batch/s]

In [32]:
# Score the current `model` on test_loader and unpack the two values
# evaluate() returns.
average_loss, accuracy = evaluate(model=model, criterion=criterion, data_loader=test_loader)

Average Loss: 1.8833, Accuracy: 35.53%


## Embedding network

In [4]:
from classifier.models.embedded_net import EmbeddingConvolutionalNetwork

In [5]:
# Dataset location and loader settings for the embedding experiment.
data_raw_path = '../data/data_raw.csv'
batch_size = 32
# A feature count, so it is kept as an int rather than the float 1e4.
max_features = 10000
train_ratio = .7
# NOTE(review): presumably the remaining 0.15 of the data forms the test
# split - confirm against create_dataloaders.
val_ratio = .15
shuffle = True

# Rebuild the loaders with 'count' vectorization instead of TF-IDF.
# This rebinds train_loader / val_loader / test_loader for the cells below.
train_loader, val_loader, test_loader = create_dataloaders(
    csv_path=data_raw_path,
    vectorization='count',
    batch_size=batch_size,
    max_features=max_features,
    train_ratio=train_ratio,
    val_ratio=val_ratio,
    shuffle=shuffle,
)



In [6]:
# Optimisation settings for the embedding run.
epochs = 5
learning_rate = 0.001

# Embedding network dimensions.
# NOTE(review): vocab_size repeats the value given to the vectorizer as
# max_features; presumably the two must stay in sync - confirm.
# NOTE(review): sequence_length is 100 while the loaders were built with
# 10000 features; verify how the model consumes the count vectors.
sequence_length = 100
embedding_dim = 100
vocab_size = 10000

# Rebinds `model`: from here on it refers to the embedding network.
model = EmbeddingConvolutionalNetwork(
    vocab_size=vocab_size, embedding_dim=embedding_dim, sequence_length=sequence_length
)

In [7]:
# Fresh optimizer bound to the current model's parameters, plus the loss.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [8]:
# Run the training loop on the current `model` over train_loader for `epochs` passes.
train(model=model, criterion=criterion, optimizer=optimizer,
      data_loader=train_loader, epochs=epochs)

Epoch 1/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 2/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 3/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 4/5:   0%|          | 0/219 [00:00<?, ?batch/s]

Epoch 5/5:   0%|          | 0/219 [00:00<?, ?batch/s]

In [9]:
# Score the current `model` on test_loader and unpack the two values
# evaluate() returns.
average_loss, accuracy = evaluate(model=model, criterion=criterion, data_loader=test_loader)

Average Loss: 1.5296, Accuracy: 34.07%
