In [1]:
import os
from google.colab import drive

# Mount Google Drive
drive.mount("/content/drive")

%cd "/content/drive/MyDrive/CS5780/Movie-Review-Preference-Analysis"

# Verify the current working directory
print("Current working directory:", os.getcwd())

# List the contents of the folder
folder_contents = os.listdir()
print(folder_contents)

Mounted at /content/drive
/content/drive/MyDrive/CS5780/Movie-Review-Preference-Analysis
Current working directory: /content/drive/MyDrive/CS5780/Movie-Review-Preference-Analysis
['README.md', '.gitignore', 'try.ipynb', 'LICENSE', 'data', 'architecture_result.csv', 'submission.csv', 'main.ipynb']


# 1. Import

In [2]:
import random
import pandas as pd
import numpy as np
from tqdm import tqdm
from typing import Optional
import itertools

# Sklearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

# Torch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
from torch.utils.data import DataLoader, TensorDataset

# 2. Data Loading

In [3]:
import numpy as np

def load_npz(file_path):
    with np.load(file_path) as data:
        return {key: data[key] for key in data}

train_data = load_npz(r'./data/train.npz')
test_data = load_npz(r'./data/test.npz')
train_emb1, train_emb2, train_labels = train_data['emb1'], train_data['emb2'], train_data['preference']
test_emb1, test_emb2 = test_data['emb1'], test_data['emb2']

# 3. Exploration

In [4]:
train_data

{'uid': array([    0,     1,     2, ..., 18747, 18748, 18749]),
 'emb1': array([[-0.05075016, -0.03491386, -0.05787281, ...,  0.00020284,
          0.02388327, -0.02491781],
        [-0.12402835, -0.07631648, -0.05782915, ...,  0.02713838,
          0.01394665,  0.0186507 ],
        [-0.06794146, -0.0385992 ,  0.04476113, ...,  0.07999779,
          0.04943484,  0.00783883],
        ...,
        [ 0.02096516, -0.00752076, -0.06958353, ...,  0.01346127,
          0.01917063, -0.06059628],
        [-0.00901941,  0.01330765, -0.02343761, ..., -0.02690429,
          0.0084649 ,  0.01999134],
        [-0.05510234,  0.00251053, -0.01775946, ...,  0.00322949,
         -0.02700103,  0.01986161]], dtype=float32),
 'emb2': array([[-0.03255587,  0.01327268, -0.00508326, ..., -0.01196616,
         -0.03564733, -0.03713938],
        [-0.00014027,  0.03904634,  0.0592997 , ...,  0.00117963,
          0.04012304,  0.07394706],
        [-0.068197  , -0.0943828 ,  0.04236921, ...,  0.0225933 ,
        

In [5]:
for key, value in train_data.items():
    print(f"Length of '{key}': {len(value) if isinstance(value, np.ndarray) else 'Not an array'}")

Length of 'uid': 18750
Length of 'emb1': 18750
Length of 'emb2': 18750
Length of 'preference': 18750


In [6]:
train_data.keys()

dict_keys(['uid', 'emb1', 'emb2', 'preference'])

In [7]:
# x1
print(train_data['emb1'][0].shape) # (384,)
# x2
print(train_data['emb2'][0].shape) # (384,)
# y
print(train_data['preference'][0]) # 1
# train_data['emb1'][0]

(384,)
(384,)
1


# 4. Preprocessing

In [8]:
## Parameters

# Preprocessing Parameters
validation_size = 0.2
RAND_STATE = 5780
shuffle_split = True
standardized = False

In [9]:
def train_validation_split(Xs, Ys, validation_size: float=0.2):
    Xs_tr, Xs_va, Ys_tr, Ys_va = train_test_split(Xs, Ys, test_size=validation_size, random_state=RAND_STATE, shuffle=shuffle_split, stratify=Ys)
    return torch.Tensor(Xs_tr), torch.Tensor(Xs_va), torch.Tensor(Ys_tr).long(), torch.Tensor(Ys_va).long()

In [10]:
def standardization(Xs):
    scaler = StandardScaler()
    Xs_scaled = scaler.fit_transform(Xs)
    return torch.Tensor(Xs_scaled)

In [11]:
print(train_data['emb1'].shape) # (n x d): (18750, 384)
print(train_data['emb2'].shape) # (n x d): (18750, 384)

# Concatenate the input in to a single long vector
Xs = np.concatenate((train_data['emb1'], train_data['emb2']), axis=1)
Ys = train_data['preference']

# Train Validation Split
Xs_tr, Xs_va, Ys_tr, Ys_va = train_validation_split(Xs, Ys, validation_size)

if standardized:
    Xs_tr = standardization(Xs_tr)
    Xs_va = standardization(Xs_va)

# Convert to Torch
print(f'Xs_tr.shape: {Xs_tr.shape}')
print(f'Ys_tr.shape: {Ys_tr.shape}')
print(f'Xs_va.shape: {Xs_va.shape}')
print(f'Ys_va.shape: {Ys_va.shape}')

(18750, 384)
(18750, 384)
Xs_tr.shape: torch.Size([15000, 768])
Ys_tr.shape: torch.Size([15000])
Xs_va.shape: torch.Size([3750, 768])
Ys_va.shape: torch.Size([3750])


# 5. Model

In [12]:
# Parameters
embedding_dim = 768
hidden_dim = 128
output_dim = 2
num_layers = 1
activation = "relu"

# Improvement
dropout_rate = 0.5
include_batch_norm = True
initialize_weights = False

In [13]:
# FFNN Model
class FFNN(nn.Module):
    def __init__(
        self,
        embedding_dim: int,
        hidden_dim: int,
        output_dim: int,
        activation: str = "relu",
        num_layers: int = 1,
        include_batch_norm: bool = False,
        initialize_weights: bool = False,
        dropout_rate: Optional[float] = None
    ) -> None:

        super().__init__()
        assert num_layers > 0

        # FFNN architecture attributes
        self.embedding_dim = embedding_dim
        self.hidden_dim = hidden_dim
        self.output_dim = output_dim
        self.activation = activation
        self.num_layers = num_layers

        # Layer attributes
        self.input_layer = nn.Linear(self.embedding_dim, self.hidden_dim)
        self.hidden_layers = nn.ModuleList()
        for _ in range(self.num_layers - 1):
            self.hidden_layers.append(nn.Linear(self.hidden_dim, self.hidden_dim))
        self.output_layer = nn.Linear(self.hidden_dim, self.output_dim)

        # Weight initialization attributes
        self.initialize_weights = initialize_weights
        if initialize_weights:
            init.xavier_normal_(self.input_layer.weight)
            for hidden_layer in self.hidden_layers:
                init.xavier_normal_(hidden_layer.weight)
            init.xavier_normal_(self.output_layer.weight)

        # FFNN performance improvement attributes
        self.dropout_rate = dropout_rate
        if dropout_rate is not None:
            self.dropout = nn.Dropout(p=self.dropout_rate)
        else:
            self.dropout = None
        self.include_batch_norm = include_batch_norm
        if include_batch_norm:
            self.batch_norm = nn.BatchNorm1d(self.hidden_dim)

    def forward(self, embeddings: torch.Tensor) -> torch.Tensor:
        x = self.input_layer(embeddings)
        for hidden_layer in self.hidden_layers:
            # Forward layer
            x = hidden_layer(x)

            # Batch normalization layer
            if self.include_batch_norm:
                x = self.batch_norm(x)

            # Non-linear layer
            if self.activation == "relu":
                x = F.relu(x)
            elif self.activation == "tanh":
                x = F.tanh(x)
            elif self.activation == "sigmoid":
                x = F.sigmoid(x)

            # Drop out regularization layer
            if self.dropout_rate is not None:
                x = self.dropout(x)
        output = self.output_layer(x)
        return output

In [14]:
# Test
ffnn = FFNN(
    embedding_dim=embedding_dim,
    hidden_dim=hidden_dim,
    output_dim=output_dim,
    activation=activation,
    num_layers=num_layers,
    include_batch_norm=include_batch_norm,
    initialize_weights=initialize_weights,
)
ffnn

FFNN(
  (input_layer): Linear(in_features=768, out_features=128, bias=True)
  (hidden_layers): ModuleList()
  (output_layer): Linear(in_features=128, out_features=2, bias=True)
  (batch_norm): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)

# 6. Model Training

In [15]:
# Parameters
epochs = 10
batch_size = 100
alpha = 0.1
beta = 0.9
rho1 = 0.99
rho2 = 0.999
# grad_clip_max_norm = 1

# Optimizers
sgd_optimizer = torch.optim.SGD(ffnn.parameters(), lr=alpha)
adam_optimizer = torch.optim.Adam(ffnn.parameters(), lr=alpha)
adamw_optimizer = torch.optim.AdamW(ffnn.parameters(), lr=alpha)
rmsprop_optimizer = torch.optim.RMSprop(ffnn.parameters(), lr=alpha)

# Loss functions
binary_cross_entropy_loss_fn = torch.nn.BCELoss()
cross_entropy_loss_fn = torch.nn.CrossEntropyLoss()

In [16]:
# evaluate a trained model on MNIST data
#
# dataloader    dataloader of examples to evaluate on
# model         trained PyTorch model
# loss_fn       loss function (e.g. torch.nn.CrossEntropyLoss)
#
# returns       tuple of (loss, accuracy), both python floats
@torch.no_grad()
def evaluate_model(Xs_va, Ys_va, model, loss_fn):
	model.eval()
	total_loss = 0.0
	total_correct = 0
	total_samples = 0

	# Create DataLoader for batching
	validation_dataset = TensorDataset(Xs_va, Ys_va)
	validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=True)

	for X, Y in validation_loader:
		Y_pred_prob = model(X)
		loss = loss_fn(Y_pred_prob, Y)
		total_loss += loss.item()

		Y_pred = torch.argmax(Y_pred_prob, dim=1)
		total_correct += torch.sum(Y_pred == Y).item()
		total_samples += Y.size(0)

	average_loss = total_loss / len(validation_loader)
	accuracy = total_correct / total_samples

	return average_loss, accuracy

In [17]:
def train(Xs_tr, Ys_tr, Xs_va, Ys_va, model, loss_fn, optimizer, epochs, batch_size, grad_clip_max_norm: Optional[float] = None):
    validation_losses = []
    validation_accuracies = []

    # Create DataLoader for batching
    train_dataset = TensorDataset(Xs_tr, Ys_tr)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    for epoch in range(epochs):
        # Set to training mode
        model.train()

        total_loss = 0.0

        for i, (X, Y) in enumerate(train_loader):
            # Zero gradients for every batch
            optimizer.zero_grad()

            # Make predictions for this batch
            Y_pred_prob = model(X)

            # Compute the loss and its gradients
            loss = loss_fn(Y_pred_prob, Y)
            loss.backward()

            if grad_clip_max_norm is not None:
                nn.utils.clip_grad_norm_(model.parameters(), max_norm=grad_clip_max_norm)

            # Adjust learning weights
            optimizer.step()

            # Gather data and report
            total_loss += loss.item()

        # # Calculate average training loss
        # avg_train_loss = total_loss / len(train_loader)

        # Evaluate the model
        validation_loss, validation_accuracy = evaluate_model(Xs_va, Ys_va, model, loss_fn)
        validation_losses.append(validation_loss)
        validation_accuracies.append(validation_accuracy)
        print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {loss.item()}, Validation Loss: {validation_loss:.3f}, Validation Accuracy: {validation_accuracy:.3f}")

    best_validation_loss = min(validation_losses)
    best_validation_accuracy = max(validation_accuracies)
    print(f"Minimum Loss: {best_validation_loss:.3f}, Max Accuracy: {best_validation_accuracy:.3f}")
    return best_validation_loss, best_validation_accuracy

In [18]:
# FFNN
train(Xs_tr, Ys_tr, Xs_va, Ys_va, ffnn, cross_entropy_loss_fn, adam_optimizer, epochs, batch_size, grad_clip_max_norm=None)

Epoch 1/10, Training Loss: 0.3509211838245392, Validation Loss: 0.288, Validation Accuracy: 0.883
Epoch 2/10, Training Loss: 0.2655160427093506, Validation Loss: 0.277, Validation Accuracy: 0.886
Epoch 3/10, Training Loss: 0.3295680284500122, Validation Loss: 0.323, Validation Accuracy: 0.870
Epoch 4/10, Training Loss: 0.3030592203140259, Validation Loss: 0.343, Validation Accuracy: 0.873
Epoch 5/10, Training Loss: 0.3285124897956848, Validation Loss: 0.664, Validation Accuracy: 0.771
Epoch 6/10, Training Loss: 0.7560265064239502, Validation Loss: 0.696, Validation Accuracy: 0.831
Epoch 7/10, Training Loss: 0.8313727378845215, Validation Loss: 1.205, Validation Accuracy: 0.873
Epoch 8/10, Training Loss: 1.1097652912139893, Validation Loss: 1.025, Validation Accuracy: 0.865
Epoch 9/10, Training Loss: 0.5087457299232483, Validation Loss: 0.894, Validation Accuracy: 0.848
Epoch 10/10, Training Loss: 0.8999691605567932, Validation Loss: 1.169, Validation Accuracy: 0.858
Minimum Loss: 0.277

(0.27683932726320465, 0.8864)

# 7. Hyperparameter Tuning

In [None]:
# Architecture Parameters
embedding_dim = 768
hidden_dim = 128
output_dim = 2
num_layers = 1
activation = "relu"

# Architecture Improvement Parameters
dropout_rate = 0.5
include_batch_norm = True
initialize_weights = False

# Training Parameters
epochs = 10
batch_size = 100
alpha = 0.1
beta = 0.9
rho1 = 0.99
rho2 = 0.999
grad_clip_max_norm = 1

# Optimizers
sgd_optimizer = torch.optim.SGD(ffnn.parameters(), lr=alpha)
adam_optimizer = torch.optim.Adam(ffnn.parameters(), lr=alpha)
adamw_optimizer = torch.optim.AdamW(ffnn.parameters(), lr=alpha)
rmsprop_optimizer = torch.optim.RMSprop(ffnn.parameters(), lr=alpha)

# Loss functions
cross_entropy_loss_fn = torch.nn.CrossEntropyLoss()

In [None]:
# FFNN
param_grid = {
    'hidden_dims': [32, 64, 128, 256, 512],
    'activations': ["relu", "tanh", "sigmoid"],
    'num_layers': [1, 2, 3, 4, 5],
    'include_batch_norm': [True, False],
    'initialize_weights': [True, False],
    'dropout_rates': [None, 0.1, 0.2, 0.3, 0.4, 0.5],
    'batch_sizes': [64, 100, 128, 256, 512],
    'grad_clip_max_norms': [None, 1, 2, 3, 4, 5, 6]
}

grid_search_combinations = list(itertools.product(*param_grid.values()))
len(grid_search_combinations)

63000

In [None]:
# Randomly sample a subset of combinations for random search
num_random_samples = 50
random_search_combinations = random.sample(grid_search_combinations, num_random_samples)

In [None]:
iteration_count = 0  # Initialize the counter

results = []
for (
    hidden_dim,
    activation,
    num_layer,
    include_batch_norm,
    initialize_weights,
    dropout_rate,
    batch_size,
    grad_clip_max_norm
    ) in random_search_combinations:

    iteration_count += 1  # Increment the counter for each iteration

    # FFNN Architecture
    ffnn = FFNN(
        embedding_dim=embedding_dim,
        hidden_dim=hidden_dim,
        output_dim=output_dim,
        num_layers=num_layers,
        include_batch_norm=include_batch_norm,
        initialize_weights=initialize_weights,
        dropout_rate=dropout_rate
    )

    # Optimizer
    adam_optimizer = torch.optim.Adam(ffnn.parameters(), lr=alpha)

    # Training
    best_validation_loss, best_validation_accuracy = train(
        Xs_tr,
        Ys_tr,
        Xs_va,
        Ys_va,
        ffnn,
        cross_entropy_loss_fn,
        adam_optimizer,
        epochs,
        batch_size,
        grad_clip_max_norm
    )

    # Result
    result = dict(
        zip(
            param_grid.keys(),
            (hidden_dim, activation, num_layer, include_batch_norm, initialize_weights, dropout_rate)
        )
    )
    result["best_validation_loss"] = best_validation_loss
    result["best_validation_accuracy"] = best_validation_accuracy
    result["iteration"] = iteration_count  # Add the iteration count to the result
    results.append(result)

    print("Number of iterations:", iteration_count)

Epoch 1/10, Training Loss: 0.818, Validation Loss: 0.403, Validation Accuracy: 0.852
Epoch 2/10, Training Loss: 0.839, Validation Loss: 1.280, Validation Accuracy: 0.794
Epoch 3/10, Training Loss: 1.451, Validation Loss: 1.714, Validation Accuracy: 0.787
Epoch 4/10, Training Loss: 1.533, Validation Loss: 1.831, Validation Accuracy: 0.827
Epoch 5/10, Training Loss: 1.571, Validation Loss: 1.397, Validation Accuracy: 0.845
Epoch 6/10, Training Loss: 1.866, Validation Loss: 2.094, Validation Accuracy: 0.822
Epoch 7/10, Training Loss: 1.677, Validation Loss: 1.613, Validation Accuracy: 0.845
Epoch 8/10, Training Loss: 1.592, Validation Loss: 2.072, Validation Accuracy: 0.824
Epoch 9/10, Training Loss: 2.052, Validation Loss: 3.152, Validation Accuracy: 0.801
Epoch 10/10, Training Loss: 2.188, Validation Loss: 3.239, Validation Accuracy: 0.720
Minimum Loss: 0.403, Max Accuracy: 0.852
Number of iterations: 1
Epoch 1/10, Training Loss: 4.634, Validation Loss: 0.357, Validation Accuracy: 0.857

In [None]:
result_df = pd.DataFrame(results)
result_df

Unnamed: 0,hidden_dims,activations,num_layers,include_batch_norm,initialize_weights,dropout_rates,best_validation_loss,best_validation_accuracy,iteration
0,128,sigmoid,3,True,True,0.4,0.403304,0.852,1
1,128,tanh,5,False,False,0.1,0.262024,0.894133,2
2,128,sigmoid,4,False,True,0.2,0.629443,0.861067,3
3,128,relu,3,False,False,0.4,0.415441,0.866133,4
4,128,relu,1,False,False,0.1,0.259905,0.892,5
5,128,relu,3,True,False,,0.276657,0.8832,6
6,32,tanh,2,True,False,0.2,0.296509,0.8792,7
7,512,tanh,4,False,False,,0.296433,0.882933,8
8,64,tanh,1,False,False,0.3,0.268754,0.894667,9
9,64,tanh,1,True,True,0.2,0.257817,0.8928,10


In [None]:
result_df.loc[result_df.best_validation_accuracy == result_df.best_validation_accuracy.max(), :]

Unnamed: 0,hidden_dims,activations,num_layers,include_batch_norm,initialize_weights,dropout_rates,best_validation_loss,best_validation_accuracy,iteration
8,64,tanh,1,False,False,0.3,0.268754,0.894667,9


In [None]:
result_df.best_validation_accuracy.max()

0.8946666666666667

In [None]:
result_df.to_csv("architecture_result.csv")

# 8. Submission

In [21]:
def make_prediction(Xs_te, model):
    Y_preds_prob = model(Xs_te)
    Y_preds = torch.argmax(Y_preds_prob, axis = 1)
    return Y_preds

In [22]:
def make_submission(uid, Y_preds):
    df = pd.DataFrame({'uid': uid, 'preference': Y_preds})
    df.to_csv('submission.csv', index = False)

In [None]:
Xs_te = np.concatenate((test_data['emb1'], test_data['emb2']), axis=1)
Xs_te = torch.Tensor(Xs_te)
Y_preds = make_prediction(Xs_te, ffnn)
make_submission(test_data['uid'], np.array(Y_preds))

# 9. Appendix

In [None]:
class RNN(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.hidden_layers = nn.RNN(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Forward pass through the RNN layer
        out, _ = self.hidden_layers(x)

        # Take the output from the last time step and pass it through the fully connected layer
        out = self.output_layer(out)
        return out

rnn = RNN(embedding_dim, hidden_dim, output_dim, num_layers)
rnn

RNN(
  (hidden_layers): RNN(768, 256, batch_first=True)
  (output_layer): Linear(in_features=256, out_features=2, bias=True)
)

In [None]:
# RNN
train(Xs_tr, Ys_tr, Xs_va, Ys_va, rnn, cross_entropy_loss_fn, adam_optimizer, epochs, grad_clip_max_norm)

Epoch 1/10, Training Loss: 0.6622525453567505, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 2/10, Training Loss: 0.7058525681495667, Validation Loss: 0.694, Validation Accuracy: 0.499
Epoch 3/10, Training Loss: 0.7326067686080933, Validation Loss: 0.694, Validation Accuracy: 0.499
Epoch 4/10, Training Loss: 0.7132993936538696, Validation Loss: 0.694, Validation Accuracy: 0.499
Epoch 5/10, Training Loss: 0.6166714429855347, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 6/10, Training Loss: 0.6832106709480286, Validation Loss: 0.692, Validation Accuracy: 0.499
Epoch 7/10, Training Loss: 0.69664067029953, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 8/10, Training Loss: 0.6812561750411987, Validation Loss: 0.694, Validation Accuracy: 0.499
Epoch 9/10, Training Loss: 0.6765474677085876, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 10/10, Training Loss: 0.6975078582763672, Validation Loss: 0.693, Validation Accuracy: 0.499
Minimum Loss: 0.692, 

(0.6921877190470695, 0.5)

In [None]:
class LSTM(nn.Module):
    def __init__(self, embedding_dim, hidden_dim, output_dim, num_layers):
        super().__init__()
        self.hidden_layers = nn.LSTM(embedding_dim, hidden_dim, num_layers, batch_first=True)
        self.output_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # Forward pass through the RNN layer
        out, _ = self.hidden_layers(x)

        # Take the output from the last time step and pass it through the fully connected layer
        out = self.output_layer(out)
        return out

lstm = LSTM(embedding_dim, hidden_dim, output_dim, num_layers)
lstm

LSTM(
  (hidden_layers): LSTM(768, 256, batch_first=True)
  (output_layer): Linear(in_features=256, out_features=2, bias=True)
)

In [None]:
# LSTM
train(Xs_tr, Ys_tr, Xs_va, Ys_va, lstm, cross_entropy_loss_fn, adam_optimizer, epochs, grad_clip_max_norm)

Epoch 1/10, Training Loss: 0.6907047033309937, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 2/10, Training Loss: 0.6963127851486206, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 3/10, Training Loss: 0.6875535249710083, Validation Loss: 0.693, Validation Accuracy: 0.499
Epoch 4/10, Training Loss: 0.6954268217086792, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 5/10, Training Loss: 0.7069279551506042, Validation Loss: 0.693, Validation Accuracy: 0.499
Epoch 6/10, Training Loss: 0.6905986070632935, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 7/10, Training Loss: 0.6969180703163147, Validation Loss: 0.693, Validation Accuracy: 0.499
Epoch 8/10, Training Loss: 0.706170916557312, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 9/10, Training Loss: 0.6837252378463745, Validation Loss: 0.694, Validation Accuracy: 0.499
Epoch 10/10, Training Loss: 0.6918861269950867, Validation Loss: 0.694, Validation Accuracy: 0.500
Minimum Loss: 0.693,

(0.6931802779436111, 0.5002666666666666)

In [19]:
class Perceptron(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Perceptron, self).__init__()
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.linear(x)

input_dim = Xs_tr.shape[1]
output_dim = 2

perceptron = Perceptron(input_dim, output_dim)
perceptron

Perceptron(
  (linear): Linear(in_features=768, out_features=2, bias=True)
)

In [None]:
# Perceptron
train(Xs_tr, Ys_tr, Xs_va, Ys_va, perceptron, cross_entropy_loss_fn, adam_optimizer, epochs, grad_clip_max_norm)

Epoch 1/10, Training Loss: 0.6937889456748962, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 2/10, Training Loss: 0.7078548669815063, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 3/10, Training Loss: 0.6957200765609741, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 4/10, Training Loss: 0.6686443090438843, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 5/10, Training Loss: 0.6857262849807739, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 6/10, Training Loss: 0.6737910509109497, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 7/10, Training Loss: 0.7012429237365723, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 8/10, Training Loss: 0.6913532614707947, Validation Loss: 0.694, Validation Accuracy: 0.500
Epoch 9/10, Training Loss: 0.6994125843048096, Validation Loss: 0.693, Validation Accuracy: 0.500
Epoch 10/10, Training Loss: 0.6675577163696289, Validation Loss: 0.694, Validation Accuracy: 0.500
Minimum Loss: 0.693

(0.6930814981460571, 0.5002666666666666)

In [30]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

class SVM:
    def __init__(self, kernel='linear', C=1.0):
        self.model = SVC(kernel=kernel, C=C)

    def fit(self, X, Y):
        self.model.fit(X, Y)

    def predict(self, X):
        return self.model.predict(X)

def train_svm(Xs_tr, Ys_tr, Xs_va, Ys_va, svm):
    # Train the SVM model
    svm.fit(Xs_tr, Ys_tr)

    # Predict on the validation set
    predictions = svm.predict(Xs_va)

    # Evaluate accuracy
    accuracy = accuracy_score(Ys_va, predictions)
    print(f"Validation Accuracy: {accuracy:.3f}")

    return accuracy

# Example usage
svm = SVM(kernel='linear', C=1.0)
accuracy = train_svm(Xs_tr, Ys_tr, Xs_va, Ys_va, svm)
print(f"Final Validation Accuracy: {accuracy:.3f}")

Validation Accuracy: 0.894
Final Validation Accuracy: 0.894


In [31]:
Xs_te = np.concatenate((test_data['emb1'], test_data['emb2']), axis=1)
Xs_te = torch.Tensor(Xs_te)
Y_preds = svm.predict(Xs_te)
make_submission(test_data['uid'], np.array(Y_preds))