In [None]:
# Mount Google Drive at /content/gdrive. Needs the `google.colab` package to be importable.
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
# Import necessary libraries
import os

# Create a directory to store the datasets
os.makedirs('datasets', exist_ok=True)

# Define the URLs for the datasets
train_url = "https://www.dropbox.com/scl/fi/ghja3vpc34k78cg89uwjq/test.ft.txt?rlkey=e6rd56dtbv2ypms57m1l76brm&st=plgg4bhb&dl=0"
test_url = "https://www.dropbox.com/scl/fi/iof2u1j31x5ffoytg2du5/train.ft.txt?rlkey=4xld1u112j0gogam297xajqg2&st=yjvlyblz&dl=0"

# Use wget to download the files
!wget -O datasets/train.ft.txt {train_url}
!wget -O datasets/test.ft.txt {test_url}

# Verify the files are downloaded
!ls datasets


--2024-08-06 05:48:08--  https://www.dropbox.com/scl/fi/ghja3vpc34k78cg89uwjq/test.ft.txt?rlkey=e6rd56dtbv2ypms57m1l76brm
Resolving www.dropbox.com (www.dropbox.com)... 162.125.1.18, 2620:100:6016:18::a27d:112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.1.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://uc87e1440f3a1de39400f07833cc.dl.dropboxusercontent.com/cd/0/inline/CYGpyOd88SJ2mQni8ZRuh767JQ6EahZBpYvRXBf1noKEzYAyyLBH5EF--cnTqSYXUizduCu0Q21jGqRlNdZat7cakgT9ag6zrKtt0eg1kN_eJ2d5CZKca5nynS1l79JJ-u8tbwHNWWS5X8yjeo3QlXca/file# [following]
--2024-08-06 05:48:09--  https://uc87e1440f3a1de39400f07833cc.dl.dropboxusercontent.com/cd/0/inline/CYGpyOd88SJ2mQni8ZRuh767JQ6EahZBpYvRXBf1noKEzYAyyLBH5EF--cnTqSYXUizduCu0Q21jGqRlNdZat7cakgT9ag6zrKtt0eg1kN_eJ2d5CZKca5nynS1l79JJ-u8tbwHNWWS5X8yjeo3QlXca/file
Resolving uc87e1440f3a1de39400f07833cc.dl.dropboxusercontent.com (uc87e1440f3a1de39400f07833cc.dl.dropboxusercontent.com)... 162.125.1.15, 2620:10

In [None]:
import pandas as pd
from transformers import RobertaTokenizer, RobertaForSequenceClassification, get_linear_schedule_with_warmup
from sklearn.model_selection import train_test_split
import torch
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from sklearn.metrics import accuracy_score, classification_report
import time

In [None]:
def parse_fasttext_lines(lines):
    """Split fastText-style lines into zero-based labels and review texts.

    Each kept line looks like ``__label__<int> <review text>``; lines that do not
    start with ``__label__`` are skipped.

    Args:
        lines: iterable of strings, one example per string.

    Returns:
        (labels, reviews): two lists of equal length. Labels are shifted down by one
        (1 -> 0, 2 -> 1) so they can be used directly as class indices.

    Raises:
        ValueError: if the text following ``__label__`` is not an integer.
    """
    labels = []
    reviews = []
    for line in lines:
        if not line.startswith("__label__"):
            continue
        # Everything before the first space is the label tag, the rest is the review.
        tag, sep, text = line.partition(' ')
        labels.append(int(tag.replace('__label__', '').strip()) - 1)
        reviews.append(text.strip())
    return labels, reviews


# Reading data
start_time = time.time()
with open("datasets/train.ft.txt", 'r', encoding='utf-8') as file:
    train = file.readlines()

with open("datasets/test.ft.txt", 'r', encoding='utf-8') as file:
    test = file.readlines()
# Record the read time before the timer is restarted for the parsing stage.
data_reading_time = time.time() - start_time

# Processing training and test data
start_time = time.time()
train_labels, train_reviews = parse_fasttext_lines(train)
test_labels, test_reviews = parse_fasttext_lines(test)
data_processing_time = time.time() - start_time

# Creating DataFrames
start_time = time.time()
train_df = pd.DataFrame({
    'review': train_reviews,
    'label': train_labels
})

test_df = pd.DataFrame({
    'review': test_reviews,
    'label': test_labels
})

# Splitting data into training and validation sets (80% / 20%, fixed seed)
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)
dataframe_creation_time = time.time() - start_time


In [None]:
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification, get_linear_schedule_with_warmup

# Stage 1: pull both raw text files into memory, one list entry per line.
start_time = time.time()
with open("datasets/train.ft.txt", 'r', encoding='utf-8') as file:
    train = file.readlines()

with open("datasets/test.ft.txt", 'r', encoding='utf-8') as file:
    test = file.readlines()
data_reading_time = time.time() - start_time

# Stage 2: split every "__label__<n> <text>" line into its label and its review.
# Labels are stored shifted down by one (1 -> 0, 2 -> 1).
start_time = time.time()
train_labels = []
train_reviews = []

for line in train:
    if not line.startswith("__label__"):
        continue
    tag, sep, text = line.partition(' ')
    label = int(tag.replace('__label__', '').strip())
    review = text.strip()
    train_labels.append(label - 1)
    train_reviews.append(review)

test_labels = []
test_reviews = []

for line in test:
    if not line.startswith("__label__"):
        continue
    tag, sep, text = line.partition(' ')
    label = int(tag.replace('__label__', '').strip())
    review = text.strip()
    test_labels.append(label - 1)
    test_reviews.append(review)

data_processing_time = time.time() - start_time

# Stage 3: wrap the parsed columns in DataFrames and carve out a validation split.
start_time = time.time()
train_df = pd.DataFrame({'review': train_reviews, 'label': train_labels})
test_df = pd.DataFrame({'review': test_reviews, 'label': test_labels})

# 20% of the training frame is held out for validation; the seed fixes the split.
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)
dataframe_creation_time = time.time() - start_time

# Stage 4: load the DistilBERT tokenizer and time how long that takes.
start_time = time.time()
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
tokenizer_loading_time = time.time() - start_time

class ReviewsDataset(Dataset):
    """Dataset that tokenizes one review on each lookup.

    Args:
        reviews: indexable collection of review strings.
        labels: indexable collection of integer labels, aligned with ``reviews``.
        tokenizer: object exposing ``encode_plus``; called with
            ``return_tensors='pt'`` and fixed-length padding/truncation.
        max_len: sequence length passed to the tokenizer as ``max_length``.
    """

    def __init__(self, reviews, labels, tokenizer, max_len):
        self.reviews, self.labels = reviews, labels
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        """Number of reviews held by the dataset."""
        return len(self.reviews)

    def __getitem__(self, item):
        """Return the raw text, flattened token ids / attention mask, and label tensor."""
        text = self.reviews[item]
        target = self.labels[item]

        tokenizer_options = dict(
            add_special_tokens=True,
            max_length=self.max_len,
            return_token_type_ids=False,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            return_tensors='pt',
        )
        encoded = self.tokenizer.encode_plus(text, **tokenizer_options)

        sample = {'review_text': text}
        # The tokenizer output is flattened to one dimension per sample.
        for key in ('input_ids', 'attention_mask'):
            sample[key] = encoded[key].flatten()
        sample['labels'] = torch.tensor(target, dtype=torch.long)
        return sample

# Creating datasets
start_time = time.time()
train_dataset = ReviewsDataset(
    reviews=train_df.review.to_numpy(),
    labels=train_df.label.to_numpy(),
    tokenizer=tokenizer,
    max_len=160
)

val_dataset = ReviewsDataset(
    reviews=val_df.review.to_numpy(),
    labels=val_df.label.to_numpy(),
    tokenizer=tokenizer,
    max_len=160
)

test_dataset = ReviewsDataset(
    reviews=test_df.review.to_numpy(),
    labels=test_df.label.to_numpy(),
    tokenizer=tokenizer,
    max_len=160
)
dataset_creation_time = time.time() - start_time

# A small per-step batch keeps memory consumption low; gradient accumulation makes the
# effective batch size batch_size * accumulation_steps.
batch_size = 16
accumulation_steps = 4

# Number of training epochs. The learning-rate schedule below is sized from it, so it
# has to match the number of epochs actually run by the training loop.
EPOCHS = 3

# Creating data loaders
start_time = time.time()
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size)
test_loader = DataLoader(test_dataset, batch_size=batch_size)
data_loader_creation_time = time.time() - start_time

# Loading pre-trained DistilBERT model
start_time = time.time()
model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased', num_labels=2)
model_loading_time = time.time() - start_time

# Freeze the whole DistilBERT encoder ...
for param in model.distilbert.parameters():
    param.requires_grad = False

# ... then unfreeze only its last two transformer layers. Parameters outside
# `model.distilbert` are not touched by either loop.
for param in model.distilbert.transformer.layer[-2:].parameters():
    param.requires_grad = True

# Set up optimizer and scheduler
optimizer = AdamW(model.parameters(), lr=2e-5)

# `train_epoch` calls scheduler.step() once per accumulated optimizer update, not once
# per batch, so the schedule length must be counted in optimizer updates. Sizing it in
# batches would leave the linear decay only partly completed at the end of training.
updates_per_epoch = (len(train_loader) + accumulation_steps - 1) // accumulation_steps
total_steps = updates_per_epoch * EPOCHS
scheduler = get_linear_schedule_with_warmup(
    optimizer,
    num_warmup_steps=0,
    num_training_steps=total_steps
)

# Training function
def train_epoch(model, data_loader, optimizer, device, scheduler, accumulation_steps):
    """Train ``model`` for one pass over ``data_loader`` with gradient accumulation.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=..., labels=...)``
            whose output exposes a scalar ``.loss``.
        data_loader: sized iterable of batches; each batch is a mapping with
            ``'input_ids'``, ``'attention_mask'`` and ``'labels'`` tensors.
        optimizer: optimizer stepped once per accumulation group.
        device: device the batch tensors are moved to.
        scheduler: learning-rate scheduler, stepped together with the optimizer.
        accumulation_steps: number of batches whose gradients are summed before
            one optimizer update.

    Returns:
        Mean of the per-batch (unscaled) losses.

    Raises:
        ValueError: if ``accumulation_steps`` is smaller than 1.
    """
    if accumulation_steps < 1:
        raise ValueError("accumulation_steps must be a positive integer")

    model = model.train()
    total_loss = 0
    num_batches = len(data_loader)

    # Batches left over after the last complete accumulation group. They form a
    # shorter final group that starts at index `tail_start`.
    leftover = num_batches % accumulation_steps
    tail_start = num_batches - leftover

    optimizer.zero_grad()

    for i, batch in enumerate(data_loader):
        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)

        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            labels=labels
        )

        loss = outputs.loss
        total_loss += loss.item()

        # Scale by the size of the group this batch belongs to, so the accumulated
        # gradient is the mean over the group (also for the shorter final group).
        group_size = leftover if i >= tail_start else accumulation_steps
        (loss / group_size).backward()

        # Also update on the very last batch: otherwise the gradients of a trailing
        # partial group would never be applied and would be thrown away by the
        # zero_grad() at the start of the next epoch.
        if (i + 1) % accumulation_steps == 0 or (i + 1) == num_batches:
            optimizer.step()
            optimizer.zero_grad()
            scheduler.step()

    return total_loss / num_batches

# Validation function
def eval_model(model, data_loader, device):
    """Return the mean per-batch loss of ``model`` over ``data_loader``.

    The model is switched to eval mode and run without gradient tracking. Every
    batch must provide ``'input_ids'``, ``'attention_mask'`` and ``'labels'``
    tensors, which are moved to ``device`` before the forward pass.
    """
    model = model.eval()
    batch_losses = []

    with torch.no_grad():
        for batch in data_loader:
            model_inputs = {
                key: batch[key].to(device)
                for key in ('input_ids', 'attention_mask', 'labels')
            }
            batch_losses.append(model(**model_inputs).loss.item())

    # Average over the number of batches reported by the loader.
    return sum(batch_losses) / len(data_loader)

Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Prefer the GPU when torch can see one; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Move the model onto the chosen device before training.
model = model.to(device)

EPOCHS = 3

# Run the epochs, recording how long each one (training + validation) takes.
epoch_times = []
print("Starting training...")
for epoch in range(EPOCHS):
    epoch_start_time = time.time()
    train_loss = train_epoch(model, train_loader, optimizer, device, scheduler, accumulation_steps)
    val_loss = eval_model(model, val_loader, device)
    epoch_end_time = time.time()
    epoch_duration = epoch_end_time - epoch_start_time
    epoch_times.append(epoch_duration)

    print(
        f'Epoch {epoch + 1}/{EPOCHS} completed in {epoch_duration:.2f} seconds.',
        f'Train loss: {train_loss}',
        f'Val loss: {val_loss}',
        sep='\n',
    )

# Persist the fine-tuned model and its tokenizer side by side in one directory.
print("Saving the model...")
start_time = time.time()
for artifact in (model, tokenizer):
    artifact.save_pretrained("saved_model_distilbert")
model_saving_time = time.time() - start_time
print(f"Model saved in {model_saving_time:.2f} seconds.")

# Evaluation starts here; the prediction helper is defined right below.
print("Evaluating the model...")
def get_predictions(model, data_loader):
    """Run ``model`` over ``data_loader`` and collect texts, predictions, scores and labels.

    The batches are moved to the device the model's parameters live on, so the
    function does not depend on a notebook-level ``device`` variable.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=...)`` whose
            output exposes ``.logits`` with one row per sample.
        data_loader: iterable of batches; each batch is a mapping with
            ``'review_text'``, ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.

    Returns:
        ``(reviews, predictions, prediction_probs, real_values)`` where
        ``reviews`` is a list of the review texts, ``predictions`` is a 1-D CPU
        tensor of argmax class indices, ``prediction_probs`` is a 2-D CPU tensor
        of softmax probabilities (one row per sample, one column per class) and
        ``real_values`` is a 1-D CPU tensor of the true labels. For an empty
        loader the tensors are empty.
    """
    model = model.eval()

    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    reviews = []
    pred_batches = []
    prob_batches = []
    label_batches = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(model_device)
            attention_mask = batch['attention_mask'].to(model_device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )
            logits = outputs.logits
            _, preds = torch.max(logits, dim=1)

            reviews.extend(batch['review_text'])
            # Keep whole batches and move them to the CPU right away: extending a
            # list with one device tensor per sample is slow and keeps every
            # sample's scores on the accelerator.
            pred_batches.append(preds.cpu())
            # Turn logits into probabilities so the result matches its name and
            # column 1 can be used directly as the positive-class score.
            prob_batches.append(torch.softmax(logits, dim=1).cpu())
            label_batches.append(batch['labels'].cpu())

    if not pred_batches:
        # torch.cat refuses an empty list; return empty tensors instead.
        return (
            reviews,
            torch.empty(0, dtype=torch.long),
            torch.empty(0, 0),
            torch.empty(0, dtype=torch.long),
        )

    predictions = torch.cat(pred_batches)
    prediction_probs = torch.cat(prob_batches)
    real_values = torch.cat(label_batches)
    return reviews, predictions, prediction_probs, real_values

# Score the validation split and time how long inference takes.
start_time = time.time()
y_val_review_texts, y_val_pred, y_val_pred_probs, y_val_test = get_predictions(model, val_loader)
val_predictions_time = time.time() - start_time

# Report the timing first, then the per-class metrics, then overall accuracy.
print(
    f'Validation predictions completed in {val_predictions_time:.2f} seconds.',
    'Validation Classification Report:',
    sep='\n',
)
print(classification_report(y_val_test, y_val_pred))
print('Validation Accuracy:', accuracy_score(y_val_test, y_val_pred))



Using device: cuda
Starting training...
Epoch 1/3 completed in 2856.08 seconds.
Train loss: 0.16679169678519248
Val loss: 0.13235460030972027
Epoch 2/3 completed in 2869.85 seconds.
Train loss: 0.12774182451862143
Val loss: 0.12698004063256085
Epoch 3/3 completed in 2866.92 seconds.
Train loss: 0.11188854023494059
Val loss: 0.1196166014581453
Saving the model...
Model saved in 3.07 seconds.
Evaluating the model...
Validation predictions completed in 363.83 seconds.
Validation Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96     39896
           1       0.96      0.96      0.96     40104

    accuracy                           0.96     80000
   macro avg       0.96      0.96      0.96     80000
weighted avg       0.96      0.96      0.96     80000

Validation Accuracy: 0.9569375


In [None]:
import numpy as np
import torch
from torch.utils.data import Subset, DataLoader
from transformers import DistilBertTokenizer, DistilBertForSequenceClassification
from sklearn.metrics import classification_report, accuracy_score

# Path to the saved model directory.
# Use the same relative directory the training cell saved to ("saved_model_distilbert"),
# so loading works from whatever working directory the model was saved in instead of
# only when that directory happens to be /content.
model_directory = "saved_model_distilbert"

# Load the tokenizer
tokenizer = DistilBertTokenizer.from_pretrained(model_directory)
print("Tokenizer loaded.")

# Load the model
model = DistilBertForSequenceClassification.from_pretrained(model_directory)
print("Model loaded.")

# Move the model to the appropriate device (CPU or GPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print(f"Using device: {device}")

# Function to get predictions
def get_predictions(model, data_loader):
    """Run ``model`` over ``data_loader`` and collect texts, predictions, scores and labels.

    The batches are moved to the device the model's parameters live on, so the
    function does not depend on a notebook-level ``device`` variable.

    Args:
        model: module called as ``model(input_ids=..., attention_mask=...)`` whose
            output exposes ``.logits`` with one row per sample.
        data_loader: iterable of batches; each batch is a mapping with
            ``'review_text'``, ``'input_ids'``, ``'attention_mask'`` and ``'labels'``.

    Returns:
        ``(reviews, predictions, prediction_probs, real_values)`` where
        ``reviews`` is a list of the review texts, ``predictions`` is a 1-D CPU
        tensor of argmax class indices, ``prediction_probs`` is a 2-D CPU tensor
        of softmax probabilities (one row per sample, one column per class) and
        ``real_values`` is a 1-D CPU tensor of the true labels. For an empty
        loader the tensors are empty.
    """
    model = model.eval()

    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    reviews = []
    pred_batches = []
    prob_batches = []
    label_batches = []

    with torch.no_grad():
        for batch in data_loader:
            input_ids = batch['input_ids'].to(model_device)
            attention_mask = batch['attention_mask'].to(model_device)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask
            )
            logits = outputs.logits
            _, preds = torch.max(logits, dim=1)

            reviews.extend(batch['review_text'])
            # Keep whole batches and move them to the CPU right away: extending a
            # list with one device tensor per sample is slow and keeps every
            # sample's scores on the accelerator.
            pred_batches.append(preds.cpu())
            # Turn logits into probabilities so the result matches its name and
            # column 1 can be used directly as the positive-class score.
            prob_batches.append(torch.softmax(logits, dim=1).cpu())
            label_batches.append(batch['labels'].cpu())

    if not pred_batches:
        # torch.cat refuses an empty list; return empty tensors instead.
        return (
            reviews,
            torch.empty(0, dtype=torch.long),
            torch.empty(0, 0),
            torch.empty(0, dtype=torch.long),
        )

    predictions = torch.cat(pred_batches)
    prediction_probs = torch.cat(prob_batches)
    real_values = torch.cat(label_batches)
    return reviews, predictions, prediction_probs, real_values

# Get predictions for validation set
start_time = time.time()
y_val_review_texts, y_val_pred, y_val_pred_probs, y_val_test = get_predictions(
    model,
    val_loader
)
val_predictions_time = time.time() - start_time
print(f'Validation predictions completed in {val_predictions_time:.2f} seconds.')

# Each metric is computed once here and reused by the results section further down.
val_report = classification_report(y_val_test, y_val_pred)
val_accuracy = accuracy_score(y_val_test, y_val_pred)
print('Validation Classification Report:')
print(val_report)
print('Validation Accuracy:', val_accuracy)

# Randomly select 30% of the test dataset.
# A seeded generator makes the subset, and therefore the reported test metrics,
# reproducible across runs; 42 matches the seed used for the train/validation split.
subset_rng = np.random.default_rng(42)
test_indices = subset_rng.choice(len(test_dataset), size=int(0.3 * len(test_dataset)), replace=False)
test_subset = Subset(test_dataset, test_indices)
test_loader_30 = DataLoader(test_subset, batch_size=128, shuffle=False)

# Get predictions for the 30% test subset
start_time = time.time()
y_test_review_texts, y_test_pred, y_test_pred_probs, y_test_labels = get_predictions(
    model,
    test_loader_30
)
test_predictions_time = time.time() - start_time
print(f'Test predictions completed in {test_predictions_time:.2f} seconds.')

test_report = classification_report(y_test_labels, y_test_pred)
test_accuracy = accuracy_score(y_test_labels, y_test_pred)
print('Test Classification Report:')
print(test_report)
print('Test Accuracy:', test_accuracy)

# Displaying results
print("Results for the validation set:")
print(f"Validation Accuracy: {val_accuracy:.4f}")
print("Classification Report:")
print(val_report)

print("\nResults for the test set:")
print(f"Test Accuracy: {test_accuracy:.4f}")
print("Classification Report:")
print(test_report)

# Summary of times (the variables are produced by the earlier loading/training cells)
print("\nSummary of Times:")
print(f"Data reading time: {data_reading_time:.2f} seconds")
print(f"Data processing time: {data_processing_time:.2f} seconds")
print(f"Dataframe creation time: {dataframe_creation_time:.2f} seconds")
print(f"Tokenizer loading time: {tokenizer_loading_time:.2f} seconds")
print(f"Dataset creation time: {dataset_creation_time:.2f} seconds")
print(f"Data loader creation time: {data_loader_creation_time:.2f} seconds")
print(f"Model loading time: {model_loading_time:.2f} seconds")
print(f"Model saving time: {model_saving_time:.2f} seconds")
print(f"Validation predictions time: {val_predictions_time:.2f} seconds")
print(f"Test predictions time: {test_predictions_time:.2f} seconds")
print(f"Epoch times: {epoch_times}")


Tokenizer loaded.
Model loaded.
Using device: cuda
Validation predictions completed in 363.81 seconds.
Validation Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96     39896
           1       0.96      0.96      0.96     40104

    accuracy                           0.96     80000
   macro avg       0.96      0.96      0.96     80000
weighted avg       0.96      0.96      0.96     80000

Validation Accuracy: 0.9569375
Test predictions completed in 4808.27 seconds.
Test Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.96      0.96    539999
           1       0.96      0.96      0.96    540001

    accuracy                           0.96   1080000
   macro avg       0.96      0.96      0.96   1080000
weighted avg       0.96      0.96      0.96   1080000

Test Accuracy: 0.9566342592592593
Results for the validation set:
Validation Accuracy: 0.9569
Classific

In [None]:
# Mount Google Drive at /content/drive. Needs the `google.colab` package to be importable.
# NOTE(review): the first cell already mounts Drive at '/content/gdrive'; this mounts it
# again under a different path. Confirm whether both mount points are really needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Imports this cell needs: no earlier cell brings `confusion_matrix`,
# `ConfusionMatrixDisplay` or `plt` into scope, and `time` is re-imported so the cell
# does not depend on the very first import cell having been run.
import time

import matplotlib.pyplot as plt
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix


def show_confusion_matrix(y_true, y_pred, title):
    """Compute, plot and return the confusion matrix for one set of predictions.

    Args:
        y_true: true labels.
        y_pred: predicted labels, aligned with ``y_true``.
        title: title drawn above the plot.

    Returns:
        ``(matrix, display)``: the matrix returned by ``confusion_matrix`` and the
        ``ConfusionMatrixDisplay`` used to draw it.
    """
    matrix = confusion_matrix(y_true, y_pred)
    display_obj = ConfusionMatrixDisplay(confusion_matrix=matrix)
    display_obj.plot(cmap=plt.cm.Blues)
    plt.title(title)
    plt.show()
    return matrix, display_obj


# Confusion matrix for validation set.
# The validation labels are unpacked as `y_val_test` by the prediction cells; the name
# `y_val_labels` used here before is never assigned anywhere in the notebook.
start_time = time.time()
val_cm, val_disp = show_confusion_matrix(y_val_test, y_val_pred, "Validation Confusion Matrix")
print(f"Validation confusion matrix generated in {time.time() - start_time:.2f} seconds.")

# Confusion matrix for test set
start_time = time.time()
test_cm, test_disp = show_confusion_matrix(y_test_labels, y_test_pred, "Test Confusion Matrix")
print(f"Test confusion matrix generated in {time.time() - start_time:.2f} seconds.")

NameError: name 'time' is not defined

In [None]:
from sklearn.metrics import classification_report, precision_recall_curve, roc_curve, auc
import matplotlib.pyplot as plt

def positive_class_scores(y_pred_probs):
    """Return the class-1 column of per-sample scores as a 1-D NumPy array.

    Accepts a 2-D NumPy array, a 2-D torch tensor (on any device), or a list/tuple
    with one 1-D row per sample -- the form in which the notebook's
    ``get_predictions`` collects its scores. A plain list cannot be indexed with
    ``[:, 1]``, so rows are stacked first and tensors are moved to the CPU.

    NOTE(review): if the rows are raw logits rather than probabilities, ranking by
    the class-1 logit alone is not the same as ranking by the class-1 probability;
    confirm what the caller passes in.
    """
    import numpy as np
    import torch

    if isinstance(y_pred_probs, (list, tuple)):
        y_pred_probs = torch.stack([torch.as_tensor(row) for row in y_pred_probs])
    if torch.is_tensor(y_pred_probs):
        y_pred_probs = y_pred_probs.detach().cpu().numpy()
    return np.asarray(y_pred_probs)[:, 1]


# Function to plot precision-recall curve
def plot_precision_recall(y_true, y_pred_probs):
    """Plot precision against recall using the class-1 scores in ``y_pred_probs``.

    Args:
        y_true: true binary labels.
        y_pred_probs: per-sample class scores with at least two columns; see
            ``positive_class_scores`` for the accepted container types.
    """
    precision, recall, _ = precision_recall_curve(y_true, positive_class_scores(y_pred_probs))
    plt.figure()
    plt.plot(recall, precision, marker='.')
    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.title('Precision-Recall Curve')
    plt.show()

# Function to plot ROC curve
def plot_roc_curve(y_true, y_pred_probs):
    """Plot the ROC curve, with its area in the legend, from the class-1 scores.

    Args:
        y_true: true binary labels.
        y_pred_probs: per-sample class scores with at least two columns; see
            ``positive_class_scores`` for the accepted container types.
    """
    fpr, tpr, _ = roc_curve(y_true, positive_class_scores(y_pred_probs))
    roc_auc = auc(fpr, tpr)
    plt.figure()
    plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
    # Diagonal reference line from (0, 0) to (1, 1).
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic')
    plt.legend(loc="lower right")
    plt.show()

# Print classification report for validation set.
# The validation labels are unpacked as `y_val_test` by the prediction cells; the name
# `y_val_labels` used here before is never assigned anywhere in the notebook.
print("Validation Classification Report:")
print(classification_report(y_val_test, y_val_pred))

# Print classification report for test set
print("Test Classification Report:")
print(classification_report(y_test_labels, y_test_pred))

# Plot precision-recall curve for test set
plot_precision_recall(y_test_labels, y_test_pred_probs)

# Plot ROC curve for test set
plot_roc_curve(y_test_labels, y_test_pred_probs)


In [None]:
# Collect every line of the timing report first, then emit them in order.
# All timing variables come from the earlier loading, training and evaluation cells.
timing_report_lines = [
    "\nSummary of Times:",
    f"Data reading time: {data_reading_time:.2f} seconds",
    f"Data processing time: {data_processing_time:.2f} seconds",
    f"Dataframe creation time: {dataframe_creation_time:.2f} seconds",
    f"Tokenizer loading time: {tokenizer_loading_time:.2f} seconds",
    f"Dataset creation time: {dataset_creation_time:.2f} seconds",
    f"Data loader creation time: {data_loader_creation_time:.2f} seconds",
    f"Model loading time: {model_loading_time:.2f} seconds",
    f"Model saving time: {model_saving_time:.2f} seconds",
    f"Validation predictions time: {val_predictions_time:.2f} seconds",
    f"Test predictions time: {test_predictions_time:.2f} seconds",
    f"Epoch times: {epoch_times}",
]
for report_line in timing_report_lines:
    print(report_line)

In [None]:
from transformers import GPT2Tokenizer, GPT2ForSequenceClassification
import numpy as np
import torch
from torch.utils.data import Subset, DataLoader
from sklearn.metrics import classification_report, accuracy_score

# Load GPT-2 tokenizer
print("Loading GPT-2 tokenizer...")
start_time = time.time()
gpt2_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2_tokenizer.pad_token = gpt2_tokenizer.eos_token  # Set the pad token to eos token for GPT-2
gpt2_tokenizer_loading_time = time.time() - start_time

# Creating datasets for GPT-2
print("Creating datasets for GPT-2...")
start_time = time.time()
gpt2_train_dataset = ReviewsDataset(
    reviews=train_df.review.to_numpy(),
    labels=train_df.label.to_numpy(),
    tokenizer=gpt2_tokenizer,
    max_len=128  # NOTE: shorter than the 160 tokens used for the DistilBERT datasets
)

gpt2_val_dataset = ReviewsDataset(
    reviews=val_df.review.to_numpy(),
    labels=val_df.label.to_numpy(),
    tokenizer=gpt2_tokenizer,
    max_len=128
)

gpt2_test_dataset = ReviewsDataset(
    reviews=test_df.review.to_numpy(),
    labels=test_df.label.to_numpy(),
    tokenizer=gpt2_tokenizer,
    max_len=128
)
gpt2_dataset_creation_time = time.time() - start_time

# Creating data loaders for GPT-2
print("Creating data loaders for GPT-2...")
start_time = time.time()
gpt2_train_loader = DataLoader(gpt2_train_dataset, batch_size=batch_size, shuffle=True)
gpt2_val_loader = DataLoader(gpt2_val_dataset, batch_size=batch_size)
gpt2_test_loader = DataLoader(gpt2_test_dataset, batch_size=batch_size)
gpt2_data_loader_creation_time = time.time() - start_time

# Load pre-trained GPT-2 model
print("Loading pre-trained GPT-2 model...")
start_time = time.time()
gpt2_model = GPT2ForSequenceClassification.from_pretrained('gpt2', num_labels=2)
# The model config needs to know which id is padding, matching the tokenizer above.
gpt2_model.config.pad_token_id = gpt2_tokenizer.pad_token_id
gpt2_model_loading_time = time.time() - start_time

# Freeze the whole GPT-2 transformer ...
for param in gpt2_model.transformer.parameters():
    param.requires_grad = False

# ... then unfreeze only its last two blocks. Parameters outside
# `gpt2_model.transformer` are not touched by either loop.
for param in gpt2_model.transformer.h[-2:].parameters():
    param.requires_grad = True

# Set up optimizer and scheduler for GPT-2
gpt2_optimizer = AdamW(gpt2_model.parameters(), lr=2e-5)

# `train_epoch` steps the scheduler once per accumulated optimizer update, not once per
# batch, so the schedule length is counted in optimizer updates. `accumulation_steps`
# and `EPOCHS` are the values defined by the earlier DistilBERT cells and are the same
# ones the GPT-2 training loop uses.
gpt2_updates_per_epoch = (len(gpt2_train_loader) + accumulation_steps - 1) // accumulation_steps
gpt2_total_steps = gpt2_updates_per_epoch * EPOCHS
gpt2_scheduler = get_linear_schedule_with_warmup(
    gpt2_optimizer,
    num_warmup_steps=0,
    num_training_steps=gpt2_total_steps
)


Loading GPT-2 tokenizer...
Creating datasets for GPT-2...
Creating data loaders for GPT-2...
Loading pre-trained GPT-2 model...


Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Training the GPT-2 model
gpt2_model = gpt2_model.to(device)

print("Starting GPT-2 training...")
gpt2_epoch_times = []
for epoch in range(EPOCHS):
    epoch_start_time = time.time()
    train_loss = train_epoch(gpt2_model, gpt2_train_loader, gpt2_optimizer, device, gpt2_scheduler, accumulation_steps)
    val_loss = eval_model(gpt2_model, gpt2_val_loader, device)
    epoch_end_time = time.time()
    epoch_duration = epoch_end_time - epoch_start_time
    gpt2_epoch_times.append(epoch_duration)

    print(f'GPT-2 Epoch {epoch + 1}/{EPOCHS} completed in {epoch_duration:.2f} seconds.')
    print(f'GPT-2 Train loss: {train_loss}')
    print(f'GPT-2 Val loss: {val_loss}')

# Save the GPT-2 model.
# A directory relative to the working directory (like "saved_model_distilbert" for the
# DistilBERT model) replaces the previous absolute Windows user path, which only
# exists on one machine.
gpt2_model_directory = "saved_model_gpt2"
print("Saving the GPT-2 model...")
start_time = time.time()
gpt2_model.save_pretrained(gpt2_model_directory)
gpt2_tokenizer.save_pretrained(gpt2_model_directory)
gpt2_model_saving_time = time.time() - start_time
print(f"GPT-2 Model saved in {gpt2_model_saving_time:.2f} seconds.")

# Evaluate the GPT-2 model.
# NOTE: the y_val_* / y_test_* names below overwrite the DistilBERT results of the
# earlier cells.
print("Evaluating the GPT-2 model...")
start_time = time.time()
y_val_review_texts, y_val_pred, y_val_pred_probs, y_val_test = get_predictions(
    gpt2_model,
    gpt2_val_loader
)
gpt2_val_predictions_time = time.time() - start_time
print(f'GPT-2 Validation predictions completed in {gpt2_val_predictions_time:.2f} seconds.')
print('GPT-2 Validation Classification Report:')
print(classification_report(y_val_test, y_val_pred))
print('GPT-2 Validation Accuracy:', accuracy_score(y_val_test, y_val_pred))

# Get predictions for GPT-2 test set.
# `start_time` is read by the next cell to compute the test prediction time.
start_time = time.time()
y_test_review_texts, y_test_pred, y_test_pred_probs, y_test_labels = get_predictions(
    gpt2_model,
    gpt2_test_loader
)

NameError: name 'device' is not defined

In [None]:
# `start_time` was set in the previous cell just before the GPT-2 test predictions ran.
gpt2_test_predictions_time = time.time() - start_time
print(f'GPT-2 Test predictions completed in {gpt2_test_predictions_time:.2f} seconds.')

# Compute every metric once and reuse it below instead of recomputing the same report
# and accuracy for each print.
gpt2_test_report = classification_report(y_test_labels, y_test_pred)
gpt2_test_accuracy = accuracy_score(y_test_labels, y_test_pred)
print('GPT-2 Test Classification Report:')
print(gpt2_test_report)
print('GPT-2 Test Accuracy:', gpt2_test_accuracy)

gpt2_val_report = classification_report(y_val_test, y_val_pred)
gpt2_val_accuracy = accuracy_score(y_val_test, y_val_pred)

# Displaying results for GPT-2
print("GPT-2 Results for the validation set:")
print(f"GPT-2 Validation Accuracy: {gpt2_val_accuracy:.4f}")
print("GPT-2 Classification Report:")
print(gpt2_val_report)

print("\nGPT-2 Results for the test set:")
print(f"GPT-2 Test Accuracy: {gpt2_test_accuracy:.4f}")
print("GPT-2 Classification Report:")
print(gpt2_test_report)

# Summary of times for GPT-2
print("\nGPT-2 Summary of Times:")
print(f"GPT-2 Tokenizer loading time: {gpt2_tokenizer_loading_time:.2f} seconds")
print(f"GPT-2 Dataset creation time: {gpt2_dataset_creation_time:.2f} seconds")
print(f"GPT-2 Data loader creation time: {gpt2_data_loader_creation_time:.2f} seconds")
print(f"GPT-2 Model loading time: {gpt2_model_loading_time:.2f} seconds")
print(f"GPT-2 Model saving time: {gpt2_model_saving_time:.2f} seconds")
print(f"GPT-2 Validation predictions time: {gpt2_val_predictions_time:.2f} seconds")
print(f"GPT-2 Test predictions time: {gpt2_test_predictions_time:.2f} seconds")
print(f"GPT-2 Epoch times: {gpt2_epoch_times}")