<a href="https://colab.research.google.com/github/Ahsan-folium/ai-intern-week04-deep-learnin/blob/main/deepLearning_miniProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# !pip install transformers datasets
!pip install evaluate

In [None]:
from datasets import load_dataset
from transformers import (
AutoTokenizer,
AutoModelForSequenceClassification, TrainingArguments, Trainer
)

import numpy as np
import evaluate

In [None]:
# Load the IMDB dataset. Later cells read its "train" and "test" splits and
# the "text" column of each row.
dataset = load_dataset("imdb")
# Quick sanity check of what was loaded.
print(dataset)

In [None]:
# Load the tokenizer for the same pretrained checkpoint ("bert-base-uncased")
# that the classification model is loaded from further down.
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

In [None]:
# Tokenize the reviews so that each review text is converted into token ids.
def tokenizer_function(dataset):
  """Tokenize a batch of reviews.

  Args:
    dataset: The batch handed over by ``dataset.map(..., batched=True)``; only
      its ``"text"`` entry is read. Note that inside this function the
      parameter name shadows the global ``dataset``.

  Returns:
    The tokenizer output for the batch, with over-long reviews truncated.
  """
  # Padding is intentionally left out here: `padding=True` would pad every row
  # to the longest review in its map batch. The Trainer is given the tokenizer
  # and pads each training batch only as far as that batch needs.
  return tokenizer(dataset["text"], truncation=True)


# Apply the tokenization to the dataset; batched=True passes rows to
# tokenizer_function in batches rather than one at a time.
tokenized_datasets = dataset.map(tokenizer_function, batched=True)

In [None]:
# Load the pretrained "bert-base-uncased" checkpoint with a sequence-classification
# head. num_labels=2 gives one output per class; the prediction cell treats
# class 1 as "positive" and anything else as "negative".
model = AutoModelForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)



In [None]:
# Evaluation metric: load the "accuracy" metric from the `evaluate` library.
# compute_metrics calls its .compute(...) method.
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Turn raw model outputs into the accuracy metric.

    Args:
        eval_pred: A pair that unpacks into ``(logits, labels)``.

    Returns:
        Whatever ``accuracy.compute`` returns for the predicted class ids
        versus the reference labels.
    """
    raw_scores, gold_labels = eval_pred
    # The predicted class is the index of the largest score along the last axis.
    predicted_classes = np.argmax(raw_scores, axis=-1)
    return accuracy.compute(
        predictions=predicted_classes,
        references=gold_labels,
    )

In [None]:
# Hyperparameters for fine-tuning.
training_args = TrainingArguments(
    output_dir="./results",          # where checkpoints and other outputs are written
    eval_strategy="epoch",           # run an evaluation pass at the end of every epoch
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=2,
    weight_decay=0.01,
    # Disable external logging integrations explicitly so that "Run All" never
    # stops at an interactive login prompt (e.g. W&B) when such a package
    # happens to be installed in the environment.
    report_to="none",
)

In [None]:
# We train on a small subset of 2000 samples (and evaluate on 1000) because
# the entire set would take much longer.
SUBSET_SEED = 42          # fixed seed so the same rows are picked on every run
N_TRAIN_SAMPLES = 2000
N_EVAL_SAMPLES = 1000

train_subset = (
    tokenized_datasets["train"].shuffle(seed=SUBSET_SEED).select(range(N_TRAIN_SAMPLES))
)
eval_subset = (
    tokenized_datasets["test"].shuffle(seed=SUBSET_SEED).select(range(N_EVAL_SAMPLES))
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_subset,
    eval_dataset=eval_subset,
    # NOTE(review): recent transformers releases name this argument
    # `processing_class` - confirm against the installed version.
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

In [None]:
# Fine-tune the model on the training subset. Because eval_strategy="epoch",
# an evaluation pass runs at the end of each epoch.
trainer.train()

In [None]:
# Final evaluation on the eval_dataset given to the Trainer; the returned
# metrics are shown as the cell output.
trainer.evaluate()


In [None]:
import torch


# Put the model on the GPU when one is available, otherwise keep it on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Assigning the result (`.to` returns the same module) keeps the cell from
# echoing the full model architecture as its output.
model = model.to(device)


In [None]:


def predict_sentiment(test_text):
  """Print the predicted sentiment of a single review.

  Args:
    test_text: The review text to classify. A single text is expected,
      because the prediction is reduced to one Python int via ``.item()``.

  Returns:
    None. Prints ``Sentiment: positive`` when the predicted class is 1 and
    ``Sentiment: negative`` otherwise.
  """
  # Switch to inference mode so layers such as dropout behave deterministically,
  # regardless of the mode the model was left in by earlier cells.
  model.eval()

  # Tokenize and move the inputs to whatever device the model currently lives on.
  inputs = tokenizer(test_text, return_tensors="pt", truncation=True, padding=True).to(model.device)

  # Forward pass only: no gradients are needed for a prediction.
  with torch.no_grad():
    outputs = model(**inputs)
  pred = outputs.logits.argmax(-1).item()

  print("Sentiment:", "positive" if pred == 1 else "negative")


In [None]:
# Try the fine-tuned model on one clearly positive and one clearly negative review.
text = "I absolutely loved this movie! The acting was fantastic."
text2 = "this movie is a waste of time"

for review in (text, text2):
    predict_sentiment(review)