### Now we run our model on the empty dataset to compare precision compared to actual data

#### Now we load our fine tuned model first before training:

In [2]:
# âœ… Setup
from transformers import T5ForConditionalGeneration, T5Tokenizer

# model_path = "/content/drive/MyDrive/t5-finetuned"
model_path = "./t5_multitask_finetuned"

tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

import torch

def predict(input_text):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
    outputs = model.generate(inputs, max_length=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

    

### Now we try to predict something on a single manual string before inputting as a whole file


In [5]:
text = "title: In the mystical realm of Somnium, where dreams take on lives of their own, a young apprentice named Lyra discovers she possesses the rare gift of Dreamweaving. With the ability to shape and control the fabric of the subconscious, Lyra is tasked with unraveling the mystery behind a series of dark and foreboding dreams that threaten to consume the dreams of Somnium's inhabitants"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_length=16)
print("Generated Title:", tokenizer.decode(outputs[0], skip_special_tokens=True))

Generated Title: Dreamweaving


In [6]:
text = "genre: In the mystical realm of Somnium, where dreams take on lives of their own, a young apprentice named Lyra discovers she possesses the rare gift of Dreamweaving. With the ability to shape and control the fabric of the subconscious, Lyra is tasked with unraveling the mystery behind a series of dark and foreboding dreams that threaten to consume the dreams of Somnium's inhabitants"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs)
print("Predicted Genre:", tokenizer.decode(outputs[0], skip_special_tokens=True))

Predicted Genre: ['fantasy'


In [7]:
text = "rating: In the mystical realm of Somnium, where dreams take on lives of their own, a young apprentice named Lyra discovers she possesses the rare gift of Dreamweaving. With the ability to shape and control the fabric of the subconscious, Lyra is tasked with unraveling the mystery behind a series of dark and foreboding dreams that threaten to consume the dreams of Somnium's inhabitants"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs)
print("Predicted Rating:", tokenizer.decode(outputs[0], skip_special_tokens=True))

Predicted Rating: 4.13


#### Now we try to predict everything on batch using csv file:

In [None]:
import pandas as pd
from tqdm import tqdm

# Load the CSV file where 'genre' and 'rating' are empty
df = pd.read_csv('./archive/cleaned/merged_all_for_prediction.csv')

# Ensure 'predictedTitle' column exists as the last column
df['predictedTitle'] = ""

# Batch prediction for each row
for idx, row in tqdm(df.iterrows(), total=len(df)):
    # Prepare input for each task
    base_text = f"{row['title']} {row['description']}"
    
    # Predict rating
    rating_input = f"rating: {base_text}"
    rating_pred = predict(rating_input)
    df.at[idx, 'rating'] = rating_pred

    # Predict genre
    genre_input = f"genre: {base_text}"
    genre_pred = predict(genre_input)
    df.at[idx, 'genre'] = genre_pred

    # Predict title
    title_input = f"title: {base_text}"
    title_pred = predict(title_input)
    df.at[idx, 'predictedTitle'] = title_pred

# Save the results to a new CSV file
df.to_csv('./archive/cleaned/merged_all_with_predictions.csv', index=False)


  df.at[idx, 'rating'] = rating_pred
  df.at[idx, 'genre'] = genre_pred
  0%|          | 138/94008 [01:11<14:38:42,  1.78it/s]