### Now we run our model on the empty dataset to compare precision compared to actual data

#### Now we load our fine tuned model first before training:

In [5]:
!pip install -q transformers datasets scikit-learn sentencepiece

In [8]:
!pip install torch




In [10]:
import torch
print(torch.cuda.is_available())

True


In [11]:
# ✅ Setup
from transformers import T5ForConditionalGeneration, T5Tokenizer

# model_path = "/content/drive/MyDrive/t5-finetuned"
model_path = "./t5_multitask_finetuned"

tokenizer = T5Tokenizer.from_pretrained(model_path)
model = T5ForConditionalGeneration.from_pretrained(model_path)

import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

import torch

def predict(input_text):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    inputs = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)
    outputs = model.generate(inputs, max_length=64)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

    

2025-07-05 16:56:10.152241: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-05 16:56:10.166836: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-05 16:56:10.185491: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-05 16:56:10.190980: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-05 16:56:10.204237: I tensorflow/core/platform/cpu_feature_guar

### Now we try to predict something on a single manual string before inputting as a whole file


In [12]:
text = "title: In the mystical realm of Somnium, where dreams take on lives of their own, a young apprentice named Lyra discovers she possesses the rare gift of Dreamweaving. With the ability to shape and control the fabric of the subconscious, Lyra is tasked with unraveling the mystery behind a series of dark and foreboding dreams that threaten to consume the dreams of Somnium's inhabitants"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_length=16)
print("Generated Title:", tokenizer.decode(outputs[0], skip_special_tokens=True))

Generated Title: Dreamweaving


In [13]:
text = "genre: In the mystical realm of Somnium, where dreams take on lives of their own, a young apprentice named Lyra discovers she possesses the rare gift of Dreamweaving. With the ability to shape and control the fabric of the subconscious, Lyra is tasked with unraveling the mystery behind a series of dark and foreboding dreams that threaten to consume the dreams of Somnium's inhabitants"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs)
print("Predicted Genre:", tokenizer.decode(outputs[0], skip_special_tokens=True))

Predicted Genre: ['fantasy'


In [14]:
text = "rating: In the mystical realm of Somnium, where dreams take on lives of their own, a young apprentice named Lyra discovers she possesses the rare gift of Dreamweaving. With the ability to shape and control the fabric of the subconscious, Lyra is tasked with unraveling the mystery behind a series of dark and foreboding dreams that threaten to consume the dreams of Somnium's inhabitants"
inputs = tokenizer(text, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs)
print("Predicted Rating:", tokenizer.decode(outputs[0], skip_special_tokens=True))

Predicted Rating: 4.13


#### Now we try to predict everything on batch using csv file:

In [1]:
import zipfile

zip_path = 't5_multitask_finetuned.zip'  # Change this to your zip file path
extract_to = 't5_multitask_finetuned'  # Change this to your desired output folder

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_to)

print("Extraction complete.")

Extraction complete.


In [17]:
import pandas as pd
from tqdm import tqdm

# Load the CSV file where 'genre' and 'rating' are empty
df = pd.read_csv('merged_all_for_prediction.csv')

# Ensure 'predictedTitle' column exists as the last column
df['predictedTitle'] = ""

# Batch prediction for each row
for idx, row in tqdm(df.iterrows(), total=len(df)):
    # Prepare input for each task
    base_text = f"{row['title']} {row['description']}"
    
    # Predict rating
    rating_input = f"rating: {base_text}"
    rating_pred = predict(rating_input)
    df.at[idx, 'rating'] = rating_pred

    # Predict genre
    genre_input = f"genre: {base_text}"
    genre_pred = predict(genre_input)
    df.at[idx, 'genre'] = genre_pred

    # Predict title
    title_input = f"title: {base_text}"
    title_pred = predict(title_input)
    df.at[idx, 'predictedTitle'] = title_pred

# Save the results to a new CSV file
df.to_csv('merged_all_with_predictions.csv', index=False)


  df.at[idx, 'rating'] = rating_pred
  df.at[idx, 'genre'] = genre_pred
100%|██████████| 94008/94008 [3:53:32<00:00,  6.71it/s]  


In [20]:
import numpy as np
from IPython.display import Audio

# Melody (in Hz): C (Do), D (Re), E (Mi), G (Sol), C (Do octave)
frequencies = [261.63, 293.66, 329.63, 392.00, 523.25]
duration = 0.4  # duration of each note in seconds
sample_rate = 44100

# Generate the full melody
melody = np.concatenate([
    np.sin(2 * np.pi * freq * np.linspace(0, duration, int(sample_rate * duration), False))
    for freq in frequencies
])

# Normalize
melody = melody.astype(np.float32)

# Play melody
Audio(melody, rate=sample_rate, autoplay=True)
