In [None]:
# Print the NVIDIA GPU status report for this runtime, to confirm a GPU is
# attached before the training cells below are run.
!nvidia-smi

In [None]:
# Install PyTorch 2.0.1 with CUDA 11.1 (cu118)
!pip install -q torch==2.0.1+cu118 -f https://download.pytorch.org/whl/cu118/torch_stable.html


In [None]:
# Install tokenizers 0.10.1
!pip install -q -U tokenizers==0.10.1 > /dev/null

# Install transformers 4.11.2
!pip install -q -U transformers==4.11.2 > /dev/null

# Install simpletransformers 0.61.14
!pip install -q -U simpletransformers==0.61.14 > /dev/null

In [None]:
import numpy as np
import pandas as pd
import os, json, gc, re, random
from tqdm.notebook import tqdm
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline
import plotly.express as px

import warnings
import logging

# Install a blanket "ignore" filter so Python warnings are not shown.
warnings.filterwarnings(action="ignore")

# Configure the root logger at INFO (basicConfig does nothing if the root
# logger already has handlers), then raise the "transformers" logger to
# WARNING so its INFO records are not emitted.
logging.basicConfig(level="INFO")
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel("WARNING")


In [None]:
import torch
import transformers
import tokenizers

# Cell output: the installed versions as a (torch, transformers, tokenizers)
# tuple, so the pins from the install cells can be checked at a glance.
(
    torch.__version__,
    transformers.__version__,
    tokenizers.__version__,
)

In [None]:
# Open Colab's upload dialog so a local file can be sent to this runtime.
# The following cell reads 'wiki_movie_plots_deduped.csv' from the working
# directory, so that is the file expected here.
# NOTE(review): `uploaded` is not referenced again in the visible cells;
# presumably it maps uploaded file names to their contents — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:

# Read the uploaded CSV into a pandas DataFrame. low_memory=False asks pandas
# not to process the file in chunks while parsing (avoids mixed-type column
# inference at the cost of more memory).
df = pd.read_csv(
    filepath_or_buffer='wiki_movie_plots_deduped.csv',
    low_memory=False,
)

# Show the loaded frame as the cell output.
df

In [None]:
# Keep only the plot and title columns and rename them to
# input_text / target_text (plot is the input, title is the target).
# NOTE(review): presumably these are the column names Seq2SeqModel expects
# in its training frame — confirm against the simpletransformers docs.
movies_df = df[["Plot", "Title"]].rename(
    columns={"Plot": "input_text", "Title": "target_text"}
)
movies_df

In [None]:
# Discard the rows at positions 21000 through 34885 (the slice end, 34886, is
# exclusive). For a frame with at most 34,886 rows this keeps only the first
# 21,000 rows.
# NOTE(review): the bounds are hard-coded; confirm they match the dataset size.
rows_to_discard = movies_df.index[21000:34886]
movies_df = movies_df.drop(labels=rows_to_discard)

# Hold out a fixed-seed 20% sample for evaluation; every remaining row
# (selected by dropping the sampled index labels) becomes training data.
eval_dataset = movies_df.sample(frac=0.2, random_state=42)
eval_labels = eval_dataset.index
train_dataset = movies_df.drop(labels=eval_labels)

In [None]:
# Display the frame again now that the preceding cell has dropped rows from
# it, so the reduced row count can be checked before training.
movies_df

In [None]:
# Pick the CUDA device when torch reports one as available, otherwise the CPU,
# and print the choice.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Device:", device)

In [None]:
from simpletransformers.seq2seq import Seq2SeqModel
# Options dictionary passed as `args=` when the Seq2SeqModel is constructed.
model_args = dict(
    learning_rate=1e-4,
    reprocess_input_data=True,
    overwrite_output_dir=True,
    save_model_every_epoch=False,
    save_eval_checkpoints=False,
    max_seq_length=512,
    train_batch_size=8,
    num_train_epochs=2,
)

# Identifier passed as `encoder_decoder_name=` when the model is constructed.
model_name = "facebook/bart-base"

In [None]:
# Build the BART encoder-decoder wrapper from the checkpoint named in
# `model_name`, configured with `model_args`.
# use_cuda follows the runtime: Seq2SeqModel defaults to use_cuda=True and
# raises a ValueError when CUDA is unavailable, which would defeat the CPU
# fallback this notebook selects in its device cell.
model = Seq2SeqModel(
    encoder_decoder_type="bart",
    encoder_decoder_name=model_name,
    args=model_args,
    use_cuda=torch.cuda.is_available(),
)

In [None]:
%%time
# Fine-tune the model on the training split; the %%time cell magic reports
# how long the whole cell took to run.
model.train_model(train_dataset)



In [None]:
# Evaluate the fine-tuned model on the held-out evaluation split and print
# whatever eval_model returns.
# NOTE(review): presumably a dict of metrics (e.g. an evaluation loss) —
# confirm against the simpletransformers documentation.
result = model.eval_model(eval_dataset)
print(result)

In [None]:

# Mount Google Drive so the trained model can be written somewhere that
# outlives this Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

# Create the parent directory if it doesn't exist.
# NOTE(review): "path/to/save" looks like a placeholder — replace it with the
# real destination folder before running.
parent_dir = "/content/drive/My Drive/path/to/save"
os.makedirs(parent_dir, exist_ok=True)

# torch.save serialises the whole `model` object (the wrapper, not just a
# state_dict) to a single file.
# NOTE(review): "your_model.pt" also looks like a placeholder name; loading
# this file back presumably requires simpletransformers to be importable —
# confirm.
model_path = os.path.join(parent_dir, "your_model.pt")
torch.save(model, model_path)


In [None]:

# Sample plot summary used as a smoke test for title generation (the text is
# passed to the model exactly as written).
plot = """ Titanic is a 1997 American epic romantic disaster movie. It was directed, written, and co-produced by James Cameron. The movie is about the 1912 sinking of the RMS Titanic. It stars Kate Winslet and Leonardo DiCaprio. The two play characters who are of different social classes. They fall in love after meeting aboard the ship, but it was not good for a rich girl to fall in love with a poor boy in 1912. Titanic runned for 200 days in uae Production of the movie began in 1995. Cameron recorded footage of the real Titanic wreck. The reconstruction of the Titanic was created at Playas de Rosarito in Baja California. To create the sinking of the ship, scale models and computer-generated imagery were used. Paramount Pictures and 20th Century Fox helped with half of the funding for the movie. At the time when the movie was released, it was the most expensive movie ever made. It had a budget of $200 million."""

# predict() is given a one-element list; the first entry of what it returns
# is taken as the predicted title for that plot.
predictions = model.predict([plot])
predicted_title = predictions[0]

print(f'Predicted Title: {predicted_title}\n')
print(f'Plot: {plot}\n\n\n')