# Import modules and dependencies

In [1]:
! pip install -q transformers[sentencepiece] datasets evaluate 

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import transformers

import evaluate
from datasets import Dataset, DatasetDict
from transformers import  AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer, EarlyStoppingCallback

# Loading Data

In [3]:
df = pd.read_csv("/kaggle/input/nlp-project-data/film_details.csv")
df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama']
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']"
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']"
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']"
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama']
...,...,...,...,...,...,...,...,...,...
15149,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']"
15150,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy']
15151,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th..."
15152,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']"


In [4]:
df = df.dropna().reset_index(drop=True)
df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama']
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']"
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']"
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']"
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama']
...,...,...,...,...,...,...,...,...,...
15149,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']"
15150,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy']
15151,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th..."
15152,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']"


In [5]:
genre_list = []

indices_to_drop = []
for idx, genres in enumerate(df.Genres.to_list()):
  genres_list = eval(genres)
  if len(genres_list):
    genre_list.append(genres_list[0])
  else:
    indices_to_drop.append(idx)

df = df.drop(indices_to_drop).reset_index(drop=True)
df.shape

df['Single_Genre'] = genre_list

df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres,Single_Genre
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama'],Drama
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']",Crime
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']",Adventure
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']",Drama
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama'],Drama
...,...,...,...,...,...,...,...,...,...,...
15149,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']",Comedy
15150,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy'],Comedy
15151,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th...",Action
15152,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']",Crime


In [6]:
genre_count = df['Single_Genre'].value_counts()
threshold = int(len(df) * 0.005) #Rare genre threshold
rare_genres = [ cat for cat, count in genre_count.items() if count < threshold]

In [7]:
print(len(rare_genres))
print()
print(rare_genres[:3])

19

['Fantasy', 'Reality-TV', 'Thriller']


In [8]:
rare_indices_to_drop = [idx for idx, row in df.iterrows() if row['Single_Genre'] in rare_genres]
len(rare_indices_to_drop)

270

In [9]:
df = df.drop(rare_indices_to_drop).reset_index(drop=True)
df.shape

(14884, 10)

In [10]:
df['Single_Genre'].value_counts()

Single_Genre
Comedy         3837
Drama          3470
Action         2437
Documentary    1353
Crime          1199
Biography       825
Adventure       632
Animation       605
Horror          526
Name: count, dtype: int64

In [11]:
len(df['Single_Genre'].value_counts())

9

In [12]:
labels = list(set(df.Single_Genre.to_list()))
label_count = len(labels)

print(labels)
print()
print(label_count)

['Drama', 'Documentary', 'Adventure', 'Biography', 'Horror', 'Animation', 'Crime', 'Action', 'Comedy']

9


In [13]:
df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres,Single_Genre
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama'],Drama
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']",Crime
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']",Adventure
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']",Drama
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama'],Drama
...,...,...,...,...,...,...,...,...,...,...
14879,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']",Comedy
14880,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy'],Comedy
14881,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th...",Action
14882,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']",Crime


In [14]:
df.describe(include='object')

Unnamed: 0,Title,Category,Url,Plot summary,Genres,Single_Genre
count,14884,14884,14884,14884,14884,14884
unique,14253,2,14824,14822,1746,9
top,Robin Hood,movie,https://www.metacritic.com/movie/battle-for-te...,"A comic spoof in the tradition of ""Airplane,"" ...",['Drama'],Comedy
freq,4,12403,2,2,1063,3837


# Data Processing

In [15]:
ds = Dataset.from_pandas(df)
ds

Dataset({
    features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre'],
    num_rows: 14884
})

In [16]:
ds[0]

{'Title': 'Dekalog (1988)',
 'Category': 'movie',
 'Url': 'https://www.metacritic.com/movie/dekalog-1988/',
 'Metascore': 100,
 'Number of critic reviewers': 13,
 'User score': 100,
 'Number of user reviewers': 112,
 'Plot summary': 'This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...',
 'Genres': "['Drama']",
 'Single_Genre': 'Drama'}

# [HF Model Link](https://huggingface.co/xlnet/xlnet-base-cased)

In [17]:
model_name = "xlnet/xlnet-base-cased"

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer

config.json:   0%|          | 0.00/760 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/798k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.38M [00:00<?, ?B/s]



XLNetTokenizerFast(name_or_path='xlnet/xlnet-base-cased', vocab_size=32000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '<sep>', 'pad_token': '<pad>', 'cls_token': '<cls>', 'mask_token': '<mask>', 'additional_special_tokens': ['<eop>', '<eod>']}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("</s>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("<cls>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	4: AddedToken("<sep>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	5: AddedToken("<p

In [18]:
tokenizer.tokenize(ds[0]['Plot summary'][:150])

['▁This',
 '▁master',
 'work',
 '▁by',
 '▁K',
 'r',
 'zy',
 's',
 'z',
 't',
 'of',
 '▁Ki',
 'es',
 'l',
 'owski',
 '▁is',
 '▁one',
 '▁of',
 '▁the',
 '▁twentieth',
 '▁century',
 '’',
 's',
 '▁greatest',
 '▁achievements',
 '▁in',
 '▁visual',
 '▁storytelling',
 '.',
 '▁Originally',
 '▁made',
 '▁for',
 '▁Polish',
 '▁',
 'tel']

In [19]:
def tokenize_desc(x) :
  return tokenizer(x['Plot summary'], truncation=True, padding=True)

In [20]:
tokenized_ds = ds.map(tokenize_desc, batched=True)
tokenized_ds

Map:   0%|          | 0/14884 [00:00<?, ? examples/s]

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Dataset({
    features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 14884
})

In [21]:
tokenized_ds[0]

{'Title': 'Dekalog (1988)',
 'Category': 'movie',
 'Url': 'https://www.metacritic.com/movie/dekalog-1988/',
 'Metascore': 100,
 'Number of critic reviewers': 13,
 'User score': 100,
 'Number of user reviewers': 112,
 'Plot summary': 'This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...',
 'Genres': "['Drama']",
 'Single_Genre': 'Drama',
 'input_ids': [5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  122,
  2822,
  3552,
  37,
  574,
  213,
  5999,
  23,
  553,
  46,
  1168,
  3108,
  202,
  368,
  13034,
  27,
  65,
  20,
  18,
  18882,
  997,
  165,
  23,
  3385,
  10396,
  25

In [22]:
row = tokenized_ds[0]

print("Plot")
print()
print(row['Plot summary'])
print()
print("====================================================================================================")
print()
print("Input ids")
print()
print(row['input_ids'])

Plot

This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...


Input ids

[5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 122, 2822, 3552, 37, 574, 213, 5999, 23, 553, 46, 1168, 3108, 202, 368, 13034, 27, 65, 20, 18, 18882, 997, 165, 23, 3385, 10396, 25, 4689, 24909, 9, 11234, 140, 28, 4726, 892, 19, 772, 1041, 6234, 7712, 31, 18, 1719, 20, 24, 2321, 1881, 25, 471, 13, 6689, 8393, 677, 3299, 19, 795, 1166, 401, 31536, 30835, 34, 63, 423, 4591, 17867, 23, 29, 41, 38, 497, 4875, 601, 9, 9, 9, 4, 3]


In [23]:
# # Vocabulary index, Numericalization like we did in ULMFit
# tokenizer.vocab['masterwork']

# Categorize

In [24]:
labels

['Drama',
 'Documentary',
 'Adventure',
 'Biography',
 'Horror',
 'Animation',
 'Crime',
 'Action',
 'Comedy']

In [25]:
labels.index('Drama')

0

In [26]:
def categorize(x):
  return {"labels": [labels.index(genre) for genre in x['Single_Genre']]}

In [27]:
categorized_ds = tokenized_ds.map(categorize, batched=True)
categorized_ds

Map:   0%|          | 0/14884 [00:00<?, ? examples/s]

Dataset({
    features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 14884
})

In [28]:
categorized_ds[0]

{'Title': 'Dekalog (1988)',
 'Category': 'movie',
 'Url': 'https://www.metacritic.com/movie/dekalog-1988/',
 'Metascore': 100,
 'Number of critic reviewers': 13,
 'User score': 100,
 'Number of user reviewers': 112,
 'Plot summary': 'This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...',
 'Genres': "['Drama']",
 'Single_Genre': 'Drama',
 'input_ids': [5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  5,
  122,
  2822,
  3552,
  37,
  574,
  213,
  5999,
  23,
  553,
  46,
  1168,
  3108,
  202,
  368,
  13034,
  27,
  65,
  20,
  18,
  18882,
  997,
  165,
  23,
  3385,
  10396,
  25

In [29]:
row = categorized_ds[0]
row['labels']

0

# Data Splitting

In [30]:
split_ds = categorized_ds.train_test_split(0.1, seed=42)
split_ds

DatasetDict({
    train: Dataset({
        features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 13395
    })
    test: Dataset({
        features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1489
    })
})

# Modeling

In [31]:
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=label_count)
model

pytorch_model.bin:   0%|          | 0.00/467M [00:00<?, ?B/s]

Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet/xlnet-base-cased and are newly initialized: ['logits_proj.bias', 'logits_proj.weight', 'sequence_summary.summary.bias', 'sequence_summary.summary.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


XLNetForSequenceClassification(
  (transformer): XLNetModel(
    (word_embedding): Embedding(32000, 768)
    (layer): ModuleList(
      (0-11): 12 x XLNetLayer(
        (rel_attn): XLNetRelativeAttention(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (dropout): Dropout(p=0.1, inplace=False)
        )
        (ff): XLNetFeedForward(
          (layer_norm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
          (layer_1): Linear(in_features=768, out_features=3072, bias=True)
          (layer_2): Linear(in_features=3072, out_features=768, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (activation_function): GELUActivation()
        )
        (dropout): Dropout(p=0.1, inplace=False)
      )
    )
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (sequence_summary): SequenceSummary(
    (summary): Linear(in_features=768, out_features=768, bias=True)
    (activation): Tanh()
    (first_dropout): Identity()
    (last

In [32]:
batch_size = 32
epochs = 100
lr = 3.75e-4

def accuracy(eval_preds):
  metric = evaluate.load("accuracy")
  logits, labels = eval_preds
  predictions = np.argmax(logits, axis=-1)
  return metric.compute(predictions=predictions, references=labels)

In [47]:
training_args = TrainingArguments(
    "models",
    learning_rate = lr,
    warmup_ratio = 0.1,
    lr_scheduler_type='cosine',
    fp16=True,
    eval_strategy='epoch',
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    weight_decay=0.01,
    report_to='none',
    load_best_model_at_end = True,
    save_strategy='epoch',
    save_safetensors = False 
    # save_steps=200,
)

In [50]:
trainer = Trainer(
    model,
    training_args,
    train_dataset = split_ds['train'],
    eval_dataset = split_ds['test'],
    tokenizer = tokenizer,
    compute_metrics = accuracy,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.0001)]
)

In [51]:
train_results = trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.646983,0.588314
2,0.584300,1.220594,0.581598
3,0.915000,1.338085,0.554735
4,0.805500,1.389483,0.551377
5,0.734700,1.583763,0.538617


In [52]:
print(train_results)

TrainOutput(global_step=2095, training_loss=0.7623638289640513, metrics={'train_runtime': 1106.9935, 'train_samples_per_second': 1210.034, 'train_steps_per_second': 37.85, 'total_flos': 4640800533679638.0, 'train_loss': 0.7623638289640513, 'epoch': 5.0})


In [53]:
metrics = train_results.metrics
max_train_steps = len(trainer.state.log_history) * trainer.args.per_device_train_batch_size
metrics["train_samples"] = max_train_steps

trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

# Evaluate and log best model
metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

***** train metrics *****
  epoch                    =        5.0
  total_flos               =  4322082GF
  train_loss               =     0.7624
  train_runtime            = 0:18:26.99
  train_samples            =        320
  train_samples_per_second =   1210.034
  train_steps_per_second   =      37.85


***** eval metrics *****
  epoch                   =        5.0
  eval_accuracy           =     0.5816
  eval_loss               =     1.2206
  eval_runtime            = 0:00:09.06
  eval_samples_per_second =    164.228
  eval_steps_per_second   =      5.184


In [54]:
# train_losses = [log['train_loss'] for log in trainer.state.log_history]
# eval_losses = [log['eval_loss'] for log in trainer.state.log_history]

# plt.figure(figsize=(10, 6))
# plt.plot(train_losses, label='Train Loss')
# plt.plot(eval_losses, label='Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.title('Training and Validation Loss')
# plt.show()

In [55]:
trainer.save_model('models/film_genre_classifier_xlnet-base-cased')

In [56]:
import shutil
shutil.make_archive("film_genre_classifier_xlnet-base-cased", 'zip', 'models')

'/kaggle/working/film_genre_classifier_xlnet-base-cased.zip'