# Import modules and dependencies

In [3]:
! pip install -q transformers[sentencepiece] datasets evaluate

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import transformers

import evaluate
from datasets import Dataset, DatasetDict
from transformers import  AutoModelForSequenceClassification, AutoTokenizer, TrainingArguments, Trainer, EarlyStoppingCallback

# Loading Data

In [5]:
df = pd.read_csv("/kaggle/input/nlp-project-data/film_details.csv")
df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama']
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']"
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']"
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']"
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama']
...,...,...,...,...,...,...,...,...,...
15149,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']"
15150,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy']
15151,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th..."
15152,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']"


In [6]:
df = df.dropna().reset_index(drop=True)
df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama']
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']"
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']"
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']"
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama']
...,...,...,...,...,...,...,...,...,...
15149,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']"
15150,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy']
15151,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th..."
15152,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']"


In [7]:
genre_list = []

indices_to_drop = []
for idx, genres in enumerate(df.Genres.to_list()):
  genres_list = eval(genres)
  if len(genres_list):
    genre_list.append(genres_list[0])
  else:
    indices_to_drop.append(idx)

df = df.drop(indices_to_drop).reset_index(drop=True)
df.shape

df['Single_Genre'] = genre_list

df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres,Single_Genre
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama'],Drama
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']",Crime
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']",Adventure
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']",Drama
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama'],Drama
...,...,...,...,...,...,...,...,...,...,...
15149,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']",Comedy
15150,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy'],Comedy
15151,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th...",Action
15152,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']",Crime


In [8]:
genre_count = df['Single_Genre'].value_counts()
threshold = int(len(df) * 0.005) #Rare genre threshold
rare_genres = [ cat for cat, count in genre_count.items() if count < threshold]

In [9]:
print(len(rare_genres))
print()
print(rare_genres[:3])

19

['Fantasy', 'Reality-TV', 'Thriller']


In [10]:
rare_indices_to_drop = [idx for idx, row in df.iterrows() if row['Single_Genre'] in rare_genres]
len(rare_indices_to_drop)

270

In [11]:
df = df.drop(rare_indices_to_drop).reset_index(drop=True)
df.shape

(14884, 10)

In [12]:
df['Single_Genre'].value_counts()

Single_Genre
Comedy         3837
Drama          3470
Action         2437
Documentary    1353
Crime          1199
Biography       825
Adventure       632
Animation       605
Horror          526
Name: count, dtype: int64

In [13]:
len(df['Single_Genre'].value_counts())

9

In [14]:
labels = list(set(df.Single_Genre.to_list()))
label_count = len(labels)

print(labels)
print()
print(label_count)

['Action', 'Drama', 'Comedy', 'Adventure', 'Horror', 'Animation', 'Biography', 'Crime', 'Documentary']

9


In [15]:
df

Unnamed: 0,Title,Category,Url,Metascore,Number of critic reviewers,User score,Number of user reviewers,Plot summary,Genres,Single_Genre
0,Dekalog (1988),movie,https://www.metacritic.com/movie/dekalog-1988/,100,13,100,112,This masterwork by Krzysztof Kieślowski is one...,['Drama'],Drama
1,The Godfather,movie,https://www.metacritic.com/movie/the-godfather/,100,16,100,4082,Francis Ford Coppola's epic features Marlon Br...,"['Crime', 'Drama']",Crime
2,Lawrence of Arabia (re-release),movie,https://www.metacritic.com/movie/lawrence-of-a...,100,8,100,442,The 40th anniversary re-release of David Lean'...,"['Adventure', 'Biography', 'Drama', 'War']",Adventure
3,The Leopard (re-release),movie,https://www.metacritic.com/movie/the-leopard-r...,100,12,100,84,"Set in Sicily in 1860, Luchino Visconti's spec...","['Drama', 'History']",Drama
4,The Conformist,movie,https://www.metacritic.com/movie/the-conformis...,100,11,100,105,"Set in Rome in the 1930s, this re-release of B...",['Drama'],Drama
...,...,...,...,...,...,...,...,...,...,...
14879,Cavemen,tv,https://www.metacritic.com/tv/cavemen/,19,13,19,6,"Cavemen revolves around Joel, his younger brot...","['Comedy', 'Sci-Fi']",Comedy
14880,Work It,tv,https://www.metacritic.com/tv/work-it/,19,22,19,44,"After they are laid off, Lee Standish (Ben Kol...",['Comedy'],Comedy
14881,Category 7: The End of the World,tv,https://www.metacritic.com/tv/category-7-the-e...,18,11,18,7,"""Category 7: The End of the World"" picks up wh...","['Action', 'Adventure', 'Drama', 'Sci-Fi', 'Th...",Action
14882,Stalker,tv,https://www.metacritic.com/tv/stalker/,17,24,17,137,Lt. Beth Davis (Maggie Q) leads the Threat Ass...,"['Crime', 'Drama', 'Thriller']",Crime


In [16]:
df.describe(include='object')

Unnamed: 0,Title,Category,Url,Plot summary,Genres,Single_Genre
count,14884,14884,14884,14884,14884,14884
unique,14253,2,14824,14822,1746,9
top,Robin Hood,movie,https://www.metacritic.com/movie/battle-for-te...,"A comic spoof in the tradition of ""Airplane,"" ...",['Drama'],Comedy
freq,4,12403,2,2,1063,3837


# Data Processing

In [17]:
ds = Dataset.from_pandas(df)
ds

Dataset({
    features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre'],
    num_rows: 14884
})

In [18]:
ds[0]

{'Title': 'Dekalog (1988)',
 'Category': 'movie',
 'Url': 'https://www.metacritic.com/movie/dekalog-1988/',
 'Metascore': 100,
 'Number of critic reviewers': 13,
 'User score': 100,
 'Number of user reviewers': 112,
 'Plot summary': 'This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...',
 'Genres': "['Drama']",
 'Single_Genre': 'Drama'}

# [HF Model Link](https://huggingface.co/distilroberta-base)

In [19]:
model_name = "google-bert/bert-base-uncased"

tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



BertTokenizerFast(name_or_path='google-bert/bert-base-uncased', vocab_size=30522, model_max_length=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True),  added_tokens_decoder={
	0: AddedToken("[PAD]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	100: AddedToken("[UNK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	101: AddedToken("[CLS]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	102: AddedToken("[SEP]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	103: AddedToken("[MASK]", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}

In [20]:
tokenizer.tokenize(ds[0]['Plot summary'][:150])

['this',
 'master',
 '##work',
 'by',
 'k',
 '##rzy',
 '##sz',
 '##to',
 '##f',
 'ki',
 '##es',
 '##low',
 '##ski',
 'is',
 'one',
 'of',
 'the',
 'twentieth',
 'century',
 '’',
 's',
 'greatest',
 'achievements',
 'in',
 'visual',
 'storytelling',
 '.',
 'originally',
 'made',
 'for',
 'polish',
 'tel']

In [21]:
def tokenize_desc(x) :
  return tokenizer(x['Plot summary'], truncation=True, padding=True)

In [22]:
tokenized_ds = ds.map(tokenize_desc, batched=True)
tokenized_ds

Map:   0%|          | 0/14884 [00:00<?, ? examples/s]

Dataset({
    features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask'],
    num_rows: 14884
})

In [23]:
tokenized_ds[0]

{'Title': 'Dekalog (1988)',
 'Category': 'movie',
 'Url': 'https://www.metacritic.com/movie/dekalog-1988/',
 'Metascore': 100,
 'Number of critic reviewers': 13,
 'User score': 100,
 'Number of user reviewers': 112,
 'Plot summary': 'This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...',
 'Genres': "['Drama']",
 'Single_Genre': 'Drama',
 'input_ids': [101,
  2023,
  3040,
  6198,
  2011,
  1047,
  28534,
  17112,
  3406,
  2546,
  11382,
  2229,
  8261,
  5488,
  2003,
  2028,
  1997,
  1996,
  9086,
  2301,
  1521,
  1055,
  4602,
  10106,
  1999,
  5107,
  20957,
  1012,
  2761,
  2081,
  2005,
  3907,
  2547,
  1010,
  2139,
  12902,
  8649,
  7679,
  2006,
  1996,
  3901,
  1997,
  1037,
  3847,
 

In [24]:
row = tokenized_ds[0]

print("Plot")
print()
print(row['Plot summary'])
print()
print("====================================================================================================")
print()
print("Input ids")
print()
print(row['input_ids'])

Plot

This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...


Input ids

[101, 2023, 3040, 6198, 2011, 1047, 28534, 17112, 3406, 2546, 11382, 2229, 8261, 5488, 2003, 2028, 1997, 1996, 9086, 2301, 1521, 1055, 4602, 10106, 1999, 5107, 20957, 1012, 2761, 2081, 2005, 3907, 2547, 1010, 2139, 12902, 8649, 7679, 2006, 1996, 3901, 1997, 1037, 3847, 3375, 1999, 2397, 1011, 4750, 3735, 1010, 3005, 3268, 2468, 28797, 6970, 21077, 2004, 2027, 2227, 6832, 21883, 2015, 2008, 2024, 2012, 2320, 6171, 2711, 1012, 1012, 1012, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [25]:
# # Vocabulary index, Numericalization like we did in ULMFit
# tokenizer.vocab['masterwork']

# Categorize

In [26]:
labels

['Action',
 'Drama',
 'Comedy',
 'Adventure',
 'Horror',
 'Animation',
 'Biography',
 'Crime',
 'Documentary']

In [27]:
labels.index('Drama')

1

In [28]:
def categorize(x):
  return {"labels": [labels.index(genre) for genre in x['Single_Genre']]}

In [29]:
categorized_ds = tokenized_ds.map(categorize, batched=True)
categorized_ds

Map:   0%|          | 0/14884 [00:00<?, ? examples/s]

Dataset({
    features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
    num_rows: 14884
})

In [30]:
categorized_ds[0]

{'Title': 'Dekalog (1988)',
 'Category': 'movie',
 'Url': 'https://www.metacritic.com/movie/dekalog-1988/',
 'Metascore': 100,
 'Number of critic reviewers': 13,
 'User score': 100,
 'Number of user reviewers': 112,
 'Plot summary': 'This masterwork by Krzysztof Kieślowski is one of the twentieth century’s greatest achievements in visual storytelling. Originally made for Polish television, Dekalog focuses on the residents of a housing complex in late-Communist Poland, whose lives become subtly intertwined as they face emotional dilemmas that are at once deeply person...',
 'Genres': "['Drama']",
 'Single_Genre': 'Drama',
 'input_ids': [101,
  2023,
  3040,
  6198,
  2011,
  1047,
  28534,
  17112,
  3406,
  2546,
  11382,
  2229,
  8261,
  5488,
  2003,
  2028,
  1997,
  1996,
  9086,
  2301,
  1521,
  1055,
  4602,
  10106,
  1999,
  5107,
  20957,
  1012,
  2761,
  2081,
  2005,
  3907,
  2547,
  1010,
  2139,
  12902,
  8649,
  7679,
  2006,
  1996,
  3901,
  1997,
  1037,
  3847,
 

In [31]:
row = categorized_ds[0]
row['labels']

1

# Data Splitting

In [32]:
split_ds = categorized_ds.train_test_split(0.1, seed=42)
split_ds

DatasetDict({
    train: Dataset({
        features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 13395
    })
    test: Dataset({
        features: ['Title', 'Category', 'Url', 'Metascore', 'Number of critic reviewers', 'User score', 'Number of user reviewers', 'Plot summary', 'Genres', 'Single_Genre', 'input_ids', 'token_type_ids', 'attention_mask', 'labels'],
        num_rows: 1489
    })
})

# Modeling

In [33]:
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=label_count)
model

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [34]:
batch_size = 32
epochs = 100
lr = 3.75e-4

In [35]:
training_args = TrainingArguments(
    "models",
    learning_rate = lr,
    warmup_ratio = 0.1,
    lr_scheduler_type='cosine',
    fp16=True,
    evaluation_strategy='epoch',
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=epochs,
    weight_decay=0.01,
    report_to='none',
    load_best_model_at_end = True,
    save_strategy='epoch'
    # save_steps=200,
)



In [36]:
def accuracy(eval_preds):
  metric = evaluate.load("accuracy")
  logits, labels = eval_preds
  predictions = np.argmax(logits, axis=-1)
  return metric.compute(predictions=predictions, references=labels)

In [37]:
trainer = Trainer(
    model,
    training_args,
    train_dataset = split_ds['train'],
    eval_dataset = split_ds['test'],
    tokenizer = tokenizer,
    compute_metrics = accuracy,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3, early_stopping_threshold=0.0001)]
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [38]:
train_results = trainer.train()

Epoch,Training Loss,Validation Loss,Accuracy
1,No log,1.187645,0.596373
2,1.487200,1.106007,0.610477
3,0.949300,1.191295,0.59503
4,0.658800,1.641368,0.529214
5,0.489700,1.711686,0.555406


Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [39]:
print(train_results)

TrainOutput(global_step=2095, training_loss=0.878542926943103, metrics={'train_runtime': 705.4687, 'train_samples_per_second': 1898.738, 'train_steps_per_second': 59.393, 'total_flos': 3747874301214912.0, 'train_loss': 0.878542926943103, 'epoch': 5.0})


In [41]:
metrics = train_results.metrics
max_train_steps = len(trainer.state.log_history) * trainer.args.per_device_train_batch_size
metrics["train_samples"] = max_train_steps

trainer.log_metrics("train", metrics)
trainer.save_metrics("train", metrics)
trainer.save_state()

# Evaluate and log best model
metrics = trainer.evaluate()
trainer.log_metrics("eval", metrics)
trainer.save_metrics("eval", metrics)

***** train metrics *****
  epoch                    =        5.0
  total_flos               =  3490479GF
  train_loss               =     0.8785
  train_runtime            = 0:11:45.46
  train_samples            =        352
  train_samples_per_second =   1898.738
  train_steps_per_second   =     59.393
***** eval metrics *****
  epoch                   =        5.0
  eval_accuracy           =     0.6105
  eval_loss               =      1.106
  eval_runtime            = 0:00:05.32
  eval_samples_per_second =    279.532
  eval_steps_per_second   =      8.823


In [42]:
# train_losses = [log['train_loss'] for log in trainer.state.log_history]
# eval_losses = [log['eval_loss'] for log in trainer.state.log_history]

# plt.figure(figsize=(10, 6))
# plt.plot(train_losses, label='Train Loss')
# plt.plot(eval_losses, label='Validation Loss')
# plt.xlabel('Epoch')
# plt.ylabel('Loss')
# plt.legend()
# plt.title('Training and Validation Loss')
# plt.show()

In [43]:
trainer.save_model('models/film_genre_classifier_bert_base_uncased')

In [44]:
import shutil
shutil.make_archive("film_genre_classifier_bert_base_uncased", 'zip', 'models')

'/kaggle/working/film_genre_classifier_bert_base_uncased.zip'