In [17]:
import torch
print(torch.backends.mps.is_available())

True


In [24]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
from sklearn.preprocessing import LabelEncoder
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np

# Set device to MPS if available, else use CPU
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")

# Load your dataset
df = pd.read_csv('dataset.csv')
df.dropna(subset=['Content_1', 'Genre'], inplace=True)
df.reset_index(drop=True, inplace=True)

# Encode labels
label_encoder = LabelEncoder()
df['Genre_encoded'] = label_encoder.fit_transform(df['Genre'])

# Split the dataset
train_df, val_df, test_df = np.split(df.sample(frac=1, random_state=42), [int(.8*len(df)), int(.9*len(df))])

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained('HooshvareLab/bert-fa-base-uncased')

class MovieGenreDataset(Dataset):
    def __init__(self, dataframe, tokenizer, max_length=512):
        self.tokenizer = tokenizer
        self.texts = dataframe['Content_1'].tolist()
        self.labels = dataframe['Genre_encoded'].tolist()
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        text = self.texts[idx]
        labels = self.labels[idx]
        encoding = self.tokenizer.encode_plus(
            text,
            add_special_tokens=True,
            max_length=self.max_length,
            padding='max_length',
            truncation=True,
            return_tensors="pt"
        )
        # Move tensors to the appropriate device
        return {
            'input_ids': encoding['input_ids'].to(device).squeeze(),
            'attention_mask': encoding['attention_mask'].to(device).squeeze(),
            'labels': torch.tensor(labels, dtype=torch.long).to(device)
        }

# Prepare data loaders
train_dataset = MovieGenreDataset(train_df, tokenizer)
val_dataset = MovieGenreDataset(val_df, tokenizer)
test_dataset = MovieGenreDataset(test_df, tokenizer)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=8)
test_loader = DataLoader(test_dataset, batch_size=8)

# Load the model
model = AutoModelForSequenceClassification.from_pretrained('HooshvareLab/bert-fa-base-uncased', num_labels=len(label_encoder.classes_)).to(device)

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=10,
    evaluation_strategy="epoch",
    no_cuda=True  # Ensure CUDA is not used even if available
)

# Trainer initialization
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer
)

# Train the model
trainer.train()

# Evaluate the model
results = trainer.evaluate(test_dataset)
print(results)

# Save the model and tokenizer
model.save_pretrained('./parsbert_movie_genre_classifier')
tokenizer.save_pretrained('./parsbert_movie_genre_classifier')


  return bound(*args, **kwds)
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at HooshvareLab/bert-fa-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  0%|          | 0/324 [06:28<?, ?it/s]
                                                
  3%|▎         | 10/324 [00:57<30:09,  5.76s/it]

{'loss': 2.7766, 'grad_norm': 13.639311790466309, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.09}


                                                
  6%|▌         | 20/324 [01:54<28:41,  5.66s/it]

{'loss': 2.8095, 'grad_norm': 11.837835311889648, 'learning_rate': 2.0000000000000003e-06, 'epoch': 0.19}


                                                
  9%|▉         | 30/324 [02:49<27:03,  5.52s/it]

{'loss': 2.5423, 'grad_norm': 10.614428520202637, 'learning_rate': 3e-06, 'epoch': 0.28}


                                                
 12%|█▏        | 40/324 [03:45<26:04,  5.51s/it]

{'loss': 2.3232, 'grad_norm': 12.410805702209473, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.37}


                                                
 15%|█▌        | 50/324 [04:41<25:15,  5.53s/it]

{'loss': 2.1514, 'grad_norm': 10.536578178405762, 'learning_rate': 5e-06, 'epoch': 0.46}


                                                
 19%|█▊        | 60/324 [05:36<24:15,  5.51s/it]

{'loss': 1.9336, 'grad_norm': 11.015776634216309, 'learning_rate': 6e-06, 'epoch': 0.56}


                                                
 22%|██▏       | 70/324 [06:30<22:36,  5.34s/it]

{'loss': 1.7471, 'grad_norm': 7.521190166473389, 'learning_rate': 7.000000000000001e-06, 'epoch': 0.65}


                                                
 25%|██▍       | 80/324 [07:24<22:32,  5.54s/it]

{'loss': 1.7174, 'grad_norm': 7.713947296142578, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.74}


                                                
 28%|██▊       | 90/324 [08:20<21:51,  5.60s/it]

{'loss': 1.7199, 'grad_norm': 5.777948379516602, 'learning_rate': 9e-06, 'epoch': 0.83}


                                                 
 31%|███       | 100/324 [09:16<20:46,  5.56s/it]

{'loss': 1.5309, 'grad_norm': 4.915441036224365, 'learning_rate': 1e-05, 'epoch': 0.93}


 33%|███▎      | 108/324 [09:57<16:42,  4.64s/it]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

                                                 
[A                                    
 33%|███▎      | 108/324 [10:10<16:42,  4.64s/it]
[A

{'eval_loss': 1.379694938659668, 'eval_runtime': 12.4634, 'eval_samples_per_second': 8.665, 'eval_steps_per_second': 1.123, 'epoch': 1.0}


                                                 
 34%|███▍      | 110/324 [10:21<27:25,  7.69s/it]

{'loss': 1.6773, 'grad_norm': 4.564784049987793, 'learning_rate': 1.1000000000000001e-05, 'epoch': 1.02}


                                                 
 37%|███▋      | 120/324 [11:16<18:59,  5.59s/it]

{'loss': 1.4383, 'grad_norm': 10.420377731323242, 'learning_rate': 1.2e-05, 'epoch': 1.11}


                                                 
 40%|████      | 130/324 [12:11<18:01,  5.58s/it]

{'loss': 1.5841, 'grad_norm': 4.875058174133301, 'learning_rate': 1.3000000000000001e-05, 'epoch': 1.2}


                                                 
 43%|████▎     | 140/324 [13:07<16:55,  5.52s/it]

{'loss': 1.514, 'grad_norm': 6.167691230773926, 'learning_rate': 1.4000000000000001e-05, 'epoch': 1.3}


                                                 
 46%|████▋     | 150/324 [14:02<15:54,  5.48s/it]

{'loss': 1.6411, 'grad_norm': 8.106331825256348, 'learning_rate': 1.5e-05, 'epoch': 1.39}


                                                 
 49%|████▉     | 160/324 [14:58<15:01,  5.49s/it]

{'loss': 1.47, 'grad_norm': 6.181437015533447, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.48}


                                                 
 52%|█████▏    | 170/324 [15:53<13:55,  5.43s/it]

{'loss': 1.478, 'grad_norm': 6.033486843109131, 'learning_rate': 1.7000000000000003e-05, 'epoch': 1.57}


                                                 
 56%|█████▌    | 180/324 [16:48<13:19,  5.55s/it]

{'loss': 1.5745, 'grad_norm': 9.504474639892578, 'learning_rate': 1.8e-05, 'epoch': 1.67}


                                                 
 59%|█████▊    | 190/324 [17:44<12:20,  5.53s/it]

{'loss': 1.5254, 'grad_norm': 8.379024505615234, 'learning_rate': 1.9e-05, 'epoch': 1.76}


                                                 
 62%|██████▏   | 200/324 [18:39<11:28,  5.55s/it]

{'loss': 1.4258, 'grad_norm': 8.367146492004395, 'learning_rate': 2e-05, 'epoch': 1.85}


                                                 
 65%|██████▍   | 210/324 [19:35<10:34,  5.56s/it]

{'loss': 1.43, 'grad_norm': 8.655821800231934, 'learning_rate': 2.1e-05, 'epoch': 1.94}


 67%|██████▋   | 216/324 [20:05<08:21,  4.64s/it]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

                                                 
[A                                    
 67%|██████▋   | 216/324 [20:18<08:21,  4.64s/it]
[A

{'eval_loss': 1.317970871925354, 'eval_runtime': 13.0369, 'eval_samples_per_second': 8.284, 'eval_steps_per_second': 1.074, 'epoch': 2.0}


                                                 
 68%|██████▊   | 220/324 [20:40<11:33,  6.67s/it]

{'loss': 1.4434, 'grad_norm': 8.012611389160156, 'learning_rate': 2.2000000000000003e-05, 'epoch': 2.04}


                                                 
 71%|███████   | 230/324 [21:36<08:48,  5.62s/it]

{'loss': 1.4268, 'grad_norm': 10.010638236999512, 'learning_rate': 2.3000000000000003e-05, 'epoch': 2.13}


                                                 
 74%|███████▍  | 240/324 [22:31<07:43,  5.51s/it]

{'loss': 1.3783, 'grad_norm': 21.93709945678711, 'learning_rate': 2.4e-05, 'epoch': 2.22}


                                                 
 77%|███████▋  | 250/324 [23:29<06:55,  5.62s/it]

{'loss': 1.3776, 'grad_norm': 12.392486572265625, 'learning_rate': 2.5e-05, 'epoch': 2.31}


                                                 
 80%|████████  | 260/324 [24:25<06:01,  5.64s/it]

{'loss': 1.3406, 'grad_norm': 8.15299129486084, 'learning_rate': 2.6000000000000002e-05, 'epoch': 2.41}


                                                 
 83%|████████▎ | 270/324 [25:21<05:03,  5.62s/it]

{'loss': 1.3578, 'grad_norm': 7.703222274780273, 'learning_rate': 2.7000000000000002e-05, 'epoch': 2.5}


                                                 
 86%|████████▋ | 280/324 [26:17<04:06,  5.59s/it]

{'loss': 1.1011, 'grad_norm': 19.449573516845703, 'learning_rate': 2.8000000000000003e-05, 'epoch': 2.59}


                                                 
 90%|████████▉ | 290/324 [27:13<03:10,  5.62s/it]

{'loss': 1.3419, 'grad_norm': 7.991851329803467, 'learning_rate': 2.9e-05, 'epoch': 2.69}


                                                 
 93%|█████████▎| 300/324 [28:10<02:15,  5.67s/it]

{'loss': 1.2669, 'grad_norm': 10.291705131530762, 'learning_rate': 3e-05, 'epoch': 2.78}


                                                 
 96%|█████████▌| 310/324 [29:05<01:18,  5.62s/it]

{'loss': 1.3696, 'grad_norm': 7.71739387512207, 'learning_rate': 3.1e-05, 'epoch': 2.87}


                                                 
 99%|█████████▉| 320/324 [30:01<00:22,  5.56s/it]

{'loss': 1.1648, 'grad_norm': 15.52340030670166, 'learning_rate': 3.2000000000000005e-05, 'epoch': 2.96}


100%|██████████| 324/324 [30:20<00:00,  4.65s/it]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A

                                                 
[A                                    
100%|██████████| 324/324 [30:35<00:00,  4.65s/it]
                                                 
100%|██████████| 324/324 [30:35<00:00,  5.67s/it]


{'eval_loss': 1.2991434335708618, 'eval_runtime': 13.2341, 'eval_samples_per_second': 8.161, 'eval_steps_per_second': 1.058, 'epoch': 3.0}
{'train_runtime': 1835.7278, 'train_samples_per_second': 1.405, 'train_steps_per_second': 0.176, 'train_loss': 1.6375918064588382, 'epoch': 3.0}


100%|██████████| 14/14 [00:11<00:00,  1.22it/s]

{'eval_loss': 1.1904798746109009, 'eval_runtime': 12.4938, 'eval_samples_per_second': 8.644, 'eval_steps_per_second': 1.121, 'epoch': 3.0}





('./parsbert_movie_genre_classifier/tokenizer_config.json',
 './parsbert_movie_genre_classifier/special_tokens_map.json',
 './parsbert_movie_genre_classifier/vocab.txt',
 './parsbert_movie_genre_classifier/added_tokens.json',
 './parsbert_movie_genre_classifier/tokenizer.json')

In [26]:
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, confusion_matrix

predictions = trainer.predict(test_dataset)
y_true = test_df['Genre_encoded'].to_numpy()
y_pred = np.argmax(predictions.predictions, axis=1)

# Calculate metrics
accuracy = accuracy_score(y_true, y_pred)
f1_macro = f1_score(y_true, y_pred, average='macro')
f1_micro = f1_score(y_true, y_pred, average='micro')
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
conf_matrix = confusion_matrix(y_true, y_pred)

# Print metrics
print(f'Accuracy: {accuracy}')
print(f'F1 Score (Macro): {f1_macro}')
print(f'F1 Score (Micro): {f1_micro}')
print(f'Precision (Macro): {precision}')
print(f'Recall (Macro): {recall}')
print('Confusion Matrix:')
print(conf_matrix)

100%|██████████| 14/14 [00:13<00:00,  1.02it/s]

Accuracy: 0.6388888888888888
F1 Score (Macro): 0.212359900373599
F1 Score (Micro): 0.6388888888888888
Precision (Macro): 0.2773109243697479
Recall (Macro): 0.2091723093371347
Confusion Matrix:
[[ 2  0  1  0  5  0  0  0]
 [ 0  0  0  0  5  0  0  0]
 [ 0  0 12  0 11  0  0  0]
 [ 0  0  2  0  5  0  0  0]
 [ 0  0  6  0 55  0  0  0]
 [ 0  0  0  0  2  0  0  0]
 [ 0  0  0  0  1  0  0  0]
 [ 0  0  0  0  1  0  0  0]]



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
