In [None]:
!pip install transformers datasets
!pip install --upgrade transformers accelerate


In [None]:
import inspect
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from datasets import Dataset
from transformers import BertTokenizer
from transformers import BertForSequenceClassification, Trainer, TrainingArguments
from transformers import TrainingArguments



In [None]:
# Colab-only step: prompt for a file upload from the local machine.
# NOTE(review): the next cell reads 'cleaned_news_dataset.csv' from the working
# directory, so presumably that is the file expected to be uploaded here — confirm.
from google.colab import files
uploaded = files.upload()

In [None]:
# Load the news dataset into a DataFrame. The tokenization cell reads a 'text'
# column from it.
# NOTE(review): a label column (presumably named 'label') is also expected by
# the training cells — confirm against the CSV.
df = pd.read_csv('cleaned_news_dataset.csv')


In [None]:
# Convert the DataFrame to a Hugging Face Dataset and hold out 20% of the rows
# for validation.
dataset = Dataset.from_pandas(df)
# Fix the shuffle seed so the same rows land in train/validation on every
# Restart & Run All; without it each run is evaluated on a different split.
train_test_split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = train_test_split['train']
val_dataset = train_test_split['test']

In [None]:
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')


def tokenize(batch):
    """Tokenize the ``text`` entries of a batch for BERT.

    Args:
        batch: Mapping with a ``text`` entry, as supplied by
            ``Dataset.map(tokenize, batched=True)``.

    Returns:
        The tokenizer's encoding for the batch, with every sequence truncated
        or padded to exactly 128 tokens.
    """
    # Pad to the fixed max_length instead of to the longest text in the batch
    # (padding=True). ``map`` calls this once per chunk of rows, so "longest in
    # the batch" can leave different chunks with different sequence lengths,
    # and rows of unequal length cannot be stacked into one tensor when a
    # shuffled training batch mixes them.
    return tokenizer(batch['text'], padding='max_length', truncation=True, max_length=128)

In [None]:
# Run the batched tokenizer over both splits, train first and then validation,
# rebinding each name to its tokenized dataset.
train_dataset, val_dataset = (
    split.map(tokenize, batched=True)
    for split in (train_dataset, val_dataset)
)

In [None]:
# Report whether PyTorch can see a CUDA device in this runtime.
print("GPU is available" if torch.cuda.is_available() else "GPU is not available")

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:
# Load pretrained BERT with a 10-way sequence-classification head and move it
# to the selected device.
# NOTE(review): num_labels=10 assumes the dataset has exactly 10 classes
# encoded as integers 0-9 — confirm against the data.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=10)
model.to(device)

# The per-epoch evaluation option was renamed from `evaluation_strategy` to
# `eval_strategy`, and current transformers releases reject the old spelling.
# The install cell upgrades transformers, so pick whichever keyword the
# installed version actually accepts instead of hard-coding one.
eval_strategy_kwarg = (
    "eval_strategy"
    if "eval_strategy" in inspect.signature(TrainingArguments.__init__).parameters
    else "evaluation_strategy"
)

training_args = TrainingArguments(
    output_dir='./results',
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    report_to='none',
    # label_names must match the key the model receives in each batch. The
    # Trainer's default collator hands the label column to the model as
    # "labels", so ["label"] made evaluation treat every batch as unlabeled
    # and skip the eval loss.
    label_names=["labels"],
    **{eval_strategy_kwarg: "epoch"},  # evaluate at the end of every epoch
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

In [None]:
# Fine-tune the model with the Trainer built in the previous cell. As the
# cell's last expression, the value returned by train() is shown as output.
trainer.train()


In [None]:
# Evaluate on the eval_dataset given to the Trainer (val_dataset). The value
# returned by evaluate() is shown as the cell output.
trainer.evaluate()


In [None]:
# Write the fine-tuned model and the tokenizer into the same directory.
save_dir = './fine_tuned_model'
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

In [None]:
# Bundle the saved model directory into fine_tuned_model.zip in the working
# directory. The archive path returned by make_archive is shown as output.
shutil.make_archive('fine_tuned_model', 'zip', root_dir='./fine_tuned_model')