In [6]:
!ls -R /kaggle/input/

/kaggle/input/:
roberta-large-mnli-local  twitter-climate-change-sentiment-dataset

/kaggle/input/roberta-large-mnli-local:
transformers

/kaggle/input/roberta-large-mnli-local/transformers:
default

/kaggle/input/roberta-large-mnli-local/transformers/default:
1

/kaggle/input/roberta-large-mnli-local/transformers/default/1:
roberta-large-mnli-local

/kaggle/input/roberta-large-mnli-local/transformers/default/1/roberta-large-mnli-local:
config.json  model.safetensors	      tokenizer_config.json  vocab.json
merges.txt   special_tokens_map.json  tokenizer.json

/kaggle/input/twitter-climate-change-sentiment-dataset:
twitter_sentiment_data.csv


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [7]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import os

# Path yang benar berdasarkan struktur folder Anda
model_path = "/kaggle/input/roberta-large-mnli-local/transformers/default/1/roberta-large-mnli-local"

print(f"Menggunakan path: {model_path}")

# Me-load tokenizer dan model dari path yang sudah benar
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

print("\nModel dan tokenizer berhasil dimuat! ✅")

# Contoh penggunaan
text = "The new movie was absolutely fantastic!"
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs)

print("\nContoh inferensi berhasil dijalankan.")

Menggunakan path: /kaggle/input/roberta-large-mnli-local/transformers/default/1/roberta-large-mnli-local

Model dan tokenizer berhasil dimuat! ✅

Contoh inferensi berhasil dijalankan.


In [8]:
total_params = sum(p.numel() for p in model.parameters())
params_bytes = total_params * 4 #FP32 = 4 bytes
params_size_gb = params_bytes / (1024**3)
print(total_params)
print(params_size_gb)

355362819
1.3238296620547771


In [9]:
import torch
import pandas as pd
import numpy as np
import os

from tqdm.auto import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from datasets import Dataset
from torch.utils.data import DataLoader
from torch.optim import AdamW
from transformers import AutoConfig
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup

In [10]:
url = "https://raw.githubusercontent.com/NafisNaufal/climate-change-sentiment-analysis/add-file-intern/cleaned_tweets.csv"
df = pd.read_csv(url)

print(df.head())

df_clean = df_clean.dropna(subset=['message', 'sentiment']).reset_index(drop=True)
df_clean = df_clean[df_clean['sentiment'] != 2].reset_index(drop=True)

train_df, test_df = train_test_split(df_clean, test_size=0.2, random_state=42, stratify=df_clean['sentiment'])

sentiments = df_clean['sentiment'].unique().tolist()
sentiment2id = {sentiment: i for i, sentiment in enumerate(sentiments)}
id2sentiment = {i: sentiment for i, sentiment in enumerate(sentiments)}

train_df['sentiment_encoded'] = train_df['sentiment'].map(sentiment2id)
test_df['sentiment_encoded'] = test_df['sentiment'].map(sentiment2id)

train_dataset = Dataset.from_pandas(train_df)
test_dataset = Dataset.from_pandas(test_df)

                                             message  sentiment
0  @tiniebeany climate change is an interesting h...         -1
1  Watch #BeforeTheFlood right here, as @LeoDiCap...          1
2  Fabulous! Leonardo #DiCaprio's film on #climat...          1
3  Just watched this amazing documentary by leona...          1
4  Leonardo DiCaprio's climate change documentary...          0


In [11]:
def tokenize_function(examples):
  return tokenizer(examples['message'], padding="max_length", truncation=True, max_length=128)

tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True)
tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True)

tokenized_train_dataset = tokenized_train_dataset.remove_columns(['sentiment', 'message', '__index_level_0__'])
tokenized_test_dataset = tokenized_test_dataset.remove_columns(['sentiment', 'message', '__index_level_0__'])
tokenized_train_dataset = tokenized_train_dataset.rename_column('sentiment_encoded', 'labels')
tokenized_test_dataset = tokenized_test_dataset.rename_column('sentiment_encoded', 'labels')


tokenized_train_dataset.set_format('torch')
tokenized_test_dataset.set_format('torch')

Map:   0%|          | 0/24248 [00:00<?, ? examples/s]

Map:   0%|          | 0/6062 [00:00<?, ? examples/s]

In [12]:
train_dataloader = DataLoader(tokenized_train_dataset, shuffle=True, batch_size=16)
eval_dataloader = DataLoader(tokenized_test_dataset, shuffle=False, batch_size=16)

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [13]:
optimizer = AdamW(model.parameters(), lr=2e-5, weight_decay=0.01)

num_epochs = 3
num_training_steps = num_epochs * len(train_dataloader)
lr_scheduler = get_linear_schedule_with_warmup(
    optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
)

In [14]:
%%writefile train_script.py

import os
import torch
import pandas as pd
import numpy as np
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.data import DataLoader
from torch.utils.data.distributed import DistributedSampler
from torch.cuda.amp import GradScaler, autocast
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from datasets import Dataset
from torch.optim import AdamW
from transformers import AutoTokenizer, AutoModelForSequenceClassification, get_linear_schedule_with_warmup
import kagglehub

def main_worker(rank, world_size, model_path, num_epochs, batch_size, accumulation_steps):
    print(f"Running DDP on rank {rank}.")
    os.environ['MASTER_ADDR'] = 'localhost'
    os.environ['MASTER_PORT'] = '12355'
    dist.init_process_group("nccl", rank=rank, world_size=world_size)

    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)
    model.to(rank)
    ddp_model = DDP(model, device_ids=[rank])

    # --- Persiapan Dataset (tidak ada perubahan) ---
    path = kagglehub.dataset_download("edqian/twitter-climate-change-sentiment-dataset")
    csv_path = os.path.join(path, "twitter_sentiment_data.csv")
    df = pd.read_csv(csv_path)
    df_clean = df.dropna(subset=['message', 'sentiment']).reset_index(drop=True)
    df_clean = df_clean.drop(columns='tweetid')
    df_clean = df_clean[df_clean['sentiment'] != 2].reset_index(drop=True)
    train_df, test_df = train_test_split(df_clean, test_size=0.2, random_state=42, stratify=df_clean['sentiment'])
    sentiments = sorted(df_clean['sentiment'].unique().tolist())
    sentiment2id = {sentiment: i for i, sentiment in enumerate(sentiments)}
    train_df['sentiment_encoded'] = train_df['sentiment'].map(sentiment2id)
    test_df['sentiment_encoded'] = test_df['sentiment'].map(sentiment2id)
    train_dataset = Dataset.from_pandas(train_df)
    test_dataset = Dataset.from_pandas(test_df)

    def tokenize_function(examples):
        return tokenizer(examples['message'], padding="max_length", truncation=True, max_length=128)

    tokenized_train_dataset = train_dataset.map(tokenize_function, batched=True, num_proc=os.cpu_count())
    tokenized_test_dataset = test_dataset.map(tokenize_function, batched=True, num_proc=os.cpu_count())
    
    cols_to_remove = ['sentiment', 'message']
    if '__index_level_0__' in tokenized_train_dataset.column_names:
        cols_to_remove.append('__index_level_0__')
    tokenized_train_dataset = tokenized_train_dataset.remove_columns(cols_to_remove)
    tokenized_test_dataset = tokenized_test_dataset.remove_columns(cols_to_remove)
    tokenized_train_dataset = tokenized_train_dataset.rename_column('sentiment_encoded', 'labels')
    tokenized_test_dataset = tokenized_test_dataset.rename_column('sentiment_encoded', 'labels')

    tokenized_train_dataset.set_format('torch')
    tokenized_test_dataset.set_format('torch')

    train_sampler = DistributedSampler(tokenized_train_dataset, num_replicas=world_size, rank=rank)
    eval_sampler = DistributedSampler(tokenized_test_dataset, num_replicas=world_size, rank=rank, shuffle=False)
    
    train_dataloader = DataLoader(tokenized_train_dataset, batch_size=batch_size, sampler=train_sampler)
    eval_dataloader = DataLoader(tokenized_test_dataset, batch_size=batch_size, sampler=eval_sampler)

    optimizer = AdamW(ddp_model.parameters(), lr=2e-5, weight_decay=0.01)
    num_training_steps = num_epochs * len(train_dataloader)
    lr_scheduler = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_training_steps
    )
    
    # Inisialisasi GradScaler untuk Automatic Mixed Precision (AMP)
    scaler = GradScaler()

    def compute_metrics(preds, labels):
        acc = accuracy_score(labels, preds)
        f1 = f1_score(labels, preds, average="weighted")
        return {"accuracy": acc, "f1_score": f1}

    best_accuracy = 0
    output_dir = "./roberta-finetuned-ddp-final"
    if rank == 0:
        os.makedirs(output_dir, exist_ok=True)

    for epoch in range(num_epochs):
        train_sampler.set_epoch(epoch)
        ddp_model.train()
        progress_bar_train = tqdm(train_dataloader, desc=f"Epoch {epoch+1}/{num_epochs} [Training]", disable=(rank!=0))

        for i, batch in enumerate(progress_bar_train):
            batch = {k: v.to(rank) for k, v in batch.items()}
            
            # Menggunakan autocast untuk forward pass (Mixed Precision)
            with autocast():
                outputs = ddp_model(**batch)
                loss = outputs.loss
                # Normalisasi loss untuk gradient accumulation
                loss = loss / accumulation_steps
            
            # Scale loss dan jalankan backward pass
            scaler.scale(loss).backward()
            
            # Jalankan optimizer step hanya setelah beberapa langkah (Gradient Accumulation)
            if (i + 1) % accumulation_steps == 0:
                scaler.step(optimizer)
                scaler.update()
                lr_scheduler.step()
                optimizer.zero_grad()

            if rank == 0:
                progress_bar_train.set_postfix({'loss': loss.item() * accumulation_steps})
        
        # --- Evaluation Loop ---
        ddp_model.eval()
        all_val_preds, all_val_labels = [], []
        with torch.no_grad():
            for batch in eval_dataloader:
                batch = {k: v.to(rank) for k, v in batch.items()}
                with autocast(): # Gunakan autocast juga saat evaluasi
                    outputs = ddp_model(**batch)
                all_val_preds.extend(torch.argmax(outputs.logits, dim=-1).cpu().numpy())
                all_val_labels.extend(batch['labels'].cpu().numpy())
        
        # Kumpulkan hasil dari semua GPU
        pred_tensor = torch.tensor(all_val_preds, dtype=torch.long).to(rank)
        label_tensor = torch.tensor(all_val_labels, dtype=torch.long).to(rank)
        gathered_preds = [torch.zeros_like(pred_tensor) for _ in range(world_size)]
        gathered_labels = [torch.zeros_like(label_tensor) for _ in range(world_size)]
        dist.all_gather(gathered_preds, pred_tensor)
        dist.all_gather(gathered_labels, label_tensor)

        if rank == 0:
            final_preds = torch.cat(gathered_preds).cpu().numpy()
            final_labels = torch.cat(gathered_labels).cpu().numpy()
            val_metrics = compute_metrics(final_preds, final_labels)
            print(f"Epoch {epoch+1} | Val Acc: {val_metrics['accuracy']:.4f} | Val F1: {val_metrics['f1_score']:.4f}")
        
            print("\n=== Classification Report ===")
            print(classification_report(final_labels, final_preds, target_names=[str(s) for s in sentiments]))
        
            cm = confusion_matrix(final_labels, final_preds)
            print("\n=== Confusion Matrix ===")
            print(cm)
        
            # (Opsional) Visualisasi Confusion Matrix
            plt.figure(figsize=(6, 5))
            sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                        xticklabels=[str(s) for s in sentiments],
                        yticklabels=[str(s) for s in sentiments])
            plt.xlabel('Predicted')
            plt.ylabel('Actual')
            plt.title(f'Confusion Matrix (Epoch {epoch+1})')
            plt.tight_layout()
            plt.savefig(f'{output_dir}/confusion_matrix_epoch_{epoch+1}.png')
            plt.close()


            if val_metrics['accuracy'] > best_accuracy:
                best_accuracy = val_metrics['accuracy']
                ddp_model.module.save_pretrained(output_dir)
                tokenizer.save_pretrained(output_dir)
                print(f"Model terbaik baru disimpan dengan akurasi: {best_accuracy:.4f}")

    dist.destroy_process_group()

Writing train_script.py


In [15]:
import torch
import torch.multiprocessing as mp
from train_script import main_worker

# Restart session Anda sebelum menjalankan ini jika perlu

# --- KONFIGURASI ---
model_path = "/kaggle/input/roberta-large-mnli-local/transformers/default/1/roberta-large-mnli-local"
num_epochs = 3
# Gunakan batch size kecil yang aman untuk memori
batch_size = 4
# Akumulasi gradien untuk mensimulasikan batch size yang lebih besar (4 * 4 = 16)
accumulation_steps = 4
# -----------------

world_size = torch.cuda.device_count()
print(f"Memulai DDP dengan {world_size} GPU.")
print(f"Batch size per GPU: {batch_size}")
print(f"Accumulation steps: {accumulation_steps}")
print(f"Effective global batch size: {batch_size * world_size * accumulation_steps}")

# Jalankan pelatihan
mp.spawn(main_worker,
         args=(world_size, model_path, num_epochs, batch_size, accumulation_steps),
         nprocs=world_size,
         join=True)

print("\nPelatihan DDP selesai.")

Memulai DDP dengan 2 GPU.
Batch size per GPU: 4
Accumulation steps: 4
Effective global batch size: 32


[W1015 07:45:58.209720372 socket.cpp:759] [c10d] The client socket has failed to connect to [localhost]:12355 (errno: 99 - Cannot assign requested address).
2025-10-15 07:46:00.640897: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760514360.670407      98 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1760514360.681038      98 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-10-15 07:46:00.767525: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1760514360.785088      99 cuda_dnn.cc:8310] Unable to regi

Running DDP on rank 1.
Running DDP on rank 0.


Map (num_proc=4): 100%|██████████| 27733/27733 [00:06<00:00, 4436.03 examples/s]
Map (num_proc=4): 100%|██████████| 27733/27733 [00:06<00:00, 4213.05 examples/s]
Map (num_proc=4): 100%|██████████| 6934/6934 [00:02<00:00, 2969.73 examples/s]
  scaler = GradScaler()
  with autocast():
Map (num_proc=4): 100%|██████████| 6934/6934 [00:02<00:00, 2925.85 examples/s]
  scaler = GradScaler()
  with autocast():
  with autocast(): # Gunakan autocast juga saat evaluasi
Epoch 1/3 [Training]: 100%|██████████| 3467/3467 [24:42<00:00,  2.34it/s, loss=0.28]
  with autocast(): # Gunakan autocast juga saat evaluasi


Epoch 1 | Val Acc: 0.8151 | Val F1: 0.7931

=== Classification Report ===
              precision    recall  f1-score   support

          -1       0.69      0.80      0.74       798
           0       0.84      0.37      0.52      1543
           1       0.83      0.97      0.89      4593

    accuracy                           0.82      6934
   macro avg       0.79      0.71      0.72      6934
weighted avg       0.82      0.82      0.79      6934


=== Confusion Matrix ===
[[ 640   39  119]
 [ 198  576  769]
 [  88   69 4436]]
Model terbaik baru disimpan dengan akurasi: 0.8151


  with autocast():
Epoch 2/3 [Training]: 100%|██████████| 3467/3467 [24:42<00:00,  2.34it/s, loss=0.0275] 
  with autocast(): # Gunakan autocast juga saat evaluasi


Epoch 2 | Val Acc: 0.8473 | Val F1: 0.8392

=== Classification Report ===
              precision    recall  f1-score   support

          -1       0.80      0.76      0.78       798
           0       0.78      0.57      0.66      1543
           1       0.87      0.96      0.91      4593

    accuracy                           0.85      6934
   macro avg       0.82      0.76      0.78      6934
weighted avg       0.84      0.85      0.84      6934


=== Confusion Matrix ===
[[ 603   93  102]
 [ 106  878  559]
 [  45  154 4394]]
Model terbaik baru disimpan dengan akurasi: 0.8473


  with autocast():
Epoch 3/3 [Training]: 100%|██████████| 3467/3467 [24:40<00:00,  2.34it/s, loss=0.167]   
  with autocast(): # Gunakan autocast juga saat evaluasi


Epoch 3 | Val Acc: 0.8411 | Val F1: 0.8384

=== Classification Report ===
              precision    recall  f1-score   support

          -1       0.68      0.87      0.77       798
           0       0.73      0.60      0.66      1543
           1       0.90      0.92      0.91      4593

    accuracy                           0.84      6934
   macro avg       0.77      0.80      0.78      6934
weighted avg       0.84      0.84      0.84      6934


=== Confusion Matrix ===
[[ 698   43   57]
 [ 229  929  385]
 [  93  295 4205]]

Pelatihan DDP selesai.


In [16]:
!ls -R /kaggle

/kaggle:
input  lib  working

/kaggle/input:
roberta-large-mnli-local  twitter-climate-change-sentiment-dataset

/kaggle/input/roberta-large-mnli-local:
transformers

/kaggle/input/roberta-large-mnli-local/transformers:
default

/kaggle/input/roberta-large-mnli-local/transformers/default:
1

/kaggle/input/roberta-large-mnli-local/transformers/default/1:
roberta-large-mnli-local

/kaggle/input/roberta-large-mnli-local/transformers/default/1/roberta-large-mnli-local:
config.json  model.safetensors	      tokenizer_config.json  vocab.json
merges.txt   special_tokens_map.json  tokenizer.json

/kaggle/input/twitter-climate-change-sentiment-dataset:
twitter_sentiment_data.csv

/kaggle/lib:
kaggle

/kaggle/lib/kaggle:
gcp.py

/kaggle/working:
__pycache__  roberta-finetuned-ddp-final  train_script.py

/kaggle/working/__pycache__:
train_script.cpython-311.pyc

/kaggle/working/roberta-finetuned-ddp-final:
config.json		      merges.txt	       tokenizer.json
confusion_matrix_epoch_1.png  model.safe

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [17]:
import shutil
import os

# Direktori tempat model Anda disimpan
model_directory = "./roberta-finetuned-ddp-final"
# Nama file ZIP yang akan dibuat
output_zip_filename = "roberta-finetuned-final"

print(f"Mengompres direktori '{model_directory}'...")

# Membuat file arsip (ZIP)
shutil.make_archive(output_zip_filename, 'zip', model_directory)

print(f"\nSelesai! Model Anda telah disimpan sebagai '{output_zip_filename}.zip'")
print(f"Anda bisa menemukan file ini di direktori output: /kaggle/working/{output_zip_filename}.zip")

Mengompres direktori './roberta-finetuned-ddp-final'...

Selesai! Model Anda telah disimpan sebagai 'roberta-finetuned-final.zip'
Anda bisa menemukan file ini di direktori output: /kaggle/working/roberta-finetuned-final.zip


In [19]:
import torch
import numpy as np
from tqdm import tqdm

# Pastikan model di mode evaluasi
model.eval()

# Simpan hasil prediksi dan label
all_preds = []
all_labels = []

batch_size = 8  # kecilkan jika GPU kecil
num_batches = int(np.ceil(len(test_df) / batch_size))

with torch.no_grad():
    for i in tqdm(range(num_batches), desc="Evaluating in batches"):
        batch_texts = test_df['message'].iloc[i*batch_size:(i+1)*batch_size].tolist()
        batch_labels = test_df['sentiment'].iloc[i*batch_size:(i+1)*batch_size].map(sentiment2id).tolist()

        inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt").to(model.device)
        outputs = model(**inputs)
        preds = torch.argmax(outputs.logits, dim=-1).cpu().numpy()

        all_preds.extend(preds)
        all_labels.extend(batch_labels)

# Konversi ke array numpy
all_preds_np = np.array(all_preds)
all_labels_np = np.array(all_labels)

# Cari indeks prediksi salah
incorrect_indices = np.where(all_preds_np != all_labels_np)[0]

print(f"Number of incorrect predictions: {len(incorrect_indices)}")
print("\nExamples of incorrect predictions:")

num_examples_to_show = 10
for i in incorrect_indices[:num_examples_to_show]:
    original_text = test_df.iloc[i]['message']
    true_label_encoded = all_labels_np[i]
    predicted_label_encoded = all_preds_np[i]

    true_sentiment = id2sentiment[true_label_encoded]
    predicted_sentiment = id2sentiment[predicted_label_encoded]

    print(f"\nText: {original_text}")
    print(f"True Sentiment: {true_sentiment}, Predicted Sentiment: {predicted_sentiment}")


Evaluating in batches: 100%|██████████| 758/758 [00:42<00:00, 17.95it/s]

Number of incorrect predictions: 617

Examples of incorrect predictions:

Text: @Libertea2012  The message continues the same, climate change is dangerous to our future, and world wide threat 2 all of us!
True Sentiment: 1, Predicted Sentiment: -1

Text: Or global warming!
True Sentiment: -1, Predicted Sentiment: 0

Text: Four inches of global warming and counting....
True Sentiment: -1, Predicted Sentiment: 0

Text: How are you going to tell me that climate change/global warming isnt real...
True Sentiment: 0, Predicted Sentiment: 1

Text: The irrepressible Mark Carney has set up a Stability Board to harass businesses on  'plans for climate change'
FOR GOD'S
True Sentiment: -1, Predicted Sentiment: 0

Text: Rainfall trends in arid regions buck commonly held climate change theories
True Sentiment: 1, Predicted Sentiment: -1

Text: @amlozyk Expert - climate change beyond our control
True Sentiment: 0, Predicted Sentiment: 1

Text: Obamas New EPA Climate Change Regulations Will Cost 7 Mi




In [20]:
def predict_sentiment(text, model, tokenizer, device, id2sentiment, max_length=128):
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length
    )
    inputs = {k: v.to(device) for k, v in inputs.items()}

    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
        predictions = torch.argmax(logits, dim=-1)

    predicted_sentiment_id = predictions.item()
    predicted_sentiment = id2sentiment[predicted_sentiment_id]

    return predicted_sentiment


In [23]:
climate_texts = [
    "Climate change is a serious issue and we need to act now!",
    "I don’t believe climate change is real.",
    "The weather is nice today, not too hot or cold."
]

for text in climate_texts:
    prediction = predict_sentiment(text, model, tokenizer, device, id2sentiment)
    print(f"Text: '{text}'\nPrediction: {prediction}\n")


Text: 'Climate change is a serious issue and we need to act now!'
Prediction: 1

Text: 'I don’t believe climate change is real.'
Prediction: 0

Text: 'The weather is nice today, not too hot or cold.'
Prediction: 0

