# Install dependencies

In [1]:
!pip install transformers datasets evaluate accelerate scikit-learn pandas --quiet
!pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 --quiet
!pip install onnx onnxruntime-gpu optimum[onnxruntime-gpu] huggingface_hub --quiet

# Import Libraries

In [70]:
import torch
import os
import numpy as np
import pandas as pd
from pathlib import Path
import shutil
from datasets import Dataset, DatasetDict
from transformers import (
    AutoTokenizer, 
    AutoModelForSequenceClassification,
    AutoConfig,
    TrainingArguments, 
    Trainer,
    EarlyStoppingCallback
)
from sklearn.metrics import classification_report, f1_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.utils.class_weight import compute_class_weight
from optimum.onnxruntime import ORTModelForSequenceClassification
import onnxruntime as ort
from huggingface_hub import HfApi, create_repo, login
import warnings
warnings.filterwarnings('ignore')

# Set Configuration

In [3]:
class Config:
    # Model Selection
    MODEL_NAME = "Shushant/nepaliBERT"
    
    # Training Hyperparameters
    MAX_LENGTH = 256
    BATCH_SIZE = 16
    LEARNING_RATE = 2e-5
    NUM_EPOCHS = 5
    WARMUP_RATIO = 0.1
    WEIGHT_DECAY = 0.01
    
    # Class Configuration
    NUM_LABELS = 3
    
    # Data Split
    TEST_SIZE = 0.15
    VAL_SIZE = 0.15
    RANDOM_SEED = 42

config = Config()

# Set cuda usage

In [4]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

if device == "cuda":
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    print(f"   Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    # Enable TF32 for faster training on Ampere GPUs
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True

Using device: cuda
   GPU: Tesla T4
   Memory: 15.83 GB


# Load Dataset

In [5]:
# Enter your kaggle username and key here
# os.environ["KAGGLE_USERNAME"] = ""
# os.environ["KAGGLE_KEY"] = ""

In [6]:
!kaggle datasets download -d mathew11111/nepcov19tweets -p . --unzip

Traceback (most recent call last):
  File "/usr/local/bin/kaggle", line 10, in <module>
    sys.exit(main())
             ^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/kaggle/cli.py", line 68, in main
    out = args.func(**command_args)
          ^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/kaggle/api/kaggle_api_extended.py", line 1741, in dataset_download_cli
    with self.build_kaggle_client() as kaggle:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.12/dist-packages/kaggle/api/kaggle_api_extended.py", line 688, in build_kaggle_client
    username=self.config_values['username'],
             ~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^
KeyError: 'username'


In [7]:
!curl -L -o dataset2.csv "https://raw.githubusercontent.com/sagarl123/NepaliNLP-SentimentAnalysis/refs/heads/main/collected_labeled_data.csv"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 1822k  100 1822k    0     0  4226k      0 --:--:-- --:--:-- --:--:-- 4219k


In [8]:
df1 = pd.read_csv("covid19_tweeter_dataset.csv")
df2 = pd.read_csv("dataset2.csv")

# Clean the dataset for use

In [9]:
df1 = df1[['Label', 'Tweet']]

In [10]:
df1.drop(df1.index[~df1['Label'].isin([-1, 0, 1])], inplace=True)

In [11]:
df1 = df1.rename(columns={
    "Label": "labels",
    "Tweet": "text"
})

In [12]:
df1['labels'] = df1['labels'].map({0: 2, -1: 0, 1: 1})

In [13]:
df2.columns

Index(['text', 'label'], dtype='object')

In [14]:
df2 = df2.rename(columns={
    'label': 'labels'
})

In [15]:
# Merge the dataframes
df = pd.concat([df1, df2], ignore_index=True)

In [16]:
# Ensure labels and texts are in correct format
df['labels'] = df['labels'].astype(int)
df['text'] = df['text'].astype(str)

In [17]:
df['labels'].value_counts()

Unnamed: 0_level_0,count
labels,Unnamed: 1_level_1
1,3662
0,2398
2,1931


# Train Test Validation Split

In [18]:
train_df, temp_df = train_test_split(
    df, 
    test_size=(config.TEST_SIZE + config.VAL_SIZE),
    stratify=df['labels'],
    random_state=config.RANDOM_SEED
)

val_df, test_df = train_test_split(
    temp_df,
    test_size=config.TEST_SIZE / (config.TEST_SIZE + config.VAL_SIZE),
    stratify=temp_df['labels'],
    random_state=config.RANDOM_SEED
)

In [19]:
print(f"   Train: {len(train_df)} samples")
print(f"   Validation: {len(val_df)} samples")
print(f"   Test: {len(test_df)} samples\n")

   Train: 5593 samples
   Validation: 1199 samples
   Test: 1199 samples



# Conversion to HuggingFace dataset

In [20]:
dataset = DatasetDict({
    'train': Dataset.from_pandas(train_df.reset_index(drop=True)),
    'validation': Dataset.from_pandas(val_df.reset_index(drop=True)),
    'test': Dataset.from_pandas(test_df.reset_index(drop=True))
})

# Load Tokenizer and Model

In [21]:
tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)

In [68]:
tokenizer = AutoTokenizer.from_pretrained(config.MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    config.MODEL_NAME, 
    num_labels=config.NUM_LABELS,
    problem_type="single_label_classification",
).to(device)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at Shushant/nepaliBERT and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# Tokenization

In [23]:
def tokenize_function(examples):
    """Tokenize text with proper padding and truncation"""
    return tokenizer(
        examples["text"],
        truncation=True,
        padding="max_length",
        max_length=config.MAX_LENGTH,
        return_tensors=None 
    )

In [24]:
tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["text"]
)
tokenized_dataset.set_format("torch")

Map:   0%|          | 0/5593 [00:00<?, ? examples/s]

Map:   0%|          | 0/1199 [00:00<?, ? examples/s]

Map:   0%|          | 0/1199 [00:00<?, ? examples/s]

# Compute metrics for evaluation

In [25]:
def compute_metrics(eval_pred):
    """Calculate accuracy and macro F1 score"""
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    
    macro_f1 = f1_score(labels, predictions, average='macro')
    accuracy = accuracy_score(labels, predictions)
    
    # Per-class F1 scores
    f1_per_class = f1_score(labels, predictions, average=None)
    
    return {
        'macro_f1': macro_f1,
        'accuracy': accuracy,
        'f1_negative': f1_per_class[0],
        'f1_neutral': f1_per_class[1],
        'f1_positive': f1_per_class[2]
    }

# Custom trainer with balanced class weights

In [26]:
class_weights = compute_class_weight(
    'balanced',
    classes=np.unique(train_df['labels']),
    y=train_df['labels']
)
class_weights_tensor = torch.tensor(class_weights, dtype=torch.float).to(device)

In [27]:
class WeightedTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False, **kwargs):
        labels = inputs.pop("labels")
        outputs = model(**inputs)
        logits = outputs.logits
        
        # Apply class weights to loss
        loss_fct = torch.nn.CrossEntropyLoss(weight=class_weights_tensor)
        loss = loss_fct(logits, labels)
        
        return (loss, outputs) if return_outputs else loss

# Training arguments

In [28]:
training_args = TrainingArguments(
    output_dir="./results",
    
    # Training hyperparameters
    num_train_epochs=config.NUM_EPOCHS,
    per_device_train_batch_size=config.BATCH_SIZE,
    per_device_eval_batch_size=config.BATCH_SIZE * 2,
    learning_rate=config.LEARNING_RATE,
    weight_decay=config.WEIGHT_DECAY,
    warmup_ratio=config.WARMUP_RATIO,
    
    # Evaluation strategy
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="macro_f1",
    greater_is_better=True,
    
    # Optimization
    fp16=torch.cuda.is_available(),  # Mixed precision training
    gradient_accumulation_steps=2,
    gradient_checkpointing=False,
    
    # Misc
    save_total_limit=2,  # Keep only 2 best checkpoints
    seed=config.RANDOM_SEED,
    report_to="none",
    disable_tqdm=False,
)

# Initialize Trainer

In [29]:
trainer = WeightedTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["validation"],
    compute_metrics=compute_metrics,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
)

# Train model

In [30]:
train_result = trainer.train()

Epoch,Training Loss,Validation Loss,Macro F1,Accuracy,F1 Negative,F1 Neutral,F1 Positive
1,No log,0.619591,0.742633,0.7598,0.746601,0.836472,0.644828
2,No log,0.579024,0.762645,0.777314,0.740634,0.860735,0.686567
3,0.624000,0.602668,0.767206,0.783153,0.766404,0.855212,0.68
4,0.624000,0.679727,0.764425,0.784821,0.765499,0.861111,0.666667
5,0.624000,0.73013,0.769058,0.789825,0.769022,0.864469,0.673684


# Evaluation

In [31]:
test_results = trainer.evaluate(tokenized_dataset["test"])
print("Test Set Results:")
print(f"   Macro F1 Score: {test_results['eval_macro_f1']:.4f}")
print(f"   Accuracy: {test_results['eval_accuracy']:.4f}")
print(f"   F1 (Negative): {test_results['eval_f1_negative']:.4f}")
print(f"   F1 (Neutral): {test_results['eval_f1_neutral']:.4f}")
print(f"   F1 (Positive): {test_results['eval_f1_positive']:.4f}\n")

Test Set Results:
   Macro F1 Score: 0.7391
   Accuracy: 0.7631
   F1 (Negative): 0.7454
   F1 (Neutral): 0.8486
   F1 (Positive): 0.6233



In [32]:
# Detailed classification report
predictions = trainer.predict(tokenized_dataset["test"])
y_pred = np.argmax(predictions.predictions, axis=1)
y_true = predictions.label_ids

print(classification_report(
    y_true, 
    y_pred, 
    target_names=['Negative', 'Neutral', 'Positive'],
    digits=4
))

              precision    recall  f1-score   support

    Negative     0.7550    0.7361    0.7454       360
     Neutral     0.8463    0.8509    0.8486       550
    Positive     0.6169    0.6298    0.6233       289

    accuracy                         0.7631      1199
   macro avg     0.7394    0.7389    0.7391      1199
weighted avg     0.7636    0.7631    0.7633      1199



# ONNX configurations

In [53]:
model = trainer.model

model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e

In [None]:
# Login to HF
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

# Export to ONNX

In [54]:
model.save_pretrained("tmp_model")

In [65]:
ort_model = ORTModelForSequenceClassification.from_pretrained(
    'tmp_model',
    export=True,
)

# Verify onnx model's working

In [71]:
# Test inference
test_text = "यो धेरै राम्रो छ"
inputs = tokenizer(test_text, return_tensors="pt")

onnx_path = Path('tmp_model') / 'onnx' / 'model.onnx'

ort_sess = ort.InferenceSession(str(onnx_path), providers=["CUDAExecutionProvider"])

logits = ort_sess.run(
    None,
    {
        "input_ids": inputs["input_ids"].cpu().numpy(),
        "attention_mask": inputs["attention_mask"].cpu().numpy()
    }
)

print("ONNX logits:", logits)

NoSuchFile: [ONNXRuntimeError] : 3 : NO_SUCHFILE : Load model from tmp_model/onnx/model.onnx failed:Load model tmp_model/onnx/model.onnx failed. File doesn't exist

# Create model card

In [None]:
model_card = f"""---
language: ne
license: apache-2.0
tags:
- sentiment-analysis
- nepali
- onnx
- bert
- text-classification
datasets:
- custom-nepali-sentiment
metrics:
- f1
- accuracy
model-index:
- name: {HF_REPO_NAME}
  results:
  - task:
      type: text-classification
      name: Sentiment Analysis
    dataset:
      name: Nepali Sentiment Dataset
      type: custom
    metrics:
    - type: f1
      value: 0.XX  # Replace with your actual score
      name: Macro F1
---

# Nepali Sentiment Analysis (ONNX)

This model is a fine-tuned BERT model for Nepali sentiment analysis, exported to ONNX format for optimized inference.

## Model Details

- **Base Model**: Shushant/nepaliBERT (or your base model)
- **Task**: Sentiment Classification (3-class)
- **Labels**: 
  - 0: Negative
  - 1: Neutral
  - 2: Positive
- **Format**: ONNX (optimized for fast inference)

## Usage

### Installation

```bash
pip install transformers optimum[onnxruntime]
```

### Inference

```python
from transformers import AutoTokenizer
from optimum.onnxruntime import ORTModelForSequenceClassification
import torch

# Load model and tokenizer
model = ORTModelForSequenceClassification.from_pretrained("{HF_REPO_ID}")
tokenizer = AutoTokenizer.from_pretrained("{HF_REPO_ID}")

# Predict sentiment
text = "यो धेरै राम्रो छ"
inputs = tokenizer(text, return_tensors="pt")
outputs = model(**inputs)
prediction = torch.argmax(outputs.logits, dim=-1).item()

sentiment_map = {{-1: 'Negative', 0: 'Neutral', 1: 'Positive'}}
print(f"Sentiment: {{sentiment_map[prediction]}}")
```

## Performance

- **Macro F1 Score**: 0.XX (Replace with your score)
- **Accuracy**: 0.XX (Replace with your score)

## Training Data

Trained on Nepali sentiment dataset containing social media text, reviews, and comments.

## Limitations

- Best performance on Nepali text
- May have reduced accuracy on code-mixed or transliterated text
- Performance varies across different domains
"""

# Add to HuggingFace

In [None]:
repo_name = 'mohit4519/nepali-sentiment'
repo = Repository(local_dir="onnx_hf_repo", clone_from=repo_name)

# Copy files to repo
import shutil
shutil.copytree(onnx_export_dir, "onnx_hf_repo/onnx_model", dirs_exist_ok=True)
tokenizer.save_pretrained("onnx_hf_repo/tokenizer")

# Commit & push
repo.push_to_hub(commit_message="Add fine-tuned GPU-optimized ONNX sentiment model")

print(f"✅ Model uploaded to Hugging Face Hub: {repo_name}")