# Deep Learning

In [7]:
import random
import numpy as np
import pandas as pd
import torch
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm

from transformers import BertForSequenceClassification, BertConfig, BertTokenizer
from nltk.tokenize import TweetTokenizer
 
from indonlu.utils.forward_fn import forward_sequence_classification
from indonlu.utils.metrics import document_sentiment_metrics_fn
from indonlu.utils.data_utils import DocumentSentimentDataset, DocumentSentimentDataLoader

In [8]:
###
# common functions
###

# seed all random number generators (random, numpy, torch) for reproducibility
def set_seed(seed):
    """Seed every random number generator this notebook relies on.

    The same ``seed`` is handed, unchanged, to Python's ``random`` module,
    NumPy's global generator, ``torch.manual_seed`` and
    ``torch.cuda.manual_seed``.

    Args:
        seed: Seed value forwarded to each library.
    """
    # Order is kept as: stdlib -> NumPy -> torch (CPU) -> torch (CUDA).
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# count number of parameters in a model    
def count_param(module, trainable=False):
    """Count the scalar parameters held by ``module``.

    Args:
        module: Object exposing ``parameters()``; each yielded parameter must
            provide ``numel()`` (and ``requires_grad`` when ``trainable`` is
            true).
        trainable: If true, only parameters with ``requires_grad`` set are
            counted; otherwise every parameter is counted.

    Returns:
        The sum of ``numel()`` over the selected parameters (0 if there are
        none).
    """
    total = 0
    for param in module.parameters():
        # Skip frozen parameters only when the caller asked for trainable ones.
        if trainable and not param.requires_grad:
            continue
        total += param.numel()
    return total

# get the current learning rate from an optimizer
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Args:
        optimizer: Object with a ``param_groups`` iterable of mappings that
            each contain an ``'lr'`` entry.

    Returns:
        The ``'lr'`` value of the first group, or ``None`` when
        ``param_groups`` is empty.
    """
    # Only the first group is ever consulted; later groups are ignored.
    return next((group['lr'] for group in optimizer.param_groups), None)

# convert metrics to string 
def metrics_to_string(metric_dict):
    """Render a metrics mapping as a single space-separated string.

    Each entry becomes ``name:value`` with the value formatted to two decimal
    places, in the mapping's iteration order.

    Args:
        metric_dict: Mapping from metric name to a numeric value.

    Returns:
        The formatted entries joined by single spaces (empty string for an
        empty mapping).
    """
    return ' '.join(
        '{}:{:.2f}'.format(name, score) for name, score in metric_dict.items()
    )

In [9]:
# Seed the RNGs (via set_seed) with a fixed value before the model is loaded below.
set_seed(20240802)

## Configure and Load Pre-trained Model

In [10]:
# Load Tokenizer and Config
# Identifier of the pre-trained checkpoint. Defined once so the tokenizer,
# config and model are guaranteed to come from the same checkpoint.
PRETRAINED_MODEL_NAME = 'indobenchmark/indobert-base-p1'

# Load Tokenizer and Config
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
config = BertConfig.from_pretrained(PRETRAINED_MODEL_NAME)
# Use the label count declared by the dataset class for the classifier.
config.num_labels = DocumentSentimentDataset.NUM_LABELS

# Instantiate model with the adjusted config
model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME, config=config)

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to see activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at indobenchmark/indobert-base-p1 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Display the loaded model as the cell output.
model

In [None]:
# Total parameter count of the model (trainable flag left at its default, so all parameters are counted).
count_param(model)

## Dataset Preparation

In [None]:
# Directory holding the SmSA document-sentiment TSV files.
# Raw strings are required for these Windows paths: in a normal string literal
# "\t" (as in "\train..." / "\test...") is a TAB and "\v" (as in "\valid...")
# is a vertical tab, which silently corrupts the file names.
DATASET_DIR = r'F:\dicoding-ml-terapan\dicoding-ml-terapan\sentiment-analysis\dataset\indonlu\dataset\smsa_doc-sentiment-prosa'

train_dataset_path = DATASET_DIR + r'\train_preprocess.tsv'
valid_dataset_path = DATASET_DIR + r'\valid_preprocess.tsv'
test_dataset_path = DATASET_DIR + r'\test_preprocess_masked_label.tsv'

# Support Vector Machine (SVM)