In [1]:
%pwd

'e:\\github_clone\\Patients-Condition-Classification-Using-Drug-Reviews\\research'

In [11]:
import os
import torch.nn as nn
import torch

In [3]:
# The kernel starts in <project>/research (see the %pwd output above); step up
# one level, presumably so project-relative paths resolve from the repo root.
# The guard makes this cell idempotent: re-running it no longer climbs a
# further directory level each time.
if os.path.basename(os.getcwd()) == 'research':
    os.chdir('../')

In [21]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class ModelTrainerConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Instances are built by ``ConfigurationManager.get_model_trainer_config``,
    which copies the values straight from the loaded config/params objects
    without conversion, so the annotations below describe intent and are not
    enforced at runtime.
    """
    # Working directory of the training stage; created before this config is built.
    root_dir: Path
    # Base directory holding one sub-folder per split ('train', 'val'), each
    # containing a 'preprocessed_data.pkl' file.
    data_path: Path
    # Passed to DistilBertModel.from_pretrained to load the backbone.
    model_ckpt: Path
    # Number of training epochs (not read by the code visible in this notebook section).
    epochs: int
    # Number of output units of the classification head.
    num_classes: int
    # Learning rate handed to the AdamW optimizer.
    lr: float
    # Taken from the model_trainer config section; presumably per-class loss
    # weights -- TODO confirm, it is not used by the code visible here.
    weights: dict
    # Batch size used when building the DataLoaders.
    batch: int
    

In [7]:
from PatientConditonClassification.constants import *
from PatientConditonClassification.utils.common import read_yaml, create_directories

In [22]:
class ConfigurationManager:
    """Reads the project's config and params YAML files and builds stage configs."""

    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH):
        """Load both YAML files and prepare the artifacts root directory.

        Args:
            config_filepath: Location of the main configuration file.
            params_filepath: Location of the hyper-parameter file.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_model_trainer_config(self) -> ModelTrainerConfig:
        """Assemble the settings for the model-training stage.

        Paths, checkpoint name and weights come from the ``model_trainer``
        section of the config file; epochs, class count, learning rate and
        batch size come from the ``TrainingArguments`` section of the params
        file. The stage's ``root_dir`` is handed to ``create_directories``
        before the config object is built.

        Returns:
            ModelTrainerConfig: the populated, frozen settings object.
        """
        trainer_section = self.config.model_trainer
        training_args = self.params.TrainingArguments

        create_directories([trainer_section.root_dir])

        # Keyword order mirrors the dataclass field order.
        settings = dict(
            root_dir=trainer_section.root_dir,
            data_path=trainer_section.data_path,
            model_ckpt=trainer_section.model_ckpt,
            epochs=training_args.epochs,
            num_classes=training_args.num_classes,
            lr=training_args.lr,
            weights=trainer_section.weights,
            batch=training_args.batch,
        )
        return ModelTrainerConfig(**settings)

In [20]:
from transformers import DistilBertModel, DistilBertTokenizer
from sklearn.utils.class_weight import compute_class_weight
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import AdamW
import numpy as np
import pickle

In [5]:
class DistilBERT_Arch(nn.Module):
    """Classification head on top of a DistilBERT-style encoder.

    The encoder's hidden state at sequence position 0 is passed through
    Linear -> ReLU -> Dropout -> Linear -> LogSoftmax, so ``forward`` returns
    log-probabilities (pair it with ``nn.NLLLoss``, not ``CrossEntropyLoss``).
    """

    def __init__(self, distilbert, num_classes=5):
        """Wrap ``distilbert`` with a two-layer classification head.

        Args:
            distilbert: Encoder module. It is called as
                ``distilbert(ids, attention_mask=..., return_dict=True)`` and
                its result must expose ``'last_hidden_state'``.
            num_classes: Number of output classes.
        """
        super().__init__()
        self.distilbert = distilbert

        # Read the encoder width from its config when available so that
        # checkpoints with a hidden size other than 768 work; fall back to
        # 768 (the previous hard-coded value) when no config is exposed.
        backbone_cfg = getattr(distilbert, "config", None)
        hidden_size = (
            getattr(backbone_cfg, "hidden_size", None)
            or getattr(backbone_cfg, "dim", None)
            or 768
        )

        self.dropout = nn.Dropout(0.1)
        self.relu = nn.ReLU()
        self.fc1 = nn.Linear(hidden_size, 512)
        self.fc2 = nn.Linear(512, num_classes)  # Output layer with num_classes output units
        # Kept under the name ``softmax`` for compatibility, but this is a
        # *log*-softmax over the class dimension.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, sent_id, mask):
        """Return per-class log-probabilities for a batch.

        Args:
            sent_id: Token id tensor fed to the encoder
                (assumed shape (batch, seq_len) -- TODO confirm against caller).
            mask: Attention mask forwarded to the encoder as ``attention_mask``.

        Returns:
            Tensor of shape (batch, num_classes) holding log-probabilities.
        """
        distilbert_output = self.distilbert(sent_id, attention_mask=mask, return_dict=True)
        # Hidden state of the first token of every sequence (the [CLS] slot).
        cls_hs = distilbert_output['last_hidden_state'][:, 0, :]
        x = self.fc1(cls_hs)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.softmax(x)
        return x


In [None]:
class ModelTrainer:
    def __init__(self, config: ModelTrainerConfig):
        """Remember the training settings and choose the compute device.

        Args:
            config: Settings for the training stage.
        """
        self.config = config

        # Use the GPU when torch reports one, otherwise stay on the CPU.
        if torch.cuda.is_available():
            self.device = "cuda"
        else:
            self.device = "cpu"

        # Names of the data splits loaded by ``call_data``.
        self.files = ['train', 'val']

    def initiate_model(self):
        """Build the classifier and its optimizer and keep them on the trainer.

        Loads the pretrained DistilBERT backbone named by
        ``config.model_ckpt``, wraps it in ``DistilBERT_Arch`` with
        ``config.num_classes`` outputs, moves it to ``self.device`` and
        creates an AdamW optimizer with learning rate ``config.lr``.

        The results are stored as ``self.model`` and ``self.optimizer`` so
        that ``train`` (which reads ``self.model``) can use them; as plain
        local variables they would be discarded when this method returns.
        """
        backbone = DistilBertModel.from_pretrained(self.config.model_ckpt)
        classifier = DistilBERT_Arch(backbone, self.config.num_classes)
        self.model = classifier.to(self.device)
        self.optimizer = AdamW(self.model.parameters(), lr=self.config.lr)
    
    def call_data(self):
        """Load every split in ``self.files`` and build a DataLoader for each.

        For each split name the file
        ``<config.data_path>/<split>/preprocessed_data.pkl`` is unpickled; its
        ``'seq'``, ``'mask'`` and ``'label'`` entries (assumed to be tensors
        with matching first dimension, as ``TensorDataset`` requires) are
        wrapped in a ``TensorDataset``.

        The 'train' split is shuffled with ``RandomSampler``; every other
        split is read in order with ``SequentialSampler`` so evaluation is
        deterministic.

        The loaders are stored on the instance (``self.dataloaders``,
        ``self.train_dataloader``, ``self.val_dataloader``) and also returned,
        so they are no longer lost when the method exits.

        Returns:
            dict: mapping of split name to its ``DataLoader``.
        """
        dataloaders = {}
        for split in self.files:
            data_file = Path(os.path.join(self.config.data_path, split, 'preprocessed_data.pkl'))
            # NOTE(security): pickle.load can execute arbitrary code; only
            # load files produced by this project's own preprocessing stage.
            with open(data_file, 'rb') as f:
                processed_data = pickle.load(f)

            dataset = TensorDataset(
                processed_data['seq'],
                processed_data['mask'],
                processed_data['label'],
            )
            if split == 'train':
                sampler = RandomSampler(dataset)
            else:
                sampler = SequentialSampler(dataset)
            dataloaders[split] = DataLoader(dataset, sampler=sampler, batch_size=self.config.batch)

        self.dataloaders = dataloaders
        self.train_dataloader = dataloaders.get('train')
        self.val_dataloader = dataloaders.get('val')
        return dataloaders

    def train(self):
        self.model.train()
        total_loss, total_accuracy = 0, 0
        total_preds=[]
        for step,batch in enumerate(train_dataloader):