In [1]:
from datasets import Dataset, DatasetDict



import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.gridspec import GridSpec

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import os

In [3]:
%pwd

'c:\\Users\\assi01\\Desktop\\projects\\AirTravel_Sentiment_Analysis\\research'

In [4]:
# Step up from the notebook's folder to the project root so the relative
# config/params paths used by later cells resolve.
# Guarded so that re-running this cell does not climb one more directory
# level each time (the unguarded chdir is not idempotent).
# NOTE(review): assumes params.yaml lives at the project root, as the load
# log of the final cell suggests — confirm.
if not os.path.isfile('params.yaml'):
    os.chdir('../')

In [5]:
%pwd

'c:\\Users\\assi01\\Desktop\\projects\\AirTravel_Sentiment_Analysis'

In [6]:
from dataclasses import dataclass
from pathlib import Path


@dataclass
class PrepareBaseModelConfig:
    """Settings consumed by the prepare-base-model stage.

    A plain (mutable) dataclass: fields may be reassigned after construction.
    No validation or type coercion is performed on the values.
    """

    # Stage working directory; ConfigurationManager creates it before
    # building this object.
    root_dir: Path
    # Where PrepareBaseModel.get_base_model saves the loaded model.
    base_model_path: Path
    # Not read anywhere in this notebook — presumably the destination for a
    # later, modified model; TODO confirm.
    updated_base_model_path: Path
    # Where PrepareBaseModel.get_base_model_tokenizer saves the tokenizer.
    base_tokenizer_path: Path
    # Not read anywhere in this notebook — presumably the destination for a
    # later, modified tokenizer; TODO confirm.
    updated_base_tokenizer_path: Path
    # Checkpoint identifier handed to ``from_pretrained`` for both the model
    # and the tokenizer.
    params_checkpoint: str
    # Forwarded as ``num_labels`` when the classification model is loaded.
    params_num_labels: int

In [7]:
from airTravelSentimentAnalysis.constants import *
from airTravelSentimentAnalysis.utils.common import read_yaml, create_directories

In [8]:
class ConfigurationManager:
    """Reads the project's YAML config/params files and builds stage configs.

    Both files are loaded once at construction time with ``read_yaml`` and
    kept on the instance as ``self.config`` and ``self.params``.
    """

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Load both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the config YAML; defaults to
                ``CONFIG_FILE_PATH``.
            params_filepath: Path of the params YAML; defaults to
                ``PARAMS_FILE_PATH``.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_prepare_base_model_config(self) -> PrepareBaseModelConfig:
        """Build the configuration object for the prepare-base-model stage.

        Creates the stage's ``root_dir`` via ``create_directories`` first,
        then wraps the path entries of the ``prepare_base_model`` config
        section in ``Path`` and adds ``CHECKPOINT`` / ``NUM_LABELS`` from the
        params file.

        Returns:
            A populated ``PrepareBaseModelConfig``.
        """
        stage = self.config.prepare_base_model

        create_directories([stage.root_dir])

        # Entries of the config section that are converted to Path objects,
        # in the order they are read.
        path_keys = (
            "root_dir",
            "base_model_path",
            "updated_base_model_path",
            "base_tokenizer_path",
            "updated_base_tokenizer_path",
        )
        stage_paths = {key: Path(getattr(stage, key)) for key in path_keys}

        return PrepareBaseModelConfig(
            **stage_paths,
            params_checkpoint=self.params.CHECKPOINT,
            params_num_labels=self.params.NUM_LABELS,
        )

In [9]:
import os
import urllib.request as request
from zipfile import ZipFile
from transformers import AutoTokenizer
from transformers import AutoModelForSequenceClassification
from transformers import TrainingArguments
from transformers import Trainer
from transformers import pipeline

In [10]:
class PrepareBaseModel:
    """Loads the pretrained model and tokenizer for a checkpoint and saves them.

    The checkpoint name, label count and output paths all come from the
    ``PrepareBaseModelConfig`` passed to the constructor.
    """

    def __init__(self, config: PrepareBaseModelConfig):
        """Store the stage configuration.

        Args:
            config: Checkpoint name, label count and output paths.
        """
        self.config = config

    def get_base_model(self):
        """Load the sequence-classification model and save it.

        Loads ``config.params_checkpoint`` with ``config.params_num_labels``
        labels, keeps the result on ``self.model`` and writes it to
        ``config.base_model_path``.
        """
        self.model = AutoModelForSequenceClassification.from_pretrained(
            self.config.params_checkpoint,
            num_labels=self.config.params_num_labels,
        )

        self.save_model(path=self.config.base_model_path, model=self.model)

    def get_base_model_tokenizer(self):
        """Load the tokenizer for the checkpoint and save it.

        Keeps the tokenizer on ``self.tokenizer`` and writes it to
        ``config.base_tokenizer_path``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(self.config.params_checkpoint)

        # Go through the tokenizer-specific saver rather than save_model, so
        # the call matches what is being saved and save_tokenizer is not
        # left as dead code.
        self.save_tokenizer(path=self.config.base_tokenizer_path, tokenizer=self.tokenizer)

    @staticmethod
    def save_model(path: Path, model: AutoModelForSequenceClassification):
        """Write ``model`` to ``path`` via its ``save_pretrained`` method."""
        model.save_pretrained(path)

    @staticmethod
    def save_tokenizer(path: Path, tokenizer: AutoTokenizer):
        """Write ``tokenizer`` to ``path`` via its ``save_pretrained`` method."""
        tokenizer.save_pretrained(path)

In [11]:
# Run the prepare-base-model stage: load the configuration, then load and
# save the base model followed by its tokenizer.
# The former ``try/except Exception as e: raise e`` wrapper was a no-op that
# only added a frame to the traceback, so errors now simply propagate.
config = ConfigurationManager()
prepare_base_model_config = config.get_prepare_base_model_config()
prepare_base_model = PrepareBaseModel(config=prepare_base_model_config)
prepare_base_model.get_base_model()
prepare_base_model.get_base_model_tokenizer()

[2025-05-22 12:21:00,936: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-05-22 12:21:00,938: INFO: common: yaml file: params.yaml loaded successfully]
[2025-05-22 12:21:00,939: INFO: common: created directory at: artifacts]
[2025-05-22 12:21:00,940: INFO: common: created directory at: artifacts/prepare_base_model]


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
