In [1]:
# Show the kernel's current working directory before changing it.
%pwd

'd:\\sign_language_conversion\\research'

In [2]:
import os

# Step up from the notebook folder (research/) to the project root so that the
# project's relative paths (config/, artifacts/) resolve from there.
# Guarded on the folder name so that re-running this cell, or starting the
# kernel already at the project root, does not climb one level too far.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [3]:
# Confirm the working directory after the chdir above.
%pwd

'd:\\sign_language_conversion'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable settings for the model-training stage.

    Instances are assembled by ``ConfigurationManager.get_training_config``
    and consumed by ``Training``.
    """
    # Output directory of the training stage (created when the config is built).
    root_dir: Path
    # File the fitted classifier is pickled to by ``Training.save_model``.
    trained_model_path: Path
    # Pickled dataset read by ``Training.train``; it is loaded as a mapping
    # with 'data' and 'labels' entries.
    dataset_path: Path
    # Number of trees passed to ``RandomForestClassifier``.
    n_estimators: int

In [5]:
from sign_language_conversion.constants import *
from sign_language_conversion.utils.common import read_yaml, create_directories

In [16]:
class ConfigurationManager:
    """Load the project's YAML configuration and parameters, and hand out
    typed per-stage configuration objects."""

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read both YAML files and make sure the artifacts root exists.

        Args:
            config_filepath: Location of the main configuration YAML.
            params_filepath: Location of the parameters YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        artifacts_root = self.config.artifacts_root
        create_directories([artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Build the ``TrainingConfig`` for the training stage.

        As a side effect, the training stage's root directory is created
        through ``create_directories``.

        Returns:
            A ``TrainingConfig`` whose paths come from the ``training``
            section of the config, whose dataset path is ``data.pickle``
            under the data-ingestion root, and whose ``n_estimators`` comes
            from the parameters file.
        """
        training_section = self.config['training']

        # The dataset is looked up as data.pickle inside the data-ingestion folder.
        ingestion_root = self.config.data_ingestion.root_dir
        dataset_file = Path(ingestion_root) / "data.pickle"

        training_root = Path(training_section['root_dir'])
        create_directories([training_root])

        return TrainingConfig(
            root_dir=training_root,
            trained_model_path=Path(training_section['trained_model_path']),
            dataset_path=dataset_file,
            n_estimators=self.params['n_estimators'],
        )

In [17]:
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np

In [18]:
class Training:
    """Fit a random-forest classifier on the pickled dataset and persist it.

    The dataset file is expected to unpickle to a mapping with ``'data'``
    (feature rows) and ``'labels'`` entries. Training uses a stratified
    80/20 train/test split and reports accuracy on the held-out part.
    """

    def __init__(self, config: TrainingConfig):
        """Store the configuration and build the (unfitted) classifier.

        Args:
            config: Training settings; ``n_estimators`` sizes the forest.
        """
        self.config = config
        self.model = RandomForestClassifier(
            n_estimators=config.n_estimators
        )

    def _load_dataset(self):
        """Read the pickled dataset and return ``(data, labels)`` as NumPy arrays."""
        data_path = Path(self.config.dataset_path)
        # NOTE: pickle.load can execute arbitrary code. Only point
        # dataset_path at files produced by this project's own pipeline.
        with open(data_path, 'rb') as f:
            data_dict = pickle.load(f)

        return np.asarray(data_dict['data']), np.asarray(data_dict['labels'])

    def train(self):
        """Split the dataset, fit the model, report accuracy and save the model.

        Returns:
            The accuracy on the held-out 20% (fraction of correct
            predictions), or ``None`` when the dataset is empty or cannot be
            split. In the ``None`` case a message is printed and nothing is
            fitted or saved.
        """
        data, labels = self._load_dataset()

        # Nothing to split at all.
        if len(data) == 0 or len(labels) == 0:
            print("Error: Insufficient data for splitting.")
            return None

        # train_test_split never returns an empty split; it raises ValueError
        # instead (too few samples overall, a class with fewer than two
        # members for stratification, mismatched lengths, ...). Report that in
        # the same print-and-return style as the guard above, so a dataset
        # that is too small does not surface as a raw library error.
        try:
            x_train, x_test, y_train, y_test = train_test_split(
                data, labels, test_size=0.2, shuffle=True, stratify=labels)
        except ValueError as exc:
            print(f"Error: Insufficient data for splitting. ({exc})")
            return None

        self.model.fit(x_train, y_train)

        y_predict = self.model.predict(x_test)
        score = accuracy_score(y_test, y_predict)

        print(f'{score * 100:.2f}% of samples were classified correctly!')

        # NOTE(review): this writes a second, dict-wrapped copy of the model
        # to 'model1.p' in the current working directory, independent of the
        # configured path. Presumably a downstream consumer loads it via
        # ['model']; confirm it is still needed before removing.
        model_save_path = Path('model1.p')
        with open(model_save_path, 'wb') as f:
            pickle.dump({'model': self.model}, f)

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )
        return score

    def save_model(self, path: Path, model: RandomForestClassifier):
        """Pickle ``model`` to ``path``, creating parent directories if needed.

        Args:
            path: Destination file (a ``Path`` or path string).
            model: The classifier to serialize.
        """
        path = Path(path)
        # Make sure the target folder exists so open() cannot fail on a
        # missing directory.
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(model, f)

In [19]:
# Run the training stage end to end: load the configuration, build the typed
# training config, then fit, evaluate and persist the model.
# There is deliberately no try/except here: catching an exception only to
# re-raise it unchanged adds nothing, so errors simply propagate with their
# original traceback.
config_manager = ConfigurationManager()
training_config = config_manager.get_training_config()
trainer = Training(config=training_config)
trainer.train()

[2024-05-25 21:02:30,919: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-05-25 21:02:30,924: INFO: common: yaml file: params.yaml loaded successfully]
[2024-05-25 21:02:30,926: INFO: common: created directory at: artifacts]
[2024-05-25 21:02:30,928: INFO: common: created directory at: artifacts\training]
100.00% of samples were classified correctly!
