In [1]:
%pwd

'd:\\sign_language_conversion\\research'

In [2]:
import os
# Move from the notebook folder up to the project root so the relative config
# paths resolve. The guard makes this cell safe to re-run: once the config file
# is visible from the current directory we are already at the root and must not
# climb another level.
# NOTE(review): assumes the config lives at config/config.yaml under the
# project root - confirm against CONFIG_FILE_PATH.
if not os.path.exists(os.path.join("config", "config.yaml")):
    os.chdir("../")

In [3]:
%pwd

'd:\\sign_language_conversion'

In [4]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class TrainingConfig:
    """Immutable bundle of settings consumed by the model-training stage.

    Attributes:
        root_dir: Directory belonging to the training stage.
        trained_model_path: File the fitted model is pickled to.
        dataset_path: Pickled dataset the model is trained on.
        n_estimators: Number of trees handed to the random forest.
    """

    root_dir: Path
    trained_model_path: Path
    dataset_path: Path
    n_estimators: int

In [5]:
from sign_language_conversion.constants import *
from sign_language_conversion.utils.common import read_yaml, create_directories

In [6]:
class ConfigurationManager:
    """Load the project's YAML settings and build stage-specific config objects.

    Constructing the manager reads both YAML files through ``read_yaml`` and
    makes sure the top-level artifacts directory exists.
    """

    def __init__(self,
                 config_filepath=CONFIG_FILE_PATH,
                 params_filepath=PARAMS_FILE_PATH):
        """Read the config and params files and create the artifacts root.

        Args:
            config_filepath: Location of the main YAML configuration file.
            params_filepath: Location of the YAML file holding the parameters.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        create_directories([self.config['artifacts_root']])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the training stage.

        Creates the training ``root_dir`` as a side effect.

        Returns:
            TrainingConfig: Paths and parameters read from the loaded YAML
            files; ``trained_model_path`` has ``model.p`` appended to the
            configured value.
        """
        training = self.config['training']
        # Plain key lookup, matching how every other setting is read in this
        # class; Path() accepts the configured value directly, so no
        # os.path.join round-trip is needed.
        dataset_path = Path(self.config['data_pickle']['pickle_dataset'])

        root_dir = Path(training['root_dir'])
        create_directories([root_dir])

        return TrainingConfig(
            root_dir=root_dir,
            trained_model_path=Path(training['trained_model_path']) / "model.p",
            dataset_path=dataset_path,
            n_estimators=self.params['n_estimators'],
        )

In [7]:
import pickle
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import tensorflow as tf

In [11]:
class Training:
    """Fit a random-forest classifier on the pickled dataset and persist it."""

    def __init__(self, config: TrainingConfig):
        """Store the configuration and build the (unfitted) classifier.

        Args:
            config: Training settings; ``n_estimators`` sizes the forest.
        """
        self.config = config
        self.model = RandomForestClassifier(n_estimators=config.n_estimators)

    def train(self):
        """Load the dataset, fit the model, print its accuracy and save it.

        The pickled dataset is expected to be a mapping with ``'data'``
        (one sequence of numbers per sample) and ``'labels'``. Samples are
        zero-padded at the end to the length of the longest one, split 80/20
        with stratification on the labels, and the accuracy on the held-out
        part is printed. The fitted model is then written to
        ``config.trained_model_path``.

        Raises:
            ValueError: If the dataset contains no samples.
        """
        data_path = Path(self.config.dataset_path)
        # NOTE(security): pickle.load can execute arbitrary code. Only point
        # dataset_path at files produced by this project's own pipeline.
        with open(data_path, 'rb') as f:
            data_dict = pickle.load(f)

        # Keep the samples as a plain sequence until they are padded: rows may
        # differ in length, and recent NumPy versions raise when asked to
        # build an array from ragged input.
        samples = data_dict['data']
        labels = np.asarray(data_dict['labels'])

        if len(samples) == 0:
            raise ValueError(f"Dataset at {data_path} contains no samples")

        max_len = max(len(item) for item in samples)
        # pad_sequences already returns a float32 ndarray of shape
        # (n_samples, max_len), so no further conversion is required.
        data_padded = tf.keras.preprocessing.sequence.pad_sequences(
            samples, maxlen=max_len, padding='post', dtype='float32'
        )

        # Split the data into training and testing sets
        x_train, x_test, y_train, y_test = train_test_split(
            data_padded, labels, test_size=0.2, shuffle=True, stratify=labels
        )

        self.model.fit(x_train, y_train)

        y_predict = self.model.predict(x_test)
        score = accuracy_score(y_test, y_predict)

        print(f'{score * 100:.2f}% of samples were classified correctly!')

        # Write the model exactly once. The file holds the bare estimator (no
        # dict wrapper), which is what the previous pair of writes to this
        # same path ended up leaving on disk.
        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

    def save_model(self, path: Path, model: RandomForestClassifier):
        """Pickle ``model`` to ``path``, creating the parent directory if needed.

        Args:
            path: Destination file for the pickled model.
            model: The estimator to serialize.
        """
        path = Path(path)
        # Avoid losing a finished training run to a missing output directory.
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, 'wb') as f:
            pickle.dump(model, f)

In [12]:
# Run the training stage end to end. Failures propagate unchanged, so the
# notebook shows the original traceback without an extra re-raise frame.
config_manager = ConfigurationManager()
training_config = config_manager.get_training_config()
trainer = Training(config=training_config)
trainer.train()

[2024-06-03 15:21:45,316: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-03 15:21:45,316: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-03 15:21:45,316: INFO: common: created directory at: artifacts]
[2024-06-03 15:21:45,319: INFO: common: created directory at: artifacts\training]
100.00% of samples were classified correctly!
