Skip to content

Commit

Permalink
combined experiment tracking with model training
Browse files Browse the repository at this point in the history
  • Loading branch information
MartinKalema committed Jun 11, 2024
1 parent 06cf6e7 commit 279c6ec
Show file tree
Hide file tree
Showing 15 changed files with 127 additions and 91 deletions.
File renamed without changes.
3 changes: 2 additions & 1 deletion configuration/configuration.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ data_ingestion:
test_source_URL: https://drive.google.com/file/d/1mjmYzMdnn_UwSEgTQ7i-cJ5WSOokt9Er/view?usp=sharing
train_data_file: artifacts/data_ingestion/compressed/train_data.zip
test_data_file: artifacts/data_ingestion/compressed/test_data.zip
unzip_dir: artifacts/data_ingestion/decompressed
decompressed_dir: artifacts/data_ingestion/decompressed

training:
root_dir: artifacts/models
training_data_path: artifacts/data_ingestion/decompressed/Train.csv
testing_data_path: artifacts/data_ingestion/decompressed/Test.csv
8 changes: 2 additions & 6 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
from swahiliNewsClassifier import log
from swahiliNewsClassifier.pipeline.stage_01_data_ingestion import DataIngestionTrainingPipeline
from swahiliNewsClassifier.pipeline.stage_02_model_training import ModelTrainingPipeline
# from swahiliNewsClassifier.pipeline.stage_03_model_training import TrainingPipeline
# from swahiliNewsClassifier.pipeline.stage_04_model_evaluation import EvaluationPipeline
from swahiliNewsClassifier.pipeline.stage_02_model_training_and_evaluation import ModelTrainingAndEvaluationPipeline


def run_pipeline_stage(stage_name, pipeline_class) -> None:
Expand Down Expand Up @@ -30,6 +28,4 @@ def run_pipeline_stage(stage_name, pipeline_class) -> None:

if __name__ == '__main__':
run_pipeline_stage("DATA INGESTION STAGE", DataIngestionTrainingPipeline)
run_pipeline_stage("MODEL TRAINING STAGE", ModelTrainingPipeline)
# run_pipeline_stage("Model Training Stage", TrainingPipeline)
# run_pipeline_stage("Model Evaluation Stage", EvaluationPipeline)
run_pipeline_stage("MODEL TRAINING AND EVALUATION STAGE", ModelTrainingAndEvaluationPipeline)
2 changes: 1 addition & 1 deletion parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ LEARNING_RATE_2: 0.05
LEARNING_RATE_3: 0.05
LEARNING_RATE_4: 0.05
LEARNING_RATE_5: 0.03
NUMBER_OF_CLASSES: 2
NUMBER_OF_CLASSES: 5
EPOCHS_1: 5
EPOCHS_2: 5
EPOCHS_3: 5
Expand Down
2 changes: 0 additions & 2 deletions research/02_model_training.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@
" epochs_4: int\n",
" epochs_5: int\n",
" training_data: Path\n",
" number_of_classes: int\n",
" root_dir: Path"
]
},
Expand Down Expand Up @@ -142,7 +141,6 @@
" epochs_3=self.params.EPOCHS_3,\n",
" epochs_4=self.params.EPOCHS_4,\n",
" epochs_5=self.params.EPOCHS_5,\n",
" number_of_classes=self.params.NUMBER_OF_CLASSES,\n",
"\n",
" )"
]
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from swahiliNewsClassifier.entity.entities import ModelTrainingConfig
from swahiliNewsClassifier.entity.entities import ModelTrainingAndEvaluationConfig
from swahiliNewsClassifier import log
import torch
import fastai
Expand All @@ -12,19 +12,20 @@
from swahiliNewsClassifier import log
import boto3
from dotenv import load_dotenv

import dagshub
import mlflow

load_dotenv()

class ModelTraining:
def __init__(self, model_training_config: ModelTrainingConfig):
class ModelTrainingAndEvaluation:
def __init__(self, model_training_and_evaluation_config: ModelTrainingAndEvaluationConfig):
"""
Initialize the ModelTrainingAndEvaluation object with the provided configuration.
Args:
model_training_config (ModelTrainingConfig): Configuration object for model training.
model_training_and_evaluation_config (ModelTrainingAndEvaluationConfig): Configuration object for model training and evaluation.
"""
self.model_training_config = model_training_config
self.model_training_and_evaluation_config = model_training_and_evaluation_config
self.bucket_name = "swahili-news-classifier"
self.model_path = f"models/text_classifier_learner.pth"
self.s3 = boto3.client('s3', aws_access_key_id=os.getenv('AWS_ACCESS_KEY_ID'), aws_secret_access_key=os.getenv('AWS_SECRET_ACCESS_KEY'), region_name=os.getenv('REGION_NAME'))
Expand All @@ -47,7 +48,7 @@ def load_data(self) -> pd.DataFrame:
pd.DataFrame: Loaded training data.
"""
log.info('Loading training data')
train = pd.read_csv(self.model_training_config.training_data)
train = pd.read_csv(self.model_training_and_evaluation_config.training_data)
return train

def prepare_data(self, train) -> 'tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]':
Expand All @@ -60,7 +61,7 @@ def prepare_data(self, train) -> 'tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame
Returns:
tuple: A tuple containing training data (df_trn), validation data (df_val), and data for language model (df_lm).
"""
df_trn, df_val = train_test_split(train, stratify=train['category'], test_size=self.model_training_config.test_size, random_state=123)
df_trn, df_val = train_test_split(train, stratify=train['category'], test_size=self.model_training_and_evaluation_config.test_size, random_state=123)
df_lm = pd.concat([df_trn, df_val], axis=0)[['content']]
return df_trn, df_val, df_lm

Expand All @@ -80,7 +81,7 @@ def create_dataloaders(self, df_lm) -> DataLoaders:
get_x=ColReader('text'),
splitter=RandomSplitter(0.1))

dls = dblock.dataloaders(df_lm, bs=self.model_training_config.batch_size_1)
dls = dblock.dataloaders(df_lm, bs=self.model_training_and_evaluation_config.batch_size_1)
return dls

def train_language_model(self, dls) -> Learner:
Expand All @@ -96,7 +97,7 @@ def train_language_model(self, dls) -> Learner:
log.info('Training Language Model Learner')
learn = language_model_learner(dls, AWD_LSTM, drop_mult=0.3, metrics=[accuracy]).to_fp16()
learn.lr_find()
learn.fine_tune(self.model_training_config.epochs_1, self.model_training_config.learning_rate_1)
learn.fine_tune(self.model_training_and_evaluation_config.epochs_1, self.model_training_and_evaluation_config.learning_rate_1)

log.info('Saving best Language Model Learner.')

Expand All @@ -123,7 +124,17 @@ def create_text_classifier_dataloaders(self, df_trn, dls_lm) -> DataLoaders:
get_y=ColReader('category'),
splitter=RandomSplitter(0.2))

return dblock.dataloaders(df_trn, bs=self.model_training_config.batch_size_2)
return dblock.dataloaders(df_trn, bs=self.model_training_and_evaluation_config.batch_size_2)

def log_to_mlflow(self, metrics: list) -> None:
    """
    Log the configured parameters and the validation metrics to MLflow.

    The MLflow integration is initialised through ``dagshub.init`` and a
    single run is opened for the parameters and the two metrics.

    Args:
        metrics (list): Validation results; index 0 is logged as
            'val_loss' and index 1 as 'val_accuracy'.
    """
    config = self.model_training_and_evaluation_config

    # os.environ only accepts str values: assigning None raises TypeError.
    # Export the tracking URI only when one is actually configured.
    # NOTE(review): when it is unset, tracking relies on whatever
    # dagshub.init(mlflow=True) configures - confirm this is acceptable.
    tracking_uri = config.mlflow_tracking_uri
    if tracking_uri is not None:
        os.environ['MLFLOW_TRACKING_URI'] = tracking_uri

    dagshub.init(
        repo_owner=config.mlflow_repo_owner,
        repo_name=config.mlflow_repo_name,
        mlflow=True,
    )

    with mlflow.start_run():
        mlflow.log_params(config.all_params)
        mlflow.log_metric('val_loss', metrics[0])
        mlflow.log_metric('val_accuracy', metrics[1])

def train_text_classifier(self, dls) -> None:
"""
Expand All @@ -132,21 +143,24 @@ def train_text_classifier(self, dls) -> None:
Args:
dls (DataLoaders): Dataloaders for the text classifier.
"""

log.info('Training Text Classifier Learner.')

learn = text_classifier_learner(dls, AWD_LSTM, metrics=[accuracy]).to_fp16()
learn.load_encoder(f'language_model_learner')
learn.lr_find()
learn.fit_one_cycle(self.model_training_config.epochs_2, self.model_training_config.learning_rate_2)
learn.fit_one_cycle(self.model_training_and_evaluation_config.epochs_2, self.model_training_and_evaluation_config.learning_rate_2)
learn.freeze_to(-2)
learn.fit_one_cycle(self.model_training_config.epochs_3, slice(1e-3/(2.6**4), self.model_training_config.learning_rate_3))
learn.fit_one_cycle(self.model_training_and_evaluation_config.epochs_3, slice(1e-3/(2.6**4), self.model_training_and_evaluation_config.learning_rate_3))
learn.freeze_to(-3)
learn.fit_one_cycle(self.model_training_config.epochs_4, slice(5e-3/(2.6**4), self.model_training_config.learning_rate_4))
learn.fit_one_cycle(self.model_training_and_evaluation_config.epochs_4, slice(5e-3/(2.6**4), self.model_training_and_evaluation_config.learning_rate_4))
learn.unfreeze()
learn.fit_one_cycle(self.model_training_config.epochs_5, slice(1e-3/(2.6**4), self.model_training_config.learning_rate_5))
learn.fit_one_cycle(self.model_training_and_evaluation_config.epochs_5, slice(1e-3/(2.6**4), self.model_training_and_evaluation_config.learning_rate_5))
classifier_metrics = learn.validate()
self.log_to_mlflow(classifier_metrics)
learn.save_encoder(f'text_classifier_learner')

log.info("Saving best Text Classifier Learner.")

learn.save_encoder(f'text_classifier_learner')

def run_pipeline(self) -> None:
"""
Expand Down
Empty file.
18 changes: 12 additions & 6 deletions src/swahiliNewsClassifier/configuration/configuration.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from swahiliNewsClassifier.constants import *
from swahiliNewsClassifier.utilities.helper_functions import read_yaml, create_directories
from swahiliNewsClassifier.entity.entities import DataIngestionConfig, ModelTrainingConfig
from swahiliNewsClassifier.entity.entities import DataIngestionConfig, ModelTrainingAndEvaluationConfig
from dotenv import load_dotenv
import os

load_dotenv()

class ConfigurationManager:
def __init__(self, config_filepath=CONFIG_FILE_PATH,
Expand Down Expand Up @@ -38,16 +41,16 @@ def get_data_ingestion_config(self) -> DataIngestionConfig:
unzip_dir=config.unzip_dir
)

def get_model_training_config(self) -> ModelTrainingConfig:
def get_model_training_and_evaluation_config(self) -> ModelTrainingAndEvaluationConfig:
"""
Get the model training configuration.
Get the model training and evaluation configuration.
Returns:
ModelTrainingConfig: Configuration object for model training.
ModelTrainingAndEvaluationConfig: Configuration object for model training and evaluation.
"""
create_directories([self.config.training.root_dir])

return ModelTrainingConfig(
return ModelTrainingAndEvaluationConfig(
root_dir=self.config.training.root_dir,
training_data=self.config.training.training_data_path,
test_size=self.params.TEST_SIZE,
Expand All @@ -63,6 +66,9 @@ def get_model_training_config(self) -> ModelTrainingConfig:
epochs_3=self.params.EPOCHS_3,
epochs_4=self.params.EPOCHS_4,
epochs_5=self.params.EPOCHS_5,
number_of_classes=self.params.NUMBER_OF_CLASSES,
mlflow_repo_name=os.getenv('MLFLOW_REPO_NAME'),
mlflow_tracking_uri=os.getenv('MLFLOW_TRACKING_URI'),
mlflow_repo_owner=os.getenv('MLFLOW_REPO_OWNER'),
all_params=self.params,

)
65 changes: 46 additions & 19 deletions src/swahiliNewsClassifier/entity/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,38 +13,61 @@ class DataIngestionConfig:
test_source_URL (str): The URL from which the test data will be fetched.
train_data_file (Path): The local file path where the downloaded training data will be stored.
test_data_file (Path): The local file path where the downloaded test data will be stored.
unzip_dir (Path): The directory where the downloaded data will be extracted or unzipped.
decompressed_dir (Path): The directory where the downloaded data will be extracted.
"""
root_dir: Path
train_source_URL: str
test_source_URL: str
train_data_file: Path
test_data_file: Path
unzip_dir: Path
decompressed_dir: Path


@dataclass(frozen=True)
class ModelTrainingConfig:
class ModelTrainingAndEvaluationConfig:
"""
Configuration class for model training using ULMFiT (Universal Language Model Fine-tuning).
Attributes:
test_size (float): Proportion of the dataset to include in the test split.
learning_rate_1 (float): Learning rate for training the language model learner.
learning_rate_2 (float): Learning rate for the first phase of classifier training.
learning_rate_3 (float): Learning rate for the second phase of classifier training.
learning_rate_4 (float): Learning rate for the third phase of classifier training.
learning_rate_5 (float): Learning rate for the fourth phase of classifier training.
batch_size_1 (int): Batch size for language model training.
batch_size_2 (int): Batch size for text classifier training.
epochs_1 (int): Number of epochs for training the language model learner.
epochs_2 (int): Number of epochs for the first phase of classifier training.
epochs_3 (int): Number of epochs for the second phase of classifier training.
epochs_4 (int): Number of epochs for the third phase of classifier training.
epochs_5 (int): Number of epochs for the fourth phase of classifier training.
training_data (Path): Path to the training data CSV file.
number_of_classes (int): Number of target classes in the classification task.
root_dir (Path): Root directory for storing model artifacts.
test_size (float): Proportion of the dataset to include in the test split. This parameter is used to split the dataset into training and validation sets.
learning_rate_1 (float): Learning rate for training the language model learner. This is used during the fine-tuning of the pre-trained language model.
learning_rate_2 (float): Learning rate for the first phase of classifier training. This is used in the initial phase of training the text classifier.
learning_rate_3 (float): Learning rate for the second phase of classifier training. This is used in the second phase of training the text classifier.
learning_rate_4 (float): Learning rate for the third phase of classifier training. This is used in the third phase of training the text classifier.
learning_rate_5 (float): Learning rate for the fourth phase of classifier training. This is used in the final phase of training the text classifier.
batch_size_1 (int): Batch size for language model training. This parameter defines the number of samples that will be propagated through the network at once during language model training.
batch_size_2 (int): Batch size for text classifier training. This parameter defines the number of samples that will be propagated through the network at once during text classifier training.
epochs_1 (int): Number of epochs for training the language model learner. This defines the number of complete passes through the training dataset.
epochs_2 (int): Number of epochs for the first phase of classifier training. This defines the number of complete passes through the training dataset in the first phase.
epochs_3 (int): Number of epochs for the second phase of classifier training. This defines the number of complete passes through the training dataset in the second phase.
epochs_4 (int): Number of epochs for the third phase of classifier training. This defines the number of complete passes through the training dataset in the third phase.
epochs_5 (int): Number of epochs for the fourth phase of classifier training. This defines the number of complete passes through the training dataset in the final phase.
training_data (Path): Path to the training data CSV file. This file contains the text data and corresponding labels for training and validation.
number_of_classes (int): Number of target classes in the classification task. This defines the number of unique labels in the dataset.
root_dir (Path): Root directory for storing model artifacts. This directory is used to save trained models, logs, and other artifacts.
mlflow_tracking_uri (str): URI for the MLflow tracking server. This is used to log and track experiments with MLflow.
mlflow_repo_name (str): Repository name for MLflow tracking. This is used to organize and identify different MLflow runs within the repository.
mlflow_repo_owner (str): Owner of the MLflow repository. This is used to identify the owner of the MLflow repository.
all_params (dict): Dictionary containing all parameters used for model training. This includes all hyperparameters and other settings for reproducibility and logging.
"""
test_size: float
learning_rate_1: float
Expand All @@ -62,3 +85,7 @@ class ModelTrainingConfig:
training_data: Path
number_of_classes: int
root_dir: Path
mlflow_tracking_uri: str
mlflow_repo_name: str
mlflow_repo_owner: str
all_params: dict
31 changes: 0 additions & 31 deletions src/swahiliNewsClassifier/pipeline/stage_02_model_training.py

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from swahiliNewsClassifier.configuration.configuration import ConfigurationManager
from swahiliNewsClassifier.components.model_training_and_evaluation import ModelTrainingAndEvaluation
from swahiliNewsClassifier import log

STAGE_NAME = "Model Training and Evaluation Stage"


class ModelTrainingAndEvaluationPipeline:
    """Pipeline stage that builds the configuration and runs training and evaluation."""

    def __init__(self) -> None:
        """
        Create the pipeline and its ConfigurationManager.
        """
        self.config = ConfigurationManager()

    def main(self) -> None:
        """
        Build the stage configuration, construct the component and run it.

        Any exception is logged with the stage name and then re-raised.
        """
        try:
            stage_config = self.config.get_model_training_and_evaluation_config()
            ModelTrainingAndEvaluation(
                model_training_and_evaluation_config=stage_config
            ).run_pipeline()
        except Exception as e:
            log.exception(f"An error occurred during {STAGE_NAME}: {e}")
            raise e


if __name__ == '__main__':
    # Run only this stage when the module is executed directly.
    ModelTrainingAndEvaluationPipeline().main()
Empty file.
Empty file.
10 changes: 2 additions & 8 deletions template.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,18 +30,14 @@ def create_file_with_directories(filepath: Path) -> None:
f"src/{project_name}/__init__.py",
f"src/{project_name}/components/__init__.py",
f"src/{project_name}/components/data_ingestion.py",
f"src/{project_name}/components/prediction_service.py",
f"src/{project_name}/components/model_training.py",
f"src/{project_name}/components/model_evaluation.py",
f"src/{project_name}/components/model_training_and_evaluation.py",
f"src/{project_name}/utilities/_init__.py",
f"src/{project_name}/utilities/helper_functions.py",
f"src/{project_name}/configuration/__init__.py",
f"src/{project_name}/configuration/configuration.py",
f"src/{project_name}/pipeline/__init__.py",
f"src/{project_name}/pipeline/stage_01_data_ingestion.py",
f"src/{project_name}/pipeline/stage_02_model_training.py",
f"src/{project_name}/pipeline/stage_03_model_evaluation.py",
f"src/{project_name}/pipeline/stage_04_prediction.py",
f"src/{project_name}/pipeline/stage_02_model_training_and_evaluation.py",
f"src/{project_name}/entity/__init__.py",
f"src/{project_name}/entity/entities.py",
f"src/{project_name}/constants/__init__.py",
Expand All @@ -57,8 +53,6 @@ def create_file_with_directories(filepath: Path) -> None:
"logs/20240608-124455.log",
"research/01_data_ingestion.ipynb",
"research/02_model_training.ipynb",
"research/03_model_evaluation.ipynb",
"templates/index.html",
"app.py",
"autopep.py",
".env",
Expand Down

0 comments on commit 279c6ec

Please sign in to comment.