In [1]:
import os

In [2]:
%pwd

'c:\\Users\\justi\\Desktop\\MLOps Demo\\MLOPS-Demo-Project\\research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'c:\\Users\\justi\\Desktop\\MLOps Demo\\MLOPS-Demo-Project'

In [5]:
from dataclasses import dataclass
from pathlib import Path

#need to change according to my training needs
@dataclass(frozen=True)
class ModelTrainingConfig:
    root_dir: Path
    train_data_path: Path
    test_data_path: Path
    model_name: str
    # alpha: float
    # l1_ratio: float
    target_column: str
    learning_rate: float
    input_dim: int 
    hidden1_dim: int 
    hidden2_dim: int 
    output_dim: int
    criterion: str
    optimizer: str
    num_epochs: int

In [6]:
from src.mlops_project.constants import *
from src.mlops_project.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])

    def get_model_training_config(self) -> ModelTrainingConfig:
        config = self.config.model_training
        params = self.params.HeartDiseaseNN
        schema = self.schema.TARGET_COLUMN


        create_directories([config.root_dir])

        model_training_config = ModelTrainingConfig(
            root_dir=config.root_dir,
            train_data_path=config.train_data_path,
            test_data_path=config.test_data_path,
            model_name = config.model_name,
            target_column = schema.name,
            learning_rate = params.learning_rate,
            input_dim = params.input_dim,
            hidden1_dim = params.hidden1_dim,
            hidden2_dim = params.hidden2_dim,
            output_dim = params.output_dim,
            criterion = params.criterion,
            optimizer = params.optimizer,
            num_epochs = params.num_epochs
        )
        
        return model_training_config


In [8]:
import os
from src.mlops_project import logger
from sklearn.model_selection import train_test_split
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import Adam
from torch.utils.data import DataLoader, TensorDataset
import torch


In [9]:
class HeartDiseaseNN(nn.Module):
    def __init__(self, input_dim, hidden1_dim, hidden2_dim, output_dim):
        super(HeartDiseaseNN, self).__init__()
        self.fc1 = nn.Linear(input_dim, hidden1_dim)    # Input layer to first hidden layer
        self.fc2 = nn.Linear(hidden1_dim, hidden2_dim)  # First hidden layer to second hidden layer
        self.fc3 = nn.Linear(hidden2_dim, output_dim)   # Second hidden layer to output layer

    def forward(self, x):
        x = F.relu(self.fc1(x))  # Activation function for the first hidden layer
        x = F.relu(self.fc2(x))  # Activation function for the second hidden layer
        x = self.fc3(x)          # Output layer
        return x

In [10]:
def get_optimizer(optimizer_str: str, learning_rate: float, model):
    if optimizer_str == "adam":
        optimizer = Adam(model.parameters(), lr=learning_rate)
    return optimizer

def get_criterion(criterion_str: str):
    if criterion_str == "cross entropy loss":
        criterion = nn.CrossEntropyLoss()
    return criterion


In [11]:
class ModelTraining:
    def __init__(self, config: ModelTrainingConfig) -> None:
        self.config = config
    
    def train(self):
        # Display configuration details
        print("Input Dimension:", self.config.input_dim)
        print("Hidden Layer 1 Dimension:", self.config.hidden1_dim)
        print("Hidden Layer 2 Dimension:", self.config.hidden2_dim)
        print("Output Dimension:", self.config.output_dim)

        # Load training and testing data
        train_data = pd.read_csv(self.config.train_data_path)
        test_data = pd.read_csv(self.config.test_data_path)

        # Preparing the datasets
        X_train = train_data.drop([self.config.target_column], axis=1)
        y_train = train_data[self.config.target_column]
        X_test = test_data.drop([self.config.target_column], axis=1)
        y_test = test_data[self.config.target_column]

        # Converting data to tensors
        X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
        y_train_tensor = torch.tensor(y_train.values, dtype=torch.long)
        device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        X_train_tensor = X_train_tensor.to(device)
        y_train_tensor = y_train_tensor.to(device)

        # Debugging outputs
        print(f"Type of X_train: {type(X_train)}, Shape of X_train: {X_train.shape}")
        print(f"Type of y_train: {type(y_train)}, Shape of y_train: {y_train.shape}")

        # DataLoader preparation
        train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
        train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)

        # Model setup
        model = HeartDiseaseNN(self.config.input_dim, self.config.hidden1_dim, self.config.hidden2_dim, self.config.output_dim).to(device)
        optimizer = get_optimizer(self.config.optimizer, self.config.learning_rate, model)
        criterion = get_criterion(self.config.criterion)
        
        # Training loop
        results = self.training_loop(model, train_loader, criterion, optimizer, self.config.num_epochs, os.path.join(self.config.root_dir, self.config.model_name))
        return results

    def training_loop(self, model, train_loader, criterion, optimizer, num_epochs, model_path):
        model.train()
        for epoch in range(num_epochs):
            running_loss, correct, total = 0.0, 0, 0
            results = {"losses": [], "accuracies": []}

            for data, labels in train_loader:
                optimizer.zero_grad()
                outputs = model(data)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
            
            epoch_loss = running_loss / len(train_loader)
            epoch_acc = 100 * correct / total
            results["losses"].append(epoch_loss)
            results["accuracies"].append(epoch_acc)

            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}, Accuracy: {epoch_acc:.2f}%')

            # Save the model after each epoch
            torch.save(model.state_dict(), model_path)

        return results

In [12]:
try:
    config = ConfigurationManager()
    model_training_config = config.get_model_training_config()
    model_training_config = ModelTraining(config=model_training_config)
    model_training_config.train()
except Exception as e:
    raise e
    

[2024-06-07 05:07:45,606: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-06-07 05:07:45,608: INFO: common: yaml file: params.yaml loaded successfully]
[2024-06-07 05:07:45,609: INFO: common: yaml file: schema.yaml loaded successfully]
[2024-06-07 05:07:45,610: INFO: common: created directory at: artifacts]
[2024-06-07 05:07:45,611: INFO: common: created directory at: artifacts/model_training]
Input Dimension: 15
Hidden Layer 1 Dimension: 30
Hidden Layer 2 Dimension: 30
Output Dimension: 2
Type of X_train: <class 'pandas.core.frame.DataFrame'>, Shape of X_train: (2755, 15)
Type of y_train: <class 'pandas.core.series.Series'>, Shape of y_train: (2755,)
Epoch [1/15], Loss: nan, Accuracy: 80.36%
Epoch [2/15], Loss: nan, Accuracy: 83.88%
Epoch [3/15], Loss: nan, Accuracy: 83.88%
Epoch [4/15], Loss: nan, Accuracy: 83.88%
Epoch [5/15], Loss: nan, Accuracy: 83.88%
Epoch [6/15], Loss: nan, Accuracy: 83.88%
Epoch [7/15], Loss: nan, Accuracy: 83.88%
Epoch [8/15], Loss: nan

In [13]:


    
def evaluate_model(model, test_loader, criterion):
    model.eval()
    running_loss, correct, total = 0.0, 0, 0
    results = {"losses": [], "accuracies": []}

    with torch.no_grad():
        for data, labels in test_loader:
            outputs = model(data)
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    average_loss = running_loss / len(test_loader)  
    accuracy = 100 * correct / total
    results["losses"].append(average_loss)
    results["accuracies"].append(accuracy)
    print(f'Accuracy of the model on the test data: {accuracy:.2f}%, Loss: {average_loss:.4f}')
    return results


In [14]:
train_data = pd.read_csv("artifacts/data_transformation/train.csv")
train_data = train_data.drop(["CHDRisk"], axis=1)
train_data

Unnamed: 0,sex,age,education,smokingStatus,cigsPerDay,BPMeds,prevalentStroke,prevalentHyp,diabetes,totChol,sysBP,diaBP,BMI,heartRate,glucose
0,1.0,48,2,0.0,0,0,0,0,0,237,124.5,66.5,33.29,80,91
1,1.0,46,2,1.0,20,0,0,1,0,271,158.0,94.0,25.17,78,71
2,0.0,60,3,0.0,0,0,0,1,0,276,144.0,78.0,26.98,60,88
3,1.0,41,1,1.0,4,0,0,0,0,176,113.0,75.0,22.29,80,55
4,0.0,48,2,1.0,40,0,0,0,0,226,117.5,80.0,26.18,60,66
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2750,0.0,54,3,1.0,43,0,0,0,0,288,145.0,92.5,26.20,72,98
2751,0.0,44,1,0.0,0,0,0,0,0,195,114.0,79.0,25.01,60,76
2752,1.0,48,3,0.0,0,0,0,0,0,197,107.0,73.0,19.78,63,76
2753,1.0,42,2,0.0,0,0,0,1,0,230,142.5,97.5,29.94,75,75
