In [1]:
import os

In [2]:
%pwd

'c:\\Users\\artit\\IKP_2025\\fraud_prediction\\research'

In [3]:
# Step up from research/ to the project root so project-relative paths
# (e.g. config/config.yaml) resolve.
# Guarded so that re-running this cell does not climb one directory further
# on every execution.
if os.path.basename(os.getcwd()) == "research":
    os.chdir("../")

In [4]:
%pwd

'c:\\Users\\artit\\IKP_2025\\fraud_prediction'

In [5]:
from dataclasses import dataclass
from pathlib import Path 

@dataclass(frozen=True)
class PrepareBaseModelConfig:
    """Read-only record of paths and hyper-parameters for base-model preparation.

    The dataclass is frozen: assigning to any field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # Filesystem locations.
    root_dir: Path
    base_model_path: Path
    update_base_model_path: Path
    training_data: Path

    # Hyper-parameters.
    # NOTE(review): presumably these mirror entries in params.yaml — confirm
    # against the code that constructs this object.
    params_num_features: list
    params_learning_rate: float
    params_include_top: bool
    params_weights: str
    params_classes: int

In [6]:
from fraud_prediction.constants import * 
from fraud_prediction.utils.common import read_yaml, create_directories
from fraud_prediction.entity.config_entity import TrainingConfig
import tensorflow as tf 

[2026-01-26 14:48:37,405: INFO: utils: Note: NumExpr detected 22 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 16.]
[2026-01-26 14:48:37,406: INFO: utils: NumExpr defaulting to 16 threads.]


In [7]:
import tensorflow as tf 
import numpy as np

# Ask Keras to use the TensorFlow backend.
# NOTE(review): tensorflow has already been imported by this point, so this
# variable may be set too late to influence backend selection — confirm.
os.environ["KERAS_BACKEND"] = "tensorflow"

# Debugging switches: execute tf.function bodies eagerly and put tf.data
# pipelines into debug mode.
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()

In [8]:
class ConfigurationManager:
    """Loads the project's YAML config and params files and builds stage configs."""

    def __init__(
        self,
        config_filepath=CONFIG_FILE_PATH,
        params_filepath=PARAMS_FILE_PATH,
    ):
        """Read both YAML files and create the artifacts root directory.

        Args:
            config_filepath: Path of the main configuration YAML.
            params_filepath: Path of the hyper-parameter YAML.
        """
        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)

        create_directories([self.config.artifacts_root])

    def get_training_config(self) -> TrainingConfig:
        """Assemble the ``TrainingConfig`` for the training stage.

        Creates the training root directory as a side effect.

        Returns:
            A ``TrainingConfig`` populated from the ``training``,
            ``prepare_base_model`` and ``data_ingestion`` config sections and
            from the loaded params.
        """
        # Pull out every config section up front so a missing section fails
        # before any directory is created.
        training_section = self.config.training
        base_model_section = self.config.prepare_base_model
        hyper_params = self.params
        dataset_dir = self.config.data_ingestion.unzip_dir

        training_root = Path(training_section.root_dir)
        create_directories([training_root])

        return TrainingConfig(
            root_dir=training_root,
            trained_model_path=Path(training_section.trained_model_path),
            update_base_model_path=Path(base_model_section.update_base_model_path),
            training_data=Path(dataset_dir),
            params_epochs=hyper_params.EPOCHS,
            params_batch_size=hyper_params.BATCH_SIZE,
            params_is_augmentation=hyper_params.AUGMENTATION,
            params_num_features=hyper_params.NUM_FEATURES,
            params_sampling_ratio=hyper_params.SAMPLING_RATIO,
        )

In [9]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time

In [16]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from pathlib import Path
import tensorflow as tf 
import os
import glob

class Training:
    """Trains the prepared ANN on a down-sampled fraud dataset and saves it.

    Intended call order: ``get_base_model()``, ``prepare_data()``, ``train()``.
    """

    def __init__(self, config: TrainingConfig):
        """Store the training configuration; no I/O is performed here."""
        self.config = config

    def get_base_model(self):
        """Load the ANN model prepared in stage_02 and compile it for training."""
        self.model = tf.keras.models.load_model(
            self.config.update_base_model_path
        )

        # Re-compile immediately so training starts with a new optimizer.
        self.model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy']
        )
        print("Model Downloaded and Re-Compile already and ready for new training")

    def prepare_data(self):
        """Build scaled train/validation arrays from the training CSV.

        Loads the first CSV (in sorted order) found under
        ``config.training_data``, keeps every fraud row plus a random sample of
        normal rows sized by ``config.params_sampling_ratio``, engineers the
        balance-difference features, one-hot encodes ``type``, splits 80/20
        (stratified on ``isFraud``) and min-max scales using statistics from
        the training split only.

        Sets ``X_train``, ``X_valid``, ``y_train``, ``y_valid`` (float32 numpy
        arrays) and ``scaler`` (the fitted ``MinMaxScaler``) on the instance.

        Raises:
            FileNotFoundError: If no CSV file exists under ``config.training_data``.
        """
        # 1. Load the CSV file.
        data_dir = self.config.training_data
        # Sorted so the chosen file does not depend on filesystem listing order.
        csv_files = sorted(
            glob.glob(os.path.join(data_dir, "**/*.csv"), recursive=True)
        )
        if not csv_files:
            raise FileNotFoundError(f"No CSV file found under {data_dir}")
        df = pd.read_csv(csv_files[0])

        fraud_df = df[df['isFraud'] == 1]
        normal_df = df[df['isFraud'] == 0]

        ratio = self.config.params_sampling_ratio
        # DataFrame.sample(n=...) only accepts integers, so truncate in case the
        # configured ratio is fractional. Also never request more normal rows
        # than are available.
        n_normal = min(int(len(fraud_df) * ratio), len(normal_df))

        normal_downsampled = normal_df.sample(n=n_normal, random_state=42)

        # Recombine and shuffle so the classes are interleaved.
        df = pd.concat([fraud_df, normal_downsampled])
        df = df.sample(frac=1, random_state=42)

        print(f"‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡πÉ‡∏ä‡πâ‡πÄ‡∏ó‡∏£‡∏ô (Ratio 1:{ratio}): ‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î {df.shape[0]} ‡πÅ‡∏ñ‡∏ß")

        # 2. Feature engineering: balance deltas for origin and destination.
        df['diff_new_old_balance'] = df['newbalanceOrig'] - df['oldbalanceOrg']
        df['diff_new_old_destiny'] = df['newbalanceDest'] - df['oldbalanceDest']

        # 3. Feature selection & one-hot encoding.
        cols_to_drop = ['nameOrig', 'nameDest', 'isFlaggedFraud', 'step_weeks', 'step_days']
        # errors='ignore' already skips columns that are absent from the frame.
        df = df.drop(columns=cols_to_drop, errors='ignore')
        df = pd.get_dummies(df, columns=['type'], dtype=int)
        df = df.dropna()

        # 4. Separate the features from the target (isFraud).
        target_col = 'isFraud'
        X = df.drop(columns=[target_col])
        y = df[target_col]

        # 5. Split the data, then scale (fit on the training split only).
        X_train_raw, X_valid_raw, y_train_raw, y_valid_raw = train_test_split(
            X, y, test_size=0.2, random_state=42, stratify=y
        )

        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train_raw)
        X_valid_scaled = scaler.transform(X_valid_raw)
        # Keep the fitted scaler so later evaluation/inference can apply the
        # same transformation.
        self.scaler = scaler

        # 6. Store the results as instance attributes (cast to float32 right away).
        self.X_train = np.asarray(X_train_scaled).astype('float32')
        self.X_valid = np.asarray(X_valid_scaled).astype('float32')
        self.y_train = np.asarray(y_train_raw).astype('float32')
        self.y_valid = np.asarray(y_valid_raw).astype('float32')

        print(f" Prepared Data Done and ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô Features ‡∏™‡∏∏‡∏î‡∏ó‡πâ‡∏≤‡∏¢: {self.X_train.shape[1]}")

    def train(self):
        """Fit the ANN on the prepared tabular data and save the trained model.

        The Keras ``History`` object is stored on ``self.history`` and the model
        is written to ``config.trained_model_path``.

        Raises:
            RuntimeError: If ``get_base_model()`` or ``prepare_data()`` has not
                been called yet.
        """
        if not hasattr(self, "model") or not hasattr(self, "X_train"):
            raise RuntimeError(
                "Call get_base_model() and prepare_data() before train()."
            )

        # 1. Convert to tensors before handing the data to TensorFlow.
        X_train_tensor = tf.convert_to_tensor(self.X_train, dtype=tf.float32)
        y_train_tensor = tf.convert_to_tensor(self.y_train, dtype=tf.float32)
        X_valid_tensor = tf.convert_to_tensor(self.X_valid, dtype=tf.float32)
        y_valid_tensor = tf.convert_to_tensor(self.y_valid, dtype=tf.float32)

        print(f"üöÄ ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏ó‡∏£‡∏ô‡∏î‡πâ‡∏ß‡∏¢‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• Shape: {X_train_tensor.shape}")

        # 2. Start training.
        self.history = self.model.fit(
            X_train_tensor,
            y_train_tensor,
            epochs=self.config.params_epochs,
            batch_size=self.config.params_batch_size,
            validation_data=(X_valid_tensor, y_valid_tensor),
            verbose=1
        )

        # 3. Save the model.
        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        """Save ``model`` to ``path``, creating the parent directory if needed.

        Args:
            path: Destination file; a ``str`` is accepted as well as a ``Path``.
            model: The Keras model to persist.
        """
        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)
        model.save(str(path))

In [17]:
# Same three settings as the earlier configuration cell, applied again right
# before the training run.
# NOTE(review): eager execution and tf.data debug mode are debugging aids and
# typically slow training down — confirm they are still wanted here.
os.environ["KERAS_BACKEND"] = "tensorflow"
tf.config.run_functions_eagerly(True)
tf.data.experimental.enable_debug_mode()



In [18]:
# Run the training stage end to end: build the config, load and compile the
# base model, prepare the data, then train and save.
# No try/except wrapper: a failure should surface with its original traceback.
config = ConfigurationManager()
training_config = config.get_training_config()
training = Training(config=training_config)

training.get_base_model()
training.prepare_data()

print(f"Start to training data {training.X_train.shape[0]} transactions")
training.train()
print("Training completed successfully.")

[2026-01-26 14:50:33,753: INFO: common: yaml file: config\config.yaml loaded successfully]
[2026-01-26 14:50:33,769: INFO: common: yaml file: params.yaml loaded successfully]
[2026-01-26 14:50:33,772: INFO: common: created directory at: artifacts]
[2026-01-26 14:50:33,773: INFO: common: created directory at: artifacts\training]
Model Downloaded and Re-Compile already and ready for new training
‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏•‡∏ó‡∏µ‡πà‡πÉ‡∏ä‡πâ‡πÄ‡∏ó‡∏£‡∏ô (Ratio 1:50): ‡∏ó‡∏±‡πâ‡∏á‡∏´‡∏°‡∏î 41871 ‡πÅ‡∏ñ‡∏ß
 Prepared Data Done and ‡∏à‡∏≥‡∏ô‡∏ß‡∏ô Features ‡∏™‡∏∏‡∏î‡∏ó‡πâ‡∏≤‡∏¢: 13
Start to training data 33496 transactions
üöÄ ‡πÄ‡∏£‡∏¥‡πà‡∏°‡∏ï‡πâ‡∏ô‡∏Å‡∏≤‡∏£‡πÄ‡∏ó‡∏£‡∏ô‡∏î‡πâ‡∏ß‡∏¢‡∏Ç‡πâ‡∏≠‡∏°‡∏π‡∏• Shape: (33496, 13)
Epoch 1/5
[1m33/33[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m2s[0m 53ms/step - accuracy: 0.9412 - loss: 0.4507 - val_accuracy: 0.9804 - val_loss: 0.2359
Epoch 2/5
[1m33/33[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚î