In [1]:
import os
%pwd

'c:\\Users\\bisht\\OneDrive\\Desktop\\Personal_project\\research'

In [2]:
os.chdir('../')

In [3]:
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class TrainingConfig:
    root_dir:Path
    cnn_trained_model_path:Path
    updated_base_model_path:Path
    training_data:Path
    params_epochs:int
    params_batch_size:int
    params_image_size:list
    credit_score_model:Path
    transaction_model:Path
    params_is_augmentation: bool
    transaction_dir:Path
    credit_score_dir:Path
    




In [4]:
from src.Banking_System.constants import *
from src.Banking_System.utils.common import read_yaml,create_directories


In [5]:
class ConfigurationManager:
    def __init__(
            self,
            config_filepath=CONFIG_FILE_PATH,
            params_filepath=PARAMS_FILE_PATH
    ):
        self.config=read_yaml(config_filepath)
        self.params=read_yaml(params_filepath)
        create_directories([self.config.artifacts_root])
    def get_training_config(self) -> TrainingConfig:
        training = self.config.training
        prepare_base_model = self.config.prepare_base_model
        params = self.params
        training_data = os.path.join(self.config.data_ingestion.unzip_dir,'data\\train')
        transaction_dir=self.config.data_ingestion.transaction_dir
        credit_score_dir=self.config.data_ingestion.credit_score_dir
        
        create_directories([
            Path(training.root_dir)
        ])

        training_config = TrainingConfig(
            root_dir=Path(training.root_dir),
            cnn_trained_model_path=Path(training.cnn_trained_model_path),
            updated_base_model_path=Path(prepare_base_model.updated_base_model_path),
            training_data=Path(training_data),
            params_epochs=params.EPOCHS,
            params_batch_size=params.BATCH_SIZE,
            params_is_augmentation=params.AUGMENTATION,
            params_image_size=params.IMAGE_SIZE,
            credit_score_model=Path(training.credit_score_model),
            transaction_model=Path(training.transaction_model),
            credit_score_dir=Path(credit_score_dir),
            transaction_dir=Path(transaction_dir)

        )

        return training_config

In [6]:
import os
import urllib.request as request
from zipfile import ZipFile
import tensorflow as tf
import time
import pandas as pd
from src.Banking_System.utils.common import evaluate_model_transaction,evaluate_model_creditscore,save_object
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from src.Banking_System import logger

In [7]:
class Training:
    def __init__(self, config: TrainingConfig):
        self.config = config
      
    def get_base_model(self):
        self.model = tf.keras.models.load_model(
            self.config.updated_base_model_path
        )

    def train_valid_generator(self):

        datagenerator_kwargs = dict(
            rescale = 1./255,
            validation_split=0.20
        )

        dataflow_kwargs = dict(
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear"
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs
        )

        if self.config.params_is_augmentation:
            train_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
                shear_range=0.2,
                zoom_range=0.2,
                 horizontal_flip=True,
                **datagenerator_kwargs
            )
        else:
            train_datagenerator = valid_datagenerator

        self.train_generator = train_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="training",
            shuffle=True,
            **dataflow_kwargs
        )

    
    @staticmethod
    def save_model(path: Path, model: tf.keras.Model):
        model.save(path)



    
    def cnn_train(self):
        self.steps_per_epoch = self.train_generator.samples // self.train_generator.batch_size
        self.validation_steps = self.valid_generator.samples // self.valid_generator.batch_size

        self.model.fit(
            self.train_generator,
            epochs=self.config.params_epochs,
            steps_per_epoch=self.steps_per_epoch,
            validation_steps=self.validation_steps,
            validation_data=self.valid_generator
        )

        self.save_model(
            path=self.config.trained_model_path,
            model=self.model
        )

  
    def transaction_model_train(self):
        Train_data=pd.read_csv(os.path.join(self.config.transaction_dir,'transaction_train.csv'))
        Test_data=pd.read_csv(os.path.join(self.config.transaction_dir,'transaction_test.csv'))

        try:
            X_train=Train_data.drop(columns=['isFraud','Unnamed: 0'])
            y_train=Train_data['isFraud']
            X_test=Test_data.drop(columns=['isFraud','Unnamed: 0'])
            y_test=Test_data['isFraud']
            models={
                'RandomForest':RandomForestClassifier(),
                'LogisticRegression':LogisticRegression(),
                'KNeighborsClassifier':KNeighborsClassifier(),
                'GradientBoostingClassifier':GradientBoostingClassifier(),
                'SVC':SVC(),
                'XGBClassifier':XGBClassifier()
            }
            model_report:dict=evaluate_model_transaction(X_train=X_train,y_train=y_train,X_test=X_test,y_test=y_test,models=models)
            best_model_score=max(sorted(model_report.values()))
            best_model_name=list(model_report.keys())[
                list(model_report.values()).index(best_model_score)
            ]
            best_model=models[best_model_name]
            print(best_model_score,best_model)
            if best_model_score<0.9:
                raise Exception('No Best model found')
            logger.info(f'Best found model on both training and testing dataset for fraud')
            save_object(self.config.transaction_model,best_model)
            
        except Exception as e:
            raise e
    def creditscore_model_train(self):
        Train_data=pd.read_csv(os.path.join(self.config.credit_score_dir,'credit_train.csv'))
        Train_data.drop(columns=['Unnamed: 0'],inplace=True)
        Test_data=pd.read_csv(os.path.join(self.config.credit_score_dir,'credit_test.csv'))
        Test_data.drop(columns=['Unnamed: 0'],inplace=True)
        try:
            X_train=Train_data.drop(columns=['Credit Score'])
            y_train=Train_data['Credit Score']
            X_test=Test_data.drop(columns=['Credit Score'])
            y_test=Test_data['Credit Score']
            models=models={
                'RandomForest':RandomForestClassifier(),
                'LogisticRegression':LogisticRegression(),
                'KNeighborsClassifier':KNeighborsClassifier(),
                'GradientBoostingClassifier':GradientBoostingClassifier(),
                'SVC':SVC(),
                'XGBClassifier':XGBClassifier()
            }
            model_report:dict=evaluate_model_creditscore(X_train=X_train,y_train=y_train,X_test=X_test,y_test=y_test,models=models)
            best_model_score=max(sorted(model_report.values()))
            best_model_name=list(model_report.keys())[
                list(model_report.values()).index(best_model_score)
            ]
            best_model=models[best_model_name]
            print(best_model_score,best_model)
            if best_model_score<0.9:
                raise Exception('No Best model found')
            logger.info(f'Best found model on both training and testing dataset for credit')
            save_object(self.config.credit_score_model,best_model)
            
        except Exception as e:
            raise e

            

            

                

In [8]:
try:
    config=ConfigurationManager()
    training_config=config.get_training_config()
    training=Training(config=training_config)
    training.cnn_train()
    training.transaction_model_train()
    training.creditscore_model_train()
except Exception as e:
    raise e

[2024-07-23 22:28:29,775: INFO: common: yaml file: config\config.yaml loaded successfully]
[2024-07-23 22:28:29,784: INFO: common: yaml file: params.yaml loaded successfully]
[2024-07-23 22:28:29,789: INFO: common: created directory at: artifacts]
[2024-07-23 22:28:29,791: INFO: common: created directory at: artifacts\training]
1.0 RandomForestClassifier()
[2024-07-23 22:29:05,378: INFO: 3802236235: Best found model on both training and testing dataset for fraud]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


1.0 GradientBoostingClassifier()
[2024-07-23 22:29:07,451: INFO: 3802236235: Best found model on both training and testing dataset for credit]


In [9]:
import pandas as pd
pd.read_csv(r'C:\Users\bisht\OneDrive\Desktop\Personal_project\artifacts\data_ingestion\transaction_data\transaction_train.csv')

Unnamed: 0.1,Unnamed: 0,amount,oldbalanceOrg,newbalanceOrig,isFraud
0,3674432,18353.10,84926.31,66573.21,0
1,2546147,242969.93,26966073.90,27209043.83,0
2,219379,12396.64,180992.22,168595.58,0
3,1981095,209743.85,4607817.81,4817561.65,0
4,4507052,239116.87,5628300.97,5867417.84,0
...,...,...,...,...,...
13063,6009612,5429153.98,5429153.98,0.00,1
13064,6286329,48516.87,48516.87,0.00,1
13065,6260944,277048.48,277048.48,0.00,1
13066,5963369,36210.65,311431.61,275220.96,0
