In [21]:
%pwd

'd:\\ML\\LiveProject\\ML-CreditCardDefaulter'

In [22]:
import os

In [23]:
os.chdir('../')

In [26]:
%pwd

'd:\\ML\\LiveProject\\ML-CreditCardDefaulter'

In [62]:
from dataclasses import dataclass
from pathlib import Path

@dataclass(frozen=True)
class DataValidationConfig:
    root_dir: Path
    STATUS_FILE: str
    unzip_data_dir: Path
    all_schema: dict

@dataclass(frozen=True)
class ModelPredictionConfig:
    root_dir: Path
    predict_default_data_path: Path
    predict_data_path: Path
    predict_validation_status_file:Path
    all_schema: dict
    predict_kmeans_model_path: Path
    model_path:Path

@dataclass(frozen=True)
class DataTransformationConfig:
    root_dir: Path
    data_path: Path
    null_val_path: Path
@dataclass(frozen=True)
class ModelTrainerConfig:
    root_dir: Path
    train_data_path: Path
    model_kmeans_name: str
    model_xabost_name: str
    elbow_png:Path
    param_grid:dict
    param_grid_xgboost: dict

In [39]:
from ml_creditcard_defaulter.constants import *
from ml_creditcard_defaulter.utils.common import read_yaml, create_directories

In [63]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_data_validation_config(self) -> DataValidationConfig:
        config = self.config.data_validation
        schema = self.schema.ColName

        create_directories([config.root_dir])

        data_validation_config = DataValidationConfig(
            root_dir=config.root_dir,
            STATUS_FILE=config.STATUS_FILE,
            unzip_data_dir = config.unzip_data_dir,
            all_schema=schema,
        )

        return data_validation_config
    
    def get_model_predict_config(self) -> ModelPredictionConfig:
        config = self.config.model_prediction
        schema = self.schema.ColName
        
        create_directories([config.root_dir])

        model_predict_config = ModelPredictionConfig(
            root_dir=config.root_dir,
            predict_default_data_path=config.predict_default_data_path,
            predict_data_path = config.predict_data_path,
            predict_validation_status_file = config.predict_validation_status_file,
            all_schema=schema,
            predict_kmeans_model_path = config.predict_kmeans_model_path,
            model_path = config.model_path,
        )

        return model_predict_config
    def get_data_transformation_config(self) -> DataTransformationConfig:
        config = self.config.data_transformation

        create_directories([config.root_dir])

        data_transformation_config = DataTransformationConfig(
            root_dir=config.root_dir,
            data_path=config.data_path,
            null_val_path = config.null_val_path,
        )

        return data_transformation_config
    def get_model_trainer_config(self) -> ModelTrainerConfig:
        config = self.config.model_trainer
        params = self.params
        params.param_grid["var_smoothing"] = [float(v) for v in params.param_grid["var_smoothing"]]
        # schema =  self.schema.TARGET_COLUMN

        create_directories([config.root_dir])

        model_trainer_config = ModelTrainerConfig(
            root_dir=config.root_dir,
            train_data_path = config.train_data_path,
            model_kmeans_name = config.model_kmeans_name,
            model_xabost_name = config.model_xabost_name,
            elbow_png=config.elbow_png,
            param_grid=params.param_grid,
            param_grid_xgboost=params.param_grid_xgboost
        )

        return model_trainer_config

In [30]:
import os
import urllib.request as request
import zipfile
from ml_creditcard_defaulter import logger
from ml_creditcard_defaulter.utils.common import get_size

In [31]:
import pandas as pd

In [34]:
class DataValiadtion:
    def __init__(self, config: DataValidationConfig):
        self.config = config


    def validate_all_columns(self, file_path, stutus_file)-> bool:
        try:
            validation_status = None

            data = pd.read_csv(file_path)
            all_cols = list(data.columns)

            all_schema = self.config.all_schema.keys()

            
            for col in all_cols:
                if col not in all_schema:
                    validation_status = False
                    with open(stutus_file, 'w') as f:
                        f.write(f"Validation status: {validation_status}")
                else:
                    validation_status = True
                    with open(stutus_file, 'w') as f:
                        f.write(f"Validation status: {validation_status}")

            return validation_status
        
        except Exception as e:
            raise e

  

In [42]:
try:
    config = ConfigurationManager()
    get_model_predict_config = config.get_model_predict_config()
    model_predict = DataValiadtion(config=get_model_predict_config)
    model_predict.validate_all_columns(get_model_predict_config.predict_default_data_path, get_model_predict_config.predict_validation_status_file)
except Exception as e:
    raise e

[2025-01-25 12:43:52,924: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-01-25 12:43:52,933: INFO: common: yaml file: params.yaml loaded successfully]
[2025-01-25 12:43:52,935: INFO: common: yaml file: schema.yaml loaded successfully]
[2025-01-25 12:43:52,938: INFO: common: created directory at: artifacts]
[2025-01-25 12:43:52,941: INFO: common: created directory at: artifacts/model_prediction]


In [44]:
import os
import urllib.request as request
import zipfile
from ml_creditcard_defaulter import logger
from ml_creditcard_defaulter.components.data_transformation import DataTransformation

In [58]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from kneed import KneeLocator
from sklearn.linear_model import ElasticNet
import joblib
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics  import roc_auc_score,accuracy_score
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from ml_creditcard_defaulter.components.model_training import ModelTrainer
import pickle

In [127]:
class ModelPrediction:
    def __init__(self, config: ModelPredictionConfig):
        self.config = config

    def find_correct_model_file(self,cluster_number):
            logger.info('Entered the find_correct_model_file method of the File_Operation class')
            try:
                folder_name=self.config.model_path
                list_of_files = os.listdir(folder_name)

                for file in list_of_files:
                    print(file,'*******************************', cluster_number)
                    try:
                        if (file.index(str( cluster_number))!=-1):
                            model_name=file
                            
                    except:
                        continue
                model_name=model_name.split('.')[0]
                logger.info('Exited the find_correct_model_file method of the Model_Finder class.')
                return model_name
            except Exception as e:
                logger.info('Exception occured in find_correct_model_file method of the Model_Finder class. Exception message:  ' + str(e))
                logger.info('Exited the find_correct_model_file method of the Model_Finder class with Failure')
                raise Exception()
            
    def load_model(self,filename):
        logger.info( 'Entered the load_model method of the File_Operation class')
        try:
            with open(self.config.model_path + '/' + filename ,'rb') as f:
                logger.info('Model File ' + filename + ' loaded. Exited the load_model method of the Model_Finder class')
                return pickle.load(f)
        except Exception as e:
            logger.info('Exception occured in load_model method of the Model_Finder class. Exception message:  ' + str( e))
            logger.info('Model File ' + filename + ' could not be saved. Exited the load_model method of the Model_Finder class')
            raise Exception()

In [128]:
try:
    config = ConfigurationManager()
    model_training_config = config.get_model_trainer_config()
    model_training = ModelTrainer(config=model_training_config)
    data_transformation_config = config.get_data_transformation_config()
    data_transformation = DataTransformation(config=data_transformation_config)
    model_prediction_config = config.get_model_predict_config()
    model_prediction = ModelPrediction(config=model_prediction_config)
    
    data = pd.read_csv(get_model_predict_config.predict_default_data_path)
    
    is_null_present,cols_with_missing_values=data_transformation.is_null_present(data)
    
    if(is_null_present):
        data=data_transformation.impute_missing_values(data,cols_with_missing_values)

    X = data_transformation.scale_numerical_columns(data)

    kmeans_model = joblib.load(model_prediction_config.predict_kmeans_model_path)

    clusters=kmeans_model.predict(X)#drops the first column for cluster prediction
    
    X['clusters']=clusters
    clusters=X['clusters'].unique()
    predictions=[]
    for i in clusters:
        cluster_data= X[X['clusters']==i]
        cluster_data = cluster_data.drop(['clusters'],axis=1)
        model_name = model_prediction.find_correct_model_file(i)
        model = model_prediction.load_model(model_name)
        result=(model.predict(cluster_data))

        final= pd.DataFrame(list(zip(result)),columns=['Predictions'])
        path="Prediction_Output_File/Predictions.csv"
        final.to_csv("Prediction_Output_File/Predictions.csv",header=True,mode='a+') #appends result to prediction file
        logger.info('End of Prediction')
except Exception as ex:
     logger.info( 'Error occured while running the prediction!! Error:: %s' % ex)
     raise ex


[2025-01-25 17:04:30,795: INFO: common: yaml file: config\config.yaml loaded successfully]
[2025-01-25 17:04:30,801: INFO: common: yaml file: params.yaml loaded successfully]
[2025-01-25 17:04:30,809: INFO: common: yaml file: schema.yaml loaded successfully]


[2025-01-25 17:04:30,812: INFO: common: created directory at: artifacts]
[2025-01-25 17:04:30,816: INFO: common: created directory at: artifacts/model_trainer]
[2025-01-25 17:04:30,819: INFO: common: created directory at: artifacts/data_transformation]
[2025-01-25 17:04:30,821: INFO: common: created directory at: artifacts/model_prediction]
[2025-01-25 17:04:30,828: INFO: data_transformation: Finding missing values is a success.Data written to the null values file. Exited the is_null_present method of the Preprocessor class]
[2025-01-25 17:04:30,829: INFO: data_transformation: Entered the scale_numerical_columns method of the Preprocessor class]
[2025-01-25 17:04:30,835: INFO: data_transformation: scaling for numerical values successful. Exited the scale_numerical_columns method of the Preprocessor class]
[2025-01-25 17:04:30,837: INFO: 3262367771: Entered the find_correct_model_file method of the File_Operation class]
NaiveBayes0 ******************************* 1
NaiveBayes1 *********

  if null_counts[i]>0:


Exception: 

In [123]:
kmeans_model = joblib.load('artifacts/model_trainer/model_kmeans.pkl')

In [124]:
kmeans_model

array([1, 1, 1, ..., 1, 1, 1], shape=(30000,), dtype=int32)

In [73]:
X.columns

Index(['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6'],
      dtype='object')

In [79]:
tr = pd.read_csv(r'artifacts\data_transformation\train.csv')

In [80]:
tr.columns

Index(['LIMIT_BAL', 'SEX', 'EDUCATION', 'MARRIAGE', 'AGE', 'PAY_0', 'PAY_2',
       'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6', 'BILL_AMT1', 'BILL_AMT2',
       'BILL_AMT3', 'BILL_AMT4', 'BILL_AMT5', 'BILL_AMT6', 'PAY_AMT1',
       'PAY_AMT2', 'PAY_AMT3', 'PAY_AMT4', 'PAY_AMT5', 'PAY_AMT6', 'Labels'],
      dtype='object')

In [88]:
len(kmeans_model)

30000

In [90]:
joblib.load('artifacts/model_trainer/NaiveBayes0.pkl')

In [91]:
kmeans_model

array([1, 1, 1, ..., 1, 1, 1], shape=(30000,), dtype=int32)