In [1]:
import sys

# Put the project's source directory at the front of the import search path so the
# project-local packages used by this notebook can be imported. The path is relative,
# so it depends on the directory the kernel is started from.
sys.path.insert(0, '../src/')
from utils.utils import load_config_file

In [2]:
from data.data_load import DataLoad

In [3]:
# Instantiate the project's data loader (DataLoad comes from data.data_load).
dl = DataLoad()

In [4]:
# Load the training data by its name; how that name maps to an actual source is
# decided inside DataLoad and is not visible from this notebook.
df = dl.load_data('train_dataset_name')

2024-01-16 08:07:18 [info     ] Initiating data load with name: train_dataset_name


In [5]:
from data.data_validation import DataValidation
# Validator object; it is applied to the loaded data in the following cell.
dv = DataValidation()

In [6]:
# Run the project's validation on the loaded data and keep its result.
# NOTE(review): `is_valid` is not inspected by any cell visible here, so a failed
# validation would presumably not stop the notebook. Consider asserting on it once
# the return type of `run` is confirmed.
is_valid = dv.run(df)

2024-01-16 08:07:20 [info     ] Initiating validation...      
2024-01-16 08:07:20 [info     ] Validation columns passed...  
2024-01-16 08:07:20 [info     ] Success on validate data      


In [7]:
from data.data_transformation import DataTransformation
# Transformation helper built from the loaded data; it produces the train/validation
# split in the following cell.
dt = DataTransformation(df)

In [8]:
# Split into training and validation features/targets.
# NOTE(review): no seed is passed here, so whether the split is reproducible depends
# on what `train_test_spliting` does internally — TODO confirm.
X_train, X_val, y_train, y_val = dt.train_test_spliting()

# Report the shape of each piece, one per line, in the order they were returned.
print(*(part.shape for part in (X_train, X_val, y_train, y_val)), sep='\n')

(112500, 10)
(37500, 10)
(112500,)
(37500,)


In [9]:
from sklearn.pipeline import Pipeline
from feature_engine.discretisation import EqualFrequencyDiscretiser, EqualWidthDiscretiser
from feature_engine.imputation import MeanMedianImputer
from feature_engine.wrappers import SklearnTransformerWrapper
from sklearn.preprocessing import RobustScaler, StandardScaler

In [10]:
# Read the configuration once so both steps are built from the same loaded values
# (the original called load_config_file() separately for each step).
config = load_config_file()

# Preprocessing pipeline; steps run in the order listed:
#   1. 'imputer'     - MeanMedianImputer on the variables listed under 'vars_imputer',
#                      using the imputer's default method.
#   2. 'discretizer' - EqualFrequencyDiscretiser on the variables listed under
#                      'vars_discretizer'.
#   3. 'scaler'      - StandardScaler applied through feature_engine's
#                      SklearnTransformerWrapper.
# NOTE(review): assuming the config is a dict, `.get` returns None for a missing key;
# confirm that the transformers' behaviour for `variables=None` is acceptable, or
# switch to indexing so that a missing key fails loudly.
pipe = Pipeline(
    [
        ('imputer', MeanMedianImputer(variables=config.get('vars_imputer'))),
        ('discretizer', EqualFrequencyDiscretiser(variables=config.get('vars_discretizer'))),
        ('scaler', SklearnTransformerWrapper(StandardScaler())),
    ]
)

In [11]:
from data.data_preprocess import DataPreprocess
# Preprocessing helper wrapping the pipeline defined in the previous cell.
dp = DataPreprocess(pipe)

# `train` is called with the training features only (presumably this fits the
# pipeline — confirm in DataPreprocess); afterwards the same `dp` object transforms
# both the training and the validation features.
dp.train(X_train)
X_train_processed = dp.transform(X_train)
X_val_processed = dp.transform(X_val)

2024-01-16 08:07:21 [info     ] Starting preprocessing...     
2024-01-16 08:07:21 [info     ] Initiating preprocessor data transformation...
2024-01-16 08:07:21 [info     ] Initiating preprocessor data transformation...


In [12]:
from train.train import TrainModels

In [13]:
# Trainer constructed from the preprocessed training features and the training target.
tm = TrainModels(
    dados_X=X_train_processed,
    dados_y=y_train,
)

In [14]:
from sklearn.linear_model import LogisticRegression
# Train through the project's trainer, handing it a LogisticRegression created with
# scikit-learn's default hyperparameters (no arguments are passed).
model = tm.train(model=LogisticRegression())

In [15]:
# Ask the trainer to save the model; what is written and where is decided inside
# TrainModels and is not visible from this notebook.
tm.save_model()

In [16]:
from evaluation.classifier_eval import ModelEvaluation

In [19]:
# Evaluator for the trained model over the preprocessed training data, with 5 splits.
# NOTE(review): this cell's execution count (19) does not follow the previous cell's
# (16), so it was re-run or intermediate cells were removed — confirm the notebook
# reproduces with Restart Kernel -> Run All.
# NOTE(review): only the training split is evaluated here; X_val_processed / y_val are
# not used by any visible cell. The preprocessing was also applied before this step,
# so presumably every fold shares the same fitted preprocessing — confirm that is intended.
model_eval = ModelEvaluation(model, X_train_processed, y_train, n_splits=5)

# Per-split scores (named ROC AUC by the variable); the mean is left as the cell's
# last expression so the notebook displays it.
roc_auc_scores = model_eval.cross_val_evaluate()
roc_auc_scores.mean()

2024-01-16 08:08:24 [info     ] Initiating cross validation...


0.7920336586401462