In [1]:
from churn.parsing import DataManager
from churn.model import ChurnModel
from pathlib import Path
import yaml
from churn.logger import logger

In [2]:
# All locations are resolved relative to the parent of the current working
# directory (assumes the kernel is started from the notebook's own folder, one
# level below the project root — TODO confirm).
PROJECT_ROOT = Path(".").absolute().parent

RAW_DATA_PATH = PROJECT_ROOT / "data/input/case_churn.csv"
CONFIG_PATH = PROJECT_ROOT / "config/config.yml"
MODEL_PATH = PROJECT_ROOT / "artefacts/models/model.joblib"
# Fix: this used to end in "reports/model.joblib" (copy-paste of MODEL_PATH);
# a report *folder* path must point at the directory itself.
REPORT_FOLDER_PATH = PROJECT_ROOT / "artefacts/reports"

In [3]:
# Parse the YAML configuration file into `config`; `safe_load` only builds
# plain Python objects, so no arbitrary tags are executed.
with CONFIG_PATH.open() as config_file:
    config = yaml.safe_load(config_file)

## Load data

In [4]:
# Build the project's data manager on top of the raw churn CSV.
data_manager = DataManager(RAW_DATA_PATH)

# Preview the first rows. NOTE(review): per the log output of this cell,
# accessing `.data` triggers "Processing raw data" / "Reading raw data" —
# it looks like the data is loaded lazily on access; confirm in churn.parsing.
data_manager.data.head()

INFO [ 2023-01-01 21:41:30,798 | parsing.py | __init__ ] DataManager initialized.
INFO [ 2023-01-01 21:41:30,799 | parsing.py | data ] Processing raw data.
INFO [ 2023-01-01 21:41:30,800 | parsing.py | raw_data ] Reading raw data.
INFO [ 2023-01-01 21:41:30,819 | parsing.py | raw_data ] Reading raw data.


Unnamed: 0,customerid,gender,seniorcitizen,partner,dependents,age,tenure,busines_loan,multiplebusinessloans,creditline,...,mortgage,stocks,forex,contract,paperlessbilling,paymentmethod,monthlycharges,totalcharges,churn_within_a_month,satisfactory_onboarding_form
0,1,Female,0,Yes,No,35,1,No,,B,...,No,No,No,A,Yes,Electronic check,29.85,29.85,No,4
1,2,Male,0,No,No,46,34,Yes,No,B,...,No,No,No,B,No,Mailed check,56.95,1889.5,No,2
2,3,Male,0,No,No,41,2,Yes,No,B,...,No,No,No,A,Yes,Mailed check,53.85,108.15,Yes,3
3,4,Male,0,No,No,40,45,No,,B,...,Yes,No,No,B,No,Bank transfer (automatic),42.3,1840.75,No,1
4,5,Female,0,No,No,59,2,Yes,No,A,...,No,No,No,A,Yes,Electronic check,70.7,151.65,Yes,5


## Fit model

In [5]:
# Build the churn model from the column groups declared in the config:
# which columns are numeric, which are categorical, and which is the response.
dtype_treatment = config["model_dtype_treatment"]
model = ChurnModel(
    numeric_features_list=dtype_treatment["numeric"],
    categorical_features_list=dtype_treatment["categorical"],
    response_col=dtype_treatment["response"],
)

In [6]:
# Train model on the data exposed by the data manager.
# NOTE(review): this cell's log shows the raw data being processed and read
# again, so `data_manager.data` appears to be recomputed on every access —
# consider binding it to a variable once and reusing it across cells.
model.train(data_manager.data)

INFO [ 2023-01-01 21:41:30,912 | parsing.py | data ] Processing raw data.
INFO [ 2023-01-01 21:41:30,912 | parsing.py | raw_data ] Reading raw data.
INFO [ 2023-01-01 21:41:30,931 | parsing.py | raw_data ] Reading raw data.
INFO [ 2023-01-01 21:41:30,966 | model.py | train ] Creating pipeline.
INFO [ 2023-01-01 21:41:30,967 | model.py | _make_cls_pipeline ] Creating classification pipeline.
INFO [ 2023-01-01 21:41:30,967 | model.py | _make_transformer ] Creating num + cat transformer.
INFO [ 2023-01-01 21:41:30,967 | model.py | _make_numeric_transformer ] Creating numeric transformed created.
INFO [ 2023-01-01 21:41:30,968 | model.py | _make_scaler ] Creating scaler.
INFO [ 2023-01-01 21:41:30,968 | model.py | _make_scaler ] Scaler created.
INFO [ 2023-01-01 21:41:30,969 | model.py | _make_numeric_transformer ] Numeric transformed created.
INFO [ 2023-01-01 21:41:30,969 | model.py | _make_cat_transformer ] Creating categorical transformer.
INFO [ 2023-01-01 21:41:30,969 | model.py | _m

In [10]:
# Predict using the object exposed as `model.model`.
# NOTE(review): this scores the same `data_manager.data` that was passed to
# `model.train`, so the result is an in-sample prediction, not an evaluation
# on held-out data.
model.model.predict(data_manager.data)

INFO [ 2023-01-01 22:09:38,943 | parsing.py | data ] Processing raw data.
INFO [ 2023-01-01 22:09:38,943 | parsing.py | raw_data ] Reading raw data.
INFO [ 2023-01-01 22:09:38,960 | parsing.py | raw_data ] Reading raw data.


array([1, 0, 1, ..., 1, 1, 0])

In [11]:
# Save model to MODEL_PATH (artefacts/models/model.joblib under the project root).
model.save_model(MODEL_PATH)

INFO [ 2023-01-01 22:09:52,735 | model.py | save_model ] Saving model to /Users/heamac0167/Desktop/churn/artefacts/models/model.joblib
INFO [ 2023-01-01 22:09:52,818 | model.py | save_model ] Model saved.


In [12]:
# Load model back from MODEL_PATH, the same file written by `save_model`.
# NOTE(review): presumably this replaces the state held by `model` — confirm
# against ChurnModel.load_model.
model.load_model(MODEL_PATH)

INFO [ 2023-01-01 22:10:10,878 | model.py | load_model ] Loading model from /Users/heamac0167/Desktop/churn/artefacts/models/model.joblib
INFO [ 2023-01-01 22:10:10,908 | model.py | load_model ] Model loaded.
