In [3]:
# This cell can be skipped when reading. The class is defined here only so that the pipeline loaded from the joblib file can be unpickled and used.
from sklearn.base import BaseEstimator, TransformerMixin

class PdaysTransformer(BaseEstimator, TransformerMixin):
    """Replace the ``pdays`` column with a contact flag and an imputed duration.

    The value ``-1`` in ``pdays`` is treated as a sentinel. ``transform``
    returns a copy of the input where ``pdays`` is removed and two columns
    are added:

    * ``prev_contacted`` -- 1 where ``pdays != -1``, otherwise 0.
    * ``pdays_duration`` -- ``pdays`` with every ``-1`` replaced by the
      median learned in ``fit``.

    Attributes
    ----------
    median_pdays : float or int or None
        Median of the non-sentinel ``pdays`` values seen in ``fit`` (0 when
        there were none); ``None`` until ``fit`` has been called.
    """

    def __init__(self):
        # NOTE(review): the name is kept without a trailing underscore on
        # purpose -- renaming it would presumably break instances that were
        # already serialized with this attribute name.
        self.median_pdays = None

    def fit(self, X, y=None):
        """Learn the median of the ``pdays`` values that are not ``-1``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain a ``pdays`` column.
        y : ignored
            Present for scikit-learn API compatibility.

        Returns
        -------
        self
        """
        observed = X.loc[X['pdays'] != -1, 'pdays']
        # Fall back to 0 when every row carries the sentinel, so the median
        # of an empty selection (NaN) is never stored.
        self.median_pdays = observed.median() if len(observed) > 0 else 0
        return self

    def transform(self, X):
        """Return a copy of ``X`` with ``pdays`` split into two features.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain a ``pdays`` column. It is not modified.

        Returns
        -------
        pandas.DataFrame
            ``X`` without ``pdays`` and with ``prev_contacted`` and
            ``pdays_duration`` appended.

        Raises
        ------
        sklearn.exceptions.NotFittedError
            If ``fit`` has not been called, i.e. ``median_pdays`` is ``None``.
        """
        if self.median_pdays is None:
            # Without this guard, replace(-1, None) would silently produce
            # wrong or missing durations instead of failing loudly.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                f"{type(self).__name__} is not fitted yet; "
                "call fit() before transform()."
            )

        was_contacted = X['pdays'] != -1
        # assign() works on a copy, so the caller's frame stays untouched and
        # no in-place drop is needed.
        return X.assign(
            prev_contacted=was_contacted.astype(int),
            pdays_duration=X['pdays'].replace(-1, self.median_pdays),
        ).drop(columns='pdays')

# Python predictions:

In [1]:
import pandas as pd

# Three hand-written customer profiles, one dict per row.
# Each dict also carries a 'deposit' entry, which is dropped below so that
# df holds only the remaining 16 columns.
data1 = {
    'age': 20.0,
    'balance': 500,
    'day': 15.0,
    'duration': 255.0,
    'campaign': 2.0,
    'pdays': -1.0,
    'previous': 0.0,
    'job': 'student',
    'marital': 'single',
    'education': 'secondary',
    'default': 'no',
    'housing': 'no',
    'loan': 'no',
    'contact': 'cellular',
    'month': 'apr',
    'poutcome': 'failure',
    'deposit': 'no',
}

data2 = {
    'age': 50.0,
    'balance': 10000,
    'day': 15.0,
    'duration': 50,
    'campaign': 2.0,
    'pdays': 40,
    'previous': 0.0,
    'job': 'entrepreneur',
    'marital': 'married',
    'education': 'secondary',
    'default': 'no',
    'housing': 'yes',
    'loan': 'no',
    'contact': 'cellular',
    'month': 'apr',
    'poutcome': 'success',
    'deposit': 'yes',
}

data3 = {
    'age': 70,
    'balance': 200000,
    'day': 15.0,
    'duration': 100,
    'campaign': 2.0,
    'pdays': 100,
    'previous': 0.0,
    'job': 'retired',
    'marital': 'divorced',
    'education': 'tertiary',
    'default': 'yes',
    'housing': 'no',
    'loan': 'no',
    'contact': 'cellular',
    'month': 'apr',
    'poutcome': 'success',
    'deposit': 'yes',
}

# One row per profile, in the order data1, data2, data3.
df = pd.DataFrame([data1, data2, data3]).drop(columns="deposit")

# Show the column count, then the frame itself.
print(df.shape[1])
print(df)

16
    age  balance   day  duration  campaign  pdays  previous           job  \
0  20.0      500  15.0     255.0       2.0   -1.0       0.0       student   
1  50.0    10000  15.0      50.0       2.0   40.0       0.0  entrepreneur   
2  70.0   200000  15.0     100.0       2.0  100.0       0.0       retired   

    marital  education default housing loan   contact month poutcome  
0    single  secondary      no      no   no  cellular   apr  failure  
1   married  secondary      no     yes   no  cellular   apr  success  
2  divorced   tertiary     yes      no   no  cellular   apr  success  


In [None]:
from joblib import load

# Restore the saved bundle and take the object stored under its "model" key.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load bundles that come from a trusted source.
bundle = load("pack_for_streamlit.joblib")
final_model = bundle["model"]

# One row of probabilities per row of df.
# NOTE(review): columns are presumably ordered as in final_model.classes_,
# which is printed first for reference -- confirm for this model type.
predictions = final_model.predict_proba(df)

print(final_model.classes_)
print(predictions)

[0 1]
[[0.06156832 0.9384317 ]
 [0.9567617  0.04323829]
 [0.25048405 0.74951595]]


# Screenshots from Streamlit
Link to Google Drive document: https://docs.google.com/document/d/149WF32nd8esDMxO318H1eoJETsdKAUimh5ndB7Qit84/edit?usp=sharing
(Make sure you are signed in with a UC3M account; otherwise you will not have access.)