In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.impute import SimpleImputer
from catboost import CatBoostRegressor
import shap
import ipywidgets as widgets

In [4]:
# Share of missing values at or above which a column is discarded.
MAX_MISSING_RATIO = 0.9
# Numeric columns whose variance falls below this value are discarded.
MIN_VARIANCE = 0.1

# Load the market table and discard the leftover CSV index column.
market = pd.read_csv('estaticos_market.csv').drop(columns='Unnamed: 0')

# Drop columns that are almost entirely missing.
missing_ratio = market.isna().sum() / len(market)
market = market.drop(columns=missing_ratio[missing_ratio >= MAX_MISSING_RATIO].index)

# Fill missing values in non-numeric columns with the literal 'NA'.
# The result is assigned back to the column: a chained
# `market[col].fillna(..., inplace=True)` may act on a temporary object and
# leave `market` unchanged (pandas Copy-on-Write).
for col in market.select_dtypes(exclude='number').columns:
    market[col] = market[col].fillna('NA')

# Snapshot the non-numeric columns only AFTER filling, so that `df` below is
# built from the filled values.
obj_df = market.select_dtypes(exclude='number')

# Median-impute the numeric columns. `verbose` is no longer passed because the
# parameter was removed from SimpleImputer in recent scikit-learn releases.
num_df = market.select_dtypes(include='number')
inputer = SimpleImputer(strategy='median')
filled_array = inputer.fit_transform(num_df)
num_df = pd.DataFrame(filled_array, columns=num_df.columns, index=num_df.index)

# Discard near-constant numeric columns.
variances = num_df.var()
num_df = num_df.drop(columns=variances[variances < MIN_VARIANCE].index)

# Preprocessed frame: filled non-numeric columns + imputed numeric columns,
# indexed by company id.
# NOTE(review): the helper functions defined in this cell read `market`, not
# `df` - confirm which frame is meant to feed the model.
df = pd.concat((obj_df, num_df), axis=1).set_index('id')

cat = CatBoostRegressor(silent=True, iterations=200, max_depth=5)

def train_classifier(classifier, client_list):
    """Fit ``classifier`` to separate current clients from the rest of the market.

    Every row of the module-level ``market`` frame (indexed by ``id``) is
    labelled 1 when its id appears in ``client_list`` and 0 otherwise; the
    remaining columns are used as features, and the non-numeric ones are
    passed to ``fit`` as ``cat_features``.

    Parameters
    ----------
    classifier : object exposing ``fit(X, y, cat_features=...)``
        Fitted in place.
    client_list : object exposing ``.array``
        Ids of the current clients - presumably a pandas Series whose values
        all occur in ``market['id']``; verify against the caller.

    Returns
    -------
    None
    """
    labelled = market.set_index('id').assign(is_client=0)
    labelled.loc[client_list.array, 'is_client'] = 1

    y = labelled['is_client']
    X = labelled.drop(columns='is_client')
    categorical = X.select_dtypes(exclude='number').columns

    classifier.fit(X, y, cat_features=categorical)
    
def get_predict(classifier, client_list):
    """Score every market row that is not already a client.

    Rows of the module-level ``market`` frame (indexed by ``id``) whose id is
    listed in ``client_list`` are excluded; the remaining rows are passed to
    ``classifier.predict``.

    Parameters
    ----------
    classifier : object exposing ``predict(X)``
        Model used to score the prospects.
    client_list : object exposing ``.array``
        Ids of the current clients - presumably a pandas Series whose values
        all occur in ``market['id']``; verify against the caller.

    Returns
    -------
    Whatever ``classifier.predict`` returns for the non-client rows, in the
    row order of ``market``.
    """
    candidates = market.set_index('id')
    candidates['is_client'] = 0
    candidates.loc[client_list.array, 'is_client'] = 1

    # Filter and drop in one non-mutating chain: dropping in place on a
    # boolean-filtered slice triggers pandas' SettingWithCopy warning.
    prospects = (
        candidates.loc[candidates['is_client'] == 0]
        .drop(columns='is_client')
    )
    return classifier.predict(prospects)

def get_shap(classifier, client_list):
    """Compute SHAP values of ``classifier`` for every non-client market row.

    Rows of the module-level ``market`` frame (indexed by ``id``) whose id is
    listed in ``client_list`` are excluded; the remaining rows are explained
    with a ``shap.TreeExplainer`` built from ``classifier``.

    Parameters
    ----------
    classifier : tree model accepted by ``shap.TreeExplainer``
        The model to explain.
    client_list : object exposing ``.array``
        Ids of the current clients - presumably a pandas Series whose values
        all occur in ``market['id']``; verify against the caller.

    Returns
    -------
    Whatever ``explainer.shap_values`` returns for the non-client rows.
    """
    candidates = market.set_index('id')
    candidates['is_client'] = 0
    candidates.loc[client_list.array, 'is_client'] = 1

    # Filter and drop in one non-mutating chain: dropping in place on a
    # boolean-filtered slice triggers pandas' SettingWithCopy warning.
    prospects = (
        candidates.loc[candidates['is_client'] == 0]
        .drop(columns='is_client')
    )

    # Explain the model that was passed in; the previous code always explained
    # the module-level `cat`, silently ignoring the `classifier` argument.
    explainer = shap.TreeExplainer(classifier)
    return explainer.shap_values(prospects)

In [20]:
# Upload widget limited to a single `.csv` file per upload.
w = widgets.FileUpload(accept='.csv', multiple=False)
display(w)

FileUpload(value={}, accept='.csv', description='Upload')

FileUpload(value={}, description='Upload')