# Initialization

In [1]:
# pip install --quiet streamlit plotly mlflow xgboost

In [2]:
! pip install --quiet -r ./data/model/requirements.txt

In [3]:
import ast
import json
import shlex

import mlflow
import numpy as np
import pandas as pd
import streamlit as st

In [4]:
dir_in = './data/in/'

# Folder Contents

In [5]:
pwd

'/home/user_n/Documents/Dev/git/project_8'

In [6]:
path_code = 'Index.py'

In [7]:
ls $path_code

Index.py


In [8]:
cat $path_code

'''
Index.py
Main page of Streamlit app
'''

import streamlit as st

st.set_page_config(
	page_title="Hello",
	page_icon="👋",
	layout="wide"	
)

# Customize the sidebar
st.sidebar.title("About")
st.sidebar.info("""
	Web App URL: <https://project8-ocr.streamlit.app/>
	GitHub Repository: <https://github.com/JeanRosselVallee/project_8>
	""")
st.sidebar.image("https://www.whenthebanksaysno.co.uk/wp-content/uploads/2023/05/D9585792-ED4C-4363-900E-1EDCE31B99B1.jpeg")

# Customize page title
st.title("Credit Application Dashboard")

st.markdown(
    """
    This multipage app template demonstrates various interactive web apps created using [streamlit](https://streamlit.io) and [leafmap](https://leafmap.org). It is an open-source project and you are very welcome to contribute to the [GitHub repository](https://github.com/giswqs/streamlit-multipage-template).
    """
)

st.header("Instructions")

markdown = """
1. Select a client's application for credit
2. Find your favorite emoji from https:/

# Launch App on LocalHost

In [9]:
#! streamlit run $path_code

[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8502[0m
[34m  Network URL: [0m[1mhttp://192.168.1.107:8502[0m
[0m
^C
[34m  Stopping...[0m


# Commandes Git

# Import Data from Project 7

## Choix du jeu de données

Les prédictions se feront sur les demandes de crédit du jeu de test, car le modèle a été entraîné sur le jeu d'entraînement.

In [36]:
ls ../project_7/modeling/data/out/*test_2.csv ../project_7/modeling/data/out/*pred*.csv

../project_7/modeling/data/out/X_test_2.csv
../project_7/modeling/data/out/y_pred_4.csv
../project_7/modeling/data/out/y_test_2.csv


In [37]:
cp ../project_7/modeling/data/out/*_test_2.csv ./data/in/

In [38]:
cp ../project_7/test_api/data/li_features.txt ./data/in/

In [39]:
ls ./data/in/

config.json      model_optimal_simplified.json  X_TP.csv
data.csv         X_test_2.csv                   y_pred_4.csv
li_features.txt  X_TN.csv                       y_test_2.csv


In [40]:
! for file_i in ./data/in/* ; do wc -l $file_i ; done

8 ./data/in/config.json
48679 ./data/in/data.csv
1 ./data/in/li_features.txt
0 ./data/in/model_optimal_simplified.json
48679 ./data/in/X_test_2.csv
33121 ./data/in/X_TN.csv
2359 ./data/in/X_TP.csv
48679 ./data/in/y_pred_4.csv
48679 ./data/in/y_test_2.csv


### Chargement

In [41]:
def load_data(file):
    """Read a CSV file into a DataFrame indexed by ``request_id``.

    A column named ``'Unnamed: 0'`` (if present) is renamed to ``request_id``
    before being used as the index.

    Parameters
    ----------
    file : path or file-like accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame or None
        The indexed frame, or ``None`` when anything goes wrong while reading
        or indexing (the error is printed, not raised).
    """
    try:
        df_raw = pd.read_csv(file)
        df_named = df_raw.rename(columns={'Unnamed: 0': 'request_id'})
        return df_named.set_index('request_id')
    except Exception as e:
        # Best effort: report the problem and let the caller receive None.
        print(f'Could not open file {file}: {e}')
        return None

#### Attributs

In [42]:
path_X = dir_in + 'X_test_2.csv'
df_X = load_data(path_X)
df_X.shape

(48678, 125)

In [43]:
# The file holds a Python list literal with the feature names kept by the model.
with open(dir_in + 'li_features.txt') as file_object:
    str_li_features = file_object.read()
# literal_eval only accepts Python literals, so unlike eval() it cannot run
# arbitrary code that might be present in the file.
li_features = ast.literal_eval(str_li_features)
li_features

['CODE_GENDER_M',
 'EXT_SOURCE_3',
 'EXT_SOURCE_2',
 'NAME_EDUCATION_TYPE_Secondary_or_secondary_special',
 'NAME_EDUCATION_TYPE_Higher_education',
 'NAME_CONTRACT_TYPE_Cash_loans',
 'NAME_INCOME_TYPE_Working']

In [44]:
df_X = df_X[li_features]
display(df_X.head(1))
df_X.shape

Unnamed: 0_level_0,CODE_GENDER_M,EXT_SOURCE_3,EXT_SOURCE_2,NAME_EDUCATION_TYPE_Secondary_or_secondary_special,NAME_EDUCATION_TYPE_Higher_education,NAME_CONTRACT_TYPE_Cash_loans,NAME_INCOME_TYPE_Working
request_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
155094,0,0.770087,0.607697,0,1,1,0


(48678, 7)

#### Cible

In [45]:
path_y = dir_in + 'y_test_2.csv'
df_y = load_data(path_y)
display(df_y.head(1))
# Keep the shape as the last expression: a bare expression in the middle of a
# cell is evaluated but never shown.
df_y.shape

Unnamed: 0_level_0,TARGET
request_id,Unnamed: 1_level_1
155094,0


### Jointure d'attributs et cible

Les variables portent des noms simplifiés

In [46]:
li_features_simplified  = ['male', 'score_A', 'score_B', 'edu_level_2', 'edu_level_3', 'cash_loan', 'employee']
li_variables_simplified = li_features_simplified + ['class']

In [47]:
df_data = df_X.join(df_y)
df_data.columns = li_variables_simplified
display(df_data.head(1))
df_data.shape

Unnamed: 0_level_0,male,score_A,score_B,edu_level_2,edu_level_3,cash_loan,employee,class
request_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
155094,0,0.770087,0.607697,0,1,1,0,0


(48678, 8)

### Sauvegarde

In [48]:
df_data.to_csv(dir_in + 'data.csv')

In [49]:
ls $dir_in/data.csv

./data/in//data.csv


In [50]:
! head -n 3 $dir_in/data.csv

request_id,male,score_A,score_B,edu_level_2,edu_level_3,cash_loan,employee,class
155094,0,0.7700870700124128,0.6076973714617412,0,1,1,0,0
74108,0,0.4258928980051529,0.7318427244611323,1,0,1,1,0


## Choix du modèle

On copie le modèle de prédiction le plus récent : celui déployé en pré-production, car celui de production est absent du dépôt Git.

In [51]:
# cp ../project_7/api/staging_model/* ./data/model/

# Modèle adapté

In [52]:
from xgboost import XGBClassifier

In [53]:
class XGB_prob(XGBClassifier):
    """XGBoost classifier whose ``predict`` returns class-1 probabilities.

    The decision threshold is stored on the estimator but is NOT applied by
    ``predict``: callers receive raw probabilities and are expected to compare
    them against ``threshold`` themselves.

    Parameters
    ----------
    threshold : float, default 0.5
        Probability cut-off kept as an attribute of the model.
    **kwargs
        Forwarded unchanged to ``XGBClassifier.__init__``.
    """

    # Class-level fallback; every instance overrides it in __init__.
    threshold = 0

    def __init__(self, threshold=0.5, **kwargs):
        super().__init__(**kwargs)
        self.threshold = threshold

    def fit(self, df_X_train, df_y_train, **kwargs):
        """Fit the underlying classifier and return ``self``.

        Returning the estimator (instead of ``None``) follows the scikit-learn
        convention, so chained calls such as ``XGB_prob().fit(X, y).predict(X)``
        and tools that use the return value of ``fit`` work as expected.
        """
        super().fit(df_X_train, df_y_train, **kwargs)
        return self

    def predict(self, df_X_subset, bool_save_events=True):
        """Return the predicted probability of class 1 for each row.

        ``bool_save_events`` is accepted for backward compatibility and is
        currently unused.
        """
        np_y_pred_proba = super().predict_proba(df_X_subset)[:, 1]  # class 1 probas
        return np_y_pred_proba

## Récupération

In [54]:
path_config = '../config.json' 

In [55]:
# cp ../project_7/config.json ./data/in/

In [56]:
# cp ../project_7/modeling/data/out/model_optimal_simplified.json ./data/in/

In [57]:
model_prob = XGB_prob()
model_prob.load_model(dir_in + 'model_optimal_simplified.json')
model_prob

### Génération de signature

In [94]:
from mlflow.models.signature import infer_signature

In [98]:
model_signature = infer_signature(df_X.head(1), df_y.head(1))



#### Attributs

In [99]:
list(model_signature.inputs)[:10]

['CODE_GENDER_M': long (required),
 'EXT_SOURCE_3': double (required),
 'EXT_SOURCE_2': double (required),
 'NAME_EDUCATION_TYPE_Secondary_or_secondary_special': long (required),
 'NAME_EDUCATION_TYPE_Higher_education': long (required),
 'NAME_CONTRACT_TYPE_Cash_loans': long (required),
 'NAME_INCOME_TYPE_Working': long (required)]

#### Cible

In [100]:
list(model_signature.outputs)

['TARGET': long (required)]

### Mise à jour du seuil binaire

In [101]:
dict_params_old = model_prob.get_params()
dict_params_old['threshold']

0.09

In [102]:
# Read the JSON configuration and extract the binary decision threshold.
with open(dir_in + 'config.json', 'r') as json_file:
    dict_to_config = json.load(json_file)
str_best_threshold = dict_to_config['best_threshold']
best_threshold = float(str_best_threshold)

In [103]:
setattr(model_prob, 'threshold', best_threshold)

In [104]:
dict_params_new = model_prob.get_params()
dict_params_new['threshold']

0.09

## Déploiement

In [105]:
from mlflow import sklearn as skl

In [106]:
model_path = './data/model/'
! rm -rf $model_path ; mkdir -p "$model_path"
%time skl.save_model(model_prob, model_path, signature=model_signature)

CPU times: user 72.7 ms, sys: 0 ns, total: 72.7 ms
Wall time: 2 s




#### Fichiers générés

In [107]:
!find "$model_path"

./data/model/
./data/model/conda.yaml
./data/model/python_env.yaml
./data/model/MLmodel
./data/model/model.pkl
./data/model/requirements.txt


## Lancement du serveur

### Arrêt

In [108]:
port_server = '5677'

In [109]:
mask = ':' + port_server
! pkill -f "$mask"

[2024-07-08 14:48:09 +0200] [59280] [INFO] Handling signal: term
[2024-07-08 14:48:09 +0200] [59281] [INFO] Worker exiting (pid: 59281)


### Démarrage

In [110]:
ip_host = '0.0.0.0'
# Build the complete command once so that the printed line is exactly the one
# that gets executed (previously the ' --no-conda &' suffix was added after printing).
shell_command = (
    f'nohup mlflow models serve -m {model_path} -p {port_server} -h {ip_host}'
    ' --no-conda &'
)
print(shell_command)
get_ipython().system_raw(shell_command)          # runs model API in background

nohup mlflow models serve -m ./data/model/ -p 5677 -h 0.0.0.0


[2024-07-08 14:48:10 +0200] [59280] [INFO] Shutting down: Master


### Vérification d'exécution

Il y a 2 processus qui tournent par serveur

In [112]:
! ps aux | grep "scoring_server" | grep -v "grep" | awk '{print $2, $15, $19}'

59546 0.0.0.0:5677 mlflow.pyfunc.scoring_server.wsgi:app
59547 0.0.0.0:5677 mlflow.pyfunc.scoring_server.wsgi:app


## Accès distant

In [74]:
url_api = 'localhost:' + port_server + '/invocations'
print('URL API    -> http://' + url_api)

URL API    -> http://localhost:5677/invocations


### Prédictions

Demande par requête POST de prédiction de la cible pour une observation

In [75]:
#cp ../project_7/modeling/data/out/X_T[PN].csv ./data/in/

In [76]:
nb_observations = 1

### Cas TP

In [113]:
path_TP = dir_in + 'X_TP.csv'
df_TP = pd.read_csv(path_TP)

In [114]:
df_TP_sample = df_TP.sample(nb_observations)
df_TP_sample

Unnamed: 0,CODE_GENDER_M,EXT_SOURCE_3,EXT_SOURCE_2,NAME_EDUCATION_TYPE_Secondary_or_secondary_special,NAME_EDUCATION_TYPE_Higher_education,NAME_CONTRACT_TYPE_Cash_loans,NAME_INCOME_TYPE_Working
2212,1,0.221335,0.557089,1,0,1,1


In [115]:
def get_curl_command(df_sample, url) :
    """Build a curl command line that POSTs a DataFrame as JSON to ``url``.

    The frame is serialised with ``orient='split'`` and wrapped in a
    ``{"dataframe_split": ...}`` object.

    Parameters
    ----------
    df_sample : pandas.DataFrame
        Rows to send.
    url : str
        Target endpoint, e.g. ``'localhost:5677/invocations'``.

    Returns
    -------
    str
        A shell command. The payload and the URL are shell-quoted, so values
        containing single quotes or other shell metacharacters cannot break or
        alter the command.
    """
    str_features_values = df_sample.to_json(orient='split')
    str_data = '{"dataframe_split": ' + str_features_values + '}'
    # shlex.quote yields the same '...' quoting as before for ordinary payloads
    # and escapes embedded single quotes when there are any.
    return (
        'curl -d' + shlex.quote(str_data)
        + " -H 'Content-Type: application/json' -X POST "
        + shlex.quote(url)
    )

Vérifier que cette ligne de commande Linux renvoie une prédiction de classe "1"

In [116]:
str_command = get_curl_command(df_TP_sample, url_api)
print(str_command)

curl -d'{"dataframe_split": {"columns":["CODE_GENDER_M","EXT_SOURCE_3","EXT_SOURCE_2","NAME_EDUCATION_TYPE_Secondary_or_secondary_special","NAME_EDUCATION_TYPE_Higher_education","NAME_CONTRACT_TYPE_Cash_loans","NAME_INCOME_TYPE_Working"],"index":[2212],"data":[[1,0.2213352064,0.557088648,1,0,1,1]]}}' -H 'Content-Type: application/json' -X POST localhost:5677/invocations


### Cas TN

In [86]:
path_TN = dir_in + 'X_TN.csv'
df_TN = pd.read_csv(path_TN)

In [87]:
df_TN_sample = df_TN.sample(nb_observations)
df_TN_sample

Unnamed: 0,male,score_A,score_B,edu_level_2,edu_level_3,cash_loan,employee
1905,0,0.75574,0.705748,0,1,1,0


Vérifier que cette ligne de commande Linux renvoie une prédiction de classe "0"

In [88]:
shell_command = get_curl_command(df_TN_sample, url_api)
print(shell_command)

curl -d'{"dataframe_split": {"columns":["male","score_A","score_B","edu_level_2","edu_level_3","cash_loan","employee"],"index":[1905],"data":[[0,0.7557400502,0.7057476837,0,1,1,0]]}}' -H 'Content-Type: application/json' -X POST localhost:5677/invocations


In [89]:
get_ipython().system_raw(shell_command)  

{"error_code": "BAD_REQUEST", "message": "Encountered an unexpected error while evaluating the model. Verify that the serialized input Dataframe is compatible with the model for inference.", "stack_trace": "Traceback (most recent call last):\n  File \"/home/user_n/anaconda3/envs/env_proj_8/lib/python3.10/site-packages/mlflow/pyfunc/scoring_server/__init__.py\", line 367, in invocations\n    raw_predictions = model.predict(data, params=params)\n  File \"/home/user_n/anaconda3/envs/env_proj_8/lib/python3.10/site-packages/mlflow/pyfunc/__init__.py\", line 737, in predict\n    return self._predict(data, params)\n  File \"/home/user_n/anaconda3/envs/env_proj_8/lib/python3.10/site-packages/mlflow/pyfunc/__init__.py\", line 769, in _predict\n    return self._predict_fn(data, params=params)\n  File \"/home/user_n/anaconda3/envs/env_proj_8/lib/python3.10/site-packages/mlflow/sklearn/__init__.py\", line 534, in predict\n    return self.sklearn_model.predict(data)\n  File \"/tmp/ipykernel_58995/1

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  2569  100  2395  100   174   167k  12428 --:--:-- --:--:-- --:--:--  179k


# Fin du traitement

In [254]:
assert(False) # prevents the execution of following cells

AssertionError: 

# Annexes