# Initialization

In [11]:
#! pip install --quiet streamlit plotly

In [15]:
pip install --quiet plotly

Note: you may need to restart the kernel to use updated packages.


In [31]:
import streamlit as st
import pandas as pd
import numpy as np

# Folder Contents

In [4]:
pwd

'C:\\Users\\Jean\\OCR\\git\\project_8'

In [4]:
path_code = 'Index.py'

In [8]:
ls $path_code

Index.py


In [9]:
cat $path_code

'''
Index.py
Main page of Streamlit app
'''

import streamlit as st

st.set_page_config(
	page_title="Hello",
	page_icon="👋",
	layout="wide"	
)

# Customize the sidebar
st.sidebar.title("About")
st.sidebar.info("""
	Web App URL: <https://project8-ocr.streamlit.app/>
	GitHub Repository: <https://github.com/JeanRosselVallee/project_8>
	""")
st.sidebar.image("https://www.whenthebanksaysno.co.uk/wp-content/uploads/2023/05/D9585792-ED4C-4363-900E-1EDCE31B99B1.jpeg")

# Customize page title
st.title("Credit Application Dashboard")

st.markdown(
    """
    This multipage app template demonstrates various interactive web apps created using [streamlit](https://streamlit.io) and [leafmap](https://leafmap.org). It is an open-source project and you are very welcome to contribute to the [GitHub repository](https://github.com/giswqs/streamlit-multipage-template).
    """
)

st.header("Instructions")

markdown = """
1. Select a client's application for credit
2. Find your favorite emoji from https:/

# Launch App on LocalHost

In [5]:
! streamlit run $path_code

^C


# Commandes Git

# Import Data from Project 7

## Choix du jeu de données

Les prédictions se feront sur les applications du jeu de test parce que le modèle a été entraîné sur celui d'entraînement

In [76]:
ls ../project_7/modeling/data/out/*test_2.csv ../project_7/modeling/data/out/*pred*.csv

../project_7/modeling/data/out/X_test_2.csv
../project_7/modeling/data/out/y_pred_4.csv
../project_7/modeling/data/out/y_test_2.csv


In [77]:
cp ../project_7/modeling/data/out/*_test_2.csv ./data/in/

In [89]:
cp ../project_7/test_api/data/li_features.txt ./data/in/

In [90]:
ls ./data/in/

li_features.txt  X_test_2.csv  y_pred_4.csv  y_test_2.csv


In [91]:
! for file_i in ./data/in/* ; do wc -l $file_i ; done

1 ./data/in/li_features.txt
48679 ./data/in/X_test_2.csv
48679 ./data/in/y_pred_4.csv
48679 ./data/in/y_test_2.csv


### Chargement

In [195]:
dir_in = './data/in/'

In [153]:
def load_data(file):
    try    : 
        df_contents = pd.read_csv(file) .rename(columns={'Unnamed: 0': 'request_id'}) \
          pip install --quiet                               .set_index('request_id')
        return df_contents
    except Exception as e: 
        print(f'Could not open file {file}: {e}')

#### Attributs

In [154]:
path_X = dir_in + 'X_test_2.csv'
df_X = load_data(path_X)
df_X.shape

(48678, 125)

In [155]:
with open(dir_in + 'li_features.txt') as file_object:
    str_li_features = file_object.read()
li_features = eval(str_li_features)
li_features

['CODE_GENDER_M',
 'EXT_SOURCE_3',
 'EXT_SOURCE_2',
 'NAME_EDUCATION_TYPE_Secondary_or_secondary_special',
 'NAME_EDUCATION_TYPE_Higher_education',
 'NAME_CONTRACT_TYPE_Cash_loans',
 'NAME_INCOME_TYPE_Working']

In [157]:
df_X = df_X[li_features]
display(df_X.head(1))
df_X.shape

Unnamed: 0_level_0,CODE_GENDER_M,EXT_SOURCE_3,EXT_SOURCE_2,NAME_EDUCATION_TYPE_Secondary_or_secondary_special,NAME_EDUCATION_TYPE_Higher_education,NAME_CONTRACT_TYPE_Cash_loans,NAME_INCOME_TYPE_Working
request_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
155094,0,0.770087,0.607697,0,1,1,0


(48678, 7)

#### Cible

In [158]:
path_y = dir_in + 'y_test_2.csv'
df_y = load_data(path_y)
df_y.shape
display(df_y.head(1))

Unnamed: 0_level_0,TARGET
request_id,Unnamed: 1_level_1
155094,0


### Jointure d'attributs et cible

In [161]:
df_data = df_X.join(df_y)
df_data.columns = ['male', 'score_A', 'score_B', 'edu_level_2', 'edu_level_3', 'cash_loan', 'employee', 'class']
display(df_data.head(1))
df_data.shape

Unnamed: 0_level_0,male,score_A,score_B,edu_level_2,edu_level_3,cash_loan,employee,class
request_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
155094,0,0.770087,0.607697,0,1,1,0,0


(48678, 8)

### Sauvegarde

In [174]:
df_data.to_csv(dir_in + 'data.csv')

In [179]:
ls $dir_in/data.csv

./data/in//data.csv


In [184]:
! head -n 3 $dir_in/data.csv

request_id,male,score_A,score_B,edu_level_2,edu_level_3,cash_loan,employee,class
155094,0,0.7700870700124128,0.6076973714617412,0,1,1,0,0
74108,0,0.4258928980051529,0.7318427244611323,1,0,1,1,0


## Choix du modèle

On copie le modèle de prédiction le plus récent: celui deployé en pré-Prod car celui en Prod est absent du dépôt Git

In [None]:
cp ../project_7/api/staging_model/* ./data/model/

# API

## Lancement du serveur

### Inialisation

In [210]:
#pip install --quiet mlflow xgboost

Note: you may need to restart the kernel to use updated packages.


In [211]:
import mlflow
from xgboost import XGBClassifier

In [232]:
mkdir -p ./data/modeling/data/out/     # Predictions backup folder 

### Arrêt

In [236]:
port_server = '5677'

In [249]:
mask = ':' + port_server
! pkill -f "$mask"

### Démarrage

In [250]:
ip_host = '0.0.0.0'
shell_command =  'mkdir -p ./data/modeling/data/out ; ' # Backup dir
shell_command += 'cd ./data/model/ ; nohup mlflow models serve -m '
shell_command += './ -p ' + port_server + ' -h ' + ip_host
print(shell_command)
get_ipython().system_raw(shell_command + ' --no-conda &')          # runs model API in background

mkdir -p ./data/modeling/data/out ; cd ./data/model/ ; nohup mlflow models serve -m ./ -p 5677 -h 0.0.0.0


2024/07/07 19:41:54 INFO mlflow.models.flavor_backend_registry: Selected backend for flavor 'python_function'
2024/07/07 19:41:54 INFO mlflow.pyfunc.backend: === Running command 'exec gunicorn --timeout=60 -b 0.0.0.0:5677 -w 1 ${GUNICORN_CMD_ARGS} -- mlflow.pyfunc.scoring_server.wsgi:app'
[2024-07-07 19:41:54 +0200] [43880] [INFO] Starting gunicorn 22.0.0
[2024-07-07 19:41:54 +0200] [43880] [INFO] Listening at: http://0.0.0.0:5677 (43880)
[2024-07-07 19:41:54 +0200] [43880] [INFO] Using worker: sync
[2024-07-07 19:41:54 +0200] [43881] [INFO] Booting worker with pid: 43881
 - mlflow (current: 2.14.2, required: mlflow==2.14.1)
 - packaging (current: 24.1, required: packaging==23.2)
 - scikit-learn (current: 1.5.1, required: scikit-learn==1.5.0)
 - scipy (current: 1.14.0, required: scipy==1.13.1)
To fix the mismatches, call `mlflow.pyfunc.get_model_dependencies(model_uri)` to fetch the model's environment and install dependencies using the resulting environment file.


### Vérification d'exécution

Il y a 2 processus qui tournent par serveur

In [251]:
! ps aux | grep "scoring_server" | grep -v "grep" | awk '{print $2, $15, $19}'

43880 0.0.0.0:5677 mlflow.pyfunc.scoring_server.wsgi:app
43881 0.0.0.0:5677 mlflow.pyfunc.scoring_server.wsgi:app


## Accès distant

In [240]:
url_api = 'localhost:' + port_server + '/invocations'
print('URL API    -> http://' + url_api)

URL API    -> http://localhost:5677/invocations


### Prédictions

Demande par requête POST de prédiction de la cible pour une observation

In [255]:
cp ../project_7/modeling/data/out/X_T[PN].csv ./data/in/

In [None]:
nb_observations = 1

### Cas TP

In [256]:
path_TP = dir_in + 'X_TP.csv'
df_TP = pd.read_csv(path_TP)

In [279]:
df_TP_sample = df_TP.sample(1)
df_TP_sample

Unnamed: 0,CODE_GENDER_M,EXT_SOURCE_3,EXT_SOURCE_2,NAME_EDUCATION_TYPE_Secondary_or_secondary_special,NAME_EDUCATION_TYPE_Higher_education,NAME_CONTRACT_TYPE_Cash_loans,NAME_INCOME_TYPE_Working
2243,0,0.323311,0.327503,0,1,1,1


In [266]:
def get_curl_command(df_sample, url) :
    str_features_values = df_sample.to_json(orient='split')
    str_data = '\'{"dataframe_split": ' + str_features_values + '}\' '
    return 'curl -d' + str_data + '''-H 'Content-Type: application/json' -X POST ''' + url

Vérifier que cette ligne de commande Linux renvoie une prédiction de classe "1"

In [276]:
shell_command = get_curl_command(df_TP_sample_1, url_api)
print(shell_command)

curl -d'{"dataframe_split": {"columns":["CODE_GENDER_M","EXT_SOURCE_3","EXT_SOURCE_2","NAME_EDUCATION_TYPE_Secondary_or_secondary_special","NAME_EDUCATION_TYPE_Higher_education","NAME_CONTRACT_TYPE_Cash_loans","NAME_INCOME_TYPE_Working"],"index":[1415],"data":[[0,0.0005272652,0.5148401451,1,0,1,1]]}}' -H 'Content-Type: application/json' -X POST localhost:5677/invocations


In [277]:
get_ipython().system_raw(shell_command)  

{"predictions": [1]}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   313  100    20  100   293   1000  14650 --:--:-- --:--:-- --:--:-- 15650


### Cas TN

In [261]:
path_TN = dir_in + 'X_TN.csv'
df_TN = pd.read_csv(path_TN)

In [278]:
df_TN_sample_1 = df_TN.sample(1)
df_TN_sample_1

Unnamed: 0,CODE_GENDER_M,EXT_SOURCE_3,EXT_SOURCE_2,NAME_EDUCATION_TYPE_Secondary_or_secondary_special,NAME_EDUCATION_TYPE_Higher_education,NAME_CONTRACT_TYPE_Cash_loans,NAME_INCOME_TYPE_Working
22089,0,0.75574,0.660903,1,0,1,0


Vérifier que cette ligne de commande Linux renvoie une prédiction de classe "0"

In [273]:
shell_command = get_curl_command(df_TN_sample_1, url_api)
print(shell_command)

curl -d'{"dataframe_split": {"columns":["CODE_GENDER_M","EXT_SOURCE_3","EXT_SOURCE_2","NAME_EDUCATION_TYPE_Secondary_or_secondary_special","NAME_EDUCATION_TYPE_Higher_education","NAME_CONTRACT_TYPE_Cash_loans","NAME_INCOME_TYPE_Working"],"index":[31310],"data":[[0,0.4031416767,0.6076348106,1,0,1,0]]}}' -H 'Content-Type: application/json' -X POST localhost:5677/invocations


In [274]:
get_ipython().system_raw(shell_command)  

{"predictions": [0]}

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   314  100    20  100   294    869  12782 --:--:-- --:--:-- --:--:-- 13652


# Fin du traitement

In [254]:
assert(False) # prevents the execution of following cells

AssertionError: 

# Annexes