In [79]:
import json
import pickle
from io import StringIO

import pandas as pd
import requests
from imblearn.over_sampling import RandomOverSampler
from lightgbm import LGBMClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

# Base URL of the API served on the local machine; the request cells below append a route to it.
API_URL = "http://127.0.0.1:5000/api"

In [43]:
# Smoke test: call the /test route and print the raw body it answers with.
response = requests.get(f"{API_URL}/test")
print(response.text)

API fonctionnelle


In [44]:
# Load the preprocessed test set; `test_data` is read again by later cells.
test_data = pd.read_csv("csv/preprocessed/app_test.csv")

# Serialise the first ten rows as a JSON array of row records.
json_data = test_data.iloc[:10].to_json(orient="records")

# Post the serialised rows to /dimensions and display the decoded JSON answer.
response = requests.post(
    f"{API_URL}/dimensions",
    json={"data": json_data},
)
response.json()

{'dimensions': [10, 772]}

In [49]:
# Payload for /extract: the rows currently serialised in `json_data` plus an index value.
request_data = {
    "data": json_data,
    "index": 5
}

response = requests.post(API_URL + "/extract", json = request_data)

# response.json() is expected to be a JSON-encoded string here (it is fed to
# pd.read_json). Wrap it in StringIO: passing a literal JSON string straight to
# pd.read_json is deprecated since pandas 2.1 and emits a FutureWarning.
pd.read_json(StringIO(response.json()))

Unnamed: 0,SK_ID_CURR,TARGET
0,309296,0
1,316121,0
2,220130,0
3,254432,0
4,345565,0
5,125782,0
6,433605,0
7,268903,0
8,284617,0
9,131463,1


In [87]:
def preprocess_data(df_train, df_test, selected_features) :
    """Build the processed train/test feature frames and collect the related series.

    Every transformation is fitted on the training rows only and then applied
    unchanged to the test rows:

    1. keep only ``selected_features``;
    2. drop columns that carry at most one distinct non-missing value in the
       training set;
    3. impute missing values with the median (``SimpleImputer``);
    4. rescale with ``MinMaxScaler``.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Must each contain every name in ``selected_features`` as well as the
        ``TARGET`` and ``AMT_CREDIT`` columns.
    selected_features : list of str
        Names of the columns to use as features.

    Returns
    -------
    dict
        ``X_train`` / ``X_test``: processed feature DataFrames, carrying the
        same row index as ``df_train`` / ``df_test``;
        ``y_train`` / ``y_test``: copies of the ``TARGET`` column;
        ``amt_credit_train`` / ``amt_credit_test``: copies of ``AMT_CREDIT``.

    Raises
    ------
    KeyError
        If one of the requested columns is missing from an input frame.
    """

    # Extract the features and companion series as copies, so the inputs are never mutated
    data = {
        "X_train" : df_train[selected_features].copy(),
        "X_test" : df_test[selected_features].copy(),
        "y_train" : df_train["TARGET"].copy(),
        "y_test" : df_test["TARGET"].copy(),
        "amt_credit_train" : df_train["AMT_CREDIT"].copy(),
        "amt_credit_test" : df_test["AMT_CREDIT"].copy()
    }

    # Drop columns with at most one distinct value in the training set.
    # nunique() ignores NaN, so an entirely missing column counts 0 and has to be
    # dropped as well: SimpleImputer discards such columns during fit by default,
    # which would leave the column labels out of step with the transformed array.
    columns_to_drop = data["X_train"].columns[data["X_train"].nunique() <= 1]
    X_train_cleaned = data["X_train"].drop(columns = columns_to_drop)
    X_test_cleaned = data["X_test"].drop(columns = columns_to_drop)

    # Impute missing values with the training-set median
    imputer = SimpleImputer(strategy = "median")
    X_train_imputed = imputer.fit_transform(X_train_cleaned)
    X_test_imputed = imputer.transform(X_test_cleaned)

    # Rescale the values with the scaler fitted on the training set
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train_imputed)
    X_test_scaled = scaler.transform(X_test_imputed)

    # Restore both the column names and the original row index, so the feature
    # frames stay aligned with y_* and amt_credit_* even for a non-default index
    data["X_train"] = pd.DataFrame(
        X_train_scaled,
        columns = X_train_cleaned.columns,
        index = X_train_cleaned.index
    )
    data["X_test"] = pd.DataFrame(
        X_test_scaled,
        columns = X_test_cleaned.columns,
        index = X_test_cleaned.index
    )

    return data

In [88]:
# Load the preprocessed training set and display its (rows, columns) shape.
train_data = pd.read_csv("csv/preprocessed/app_train.csv")
train_data.shape

(246005, 772)

In [91]:
# Read the selected feature names, one per line.
with open("selected_features.txt", "r") as file :
    lines = file.readlines()

# Strip surrounding whitespace and skip blank lines: an empty name would make
# the column selection inside preprocess_data fail with a KeyError.
selected_features = [name for name in (line.strip() for line in lines) if name]

# Preprocess both sets, then save the processed test features without the index column.
processed_data = preprocess_data(train_data, test_data, selected_features)
processed_data["X_test"].to_csv("X_test.csv", index = False)

In [92]:
# Serialise the first fifty processed test rows as a JSON array of row records.
json_data = processed_data["X_test"].iloc[:50].to_json(orient="records")

# Post the rows to /predict and keep the decoded JSON answer.
response = requests.post(
    f"{API_URL}/predict",
    json={"data": json_data},
)
predictions = response.json()

print("Prédictions :", predictions)

Prédictions : [{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":1},{"PREDICTION":1},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0},{"PREDICTION":1},{"PREDICTION":1},{"PREDICTION":0},{"PREDICTION":0}]
