In [None]:
import pandas as pd 
import numpy as np 
import tensorflow as tf
import os
import s3fs
import matplotlib.pyplot as plt 

from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.layers import Dense, GRU, Dropout, Bidirectional, BatchNormalization
from tensorflow.keras.activations import linear, relu, sigmoid
from tensorflow.keras.regularizers import l2

from modules.utils import pre_process_data, encoded_categorical_features

In [None]:
# Read the test features (CSV) from an S3-compatible object store.
# The endpoint host comes from the AWS_S3_ENDPOINT environment variable;
# a missing variable raises KeyError here.
S3_ENDPOINT_URL = f"https://{os.environ['AWS_S3_ENDPOINT']}"
fs = s3fs.S3FileSystem(client_kwargs={"endpoint_url": S3_ENDPOINT_URL})
BUCKET = "ebahri-ensae"
FILE_KEY_S3 = "X_test_Hi5.csv"
FILE_PATH_S3 = f"{BUCKET}/{FILE_KEY_S3}"

# Stream the object in binary mode and let pandas parse it as comma-separated text.
with fs.open(FILE_PATH_S3, mode="rb") as csv_stream:
    X_test = pd.read_csv(csv_stream, sep=",")

In [None]:
# Keep an independent snapshot of the raw test frame. A plain assignment would
# only create a second name for the same object, so columns added to `X_test`
# later (or any in-place change made during preprocessing) would also show up
# in the "copy". `row_index` is read back from this snapshot when the
# submission table is built.
X_test_copy = X_test.copy()

In [None]:
# The test file has no target column; add one filled with 0 before preprocessing.
# NOTE(review): presumably pre_process_data expects this column to exist — confirm
# against modules.utils.
TARGET_COLUMN = "piezo_groundwater_level_category"
X_test[TARGET_COLUMN] = 0

# pre_process_data returns two values; the second is bound to `y_train`.
# NOTE(review): it is presumably derived from the placeholder column above and
# therefore carries no real labels — confirm.
x_test_without_nan, y_train = pre_process_data(X_test)

In [None]:
# Some INSEE columns contain a textual placeholder instead of a number.
# For each column listed below, swap its placeholder for 0 and cast the
# column to float. Columns are processed in the order given.
insee_placeholders = {
    "insee_%_agri": 'N/A - division par 0',
    "insee_med_living_level": 'N/A - résultat non disponible',
    "insee_%_ind": 'N/A - division par 0',
    "insee_%_const": 'N/A - division par 0',
}

for column_name, placeholder in insee_placeholders.items():
    # 0 is the chosen substitute for the placeholder; any other entry is left as is
    # and must be convertible to float.
    without_placeholder = x_test_without_nan[column_name].replace({placeholder: 0})
    x_test_without_nan[column_name] = without_placeholder.astype(float)

In [None]:
# Run the project's categorical-feature encoder on the cleaned test frame.
# NOTE(review): the return type is not visible here; the next cell wraps the
# result in a DataFrame, so it may be an array — confirm against modules.utils.
X_final = encoded_categorical_features(x_test_without_nan)

In [None]:
# Make sure the encoded features are held in a DataFrame, then print a
# summary (dtypes, non-null counts) before imputing.
X_final = pd.DataFrame(X_final)
X_final.info()

# Fill the remaining missing values with the per-column mean.
# NOTE(review): these means are computed on the test frame itself; confirm this
# matches how missing values were handled when the model was trained.
column_means = X_final.mean()
X_final = X_final.fillna(column_means)

In [None]:
import xgboost as xgb

# Location of the serialized model, relative to the working directory.
MODEL_PATH = "models/xgboost_model_2.json"

# Start from an empty classifier and restore its state from the file.
loaded_model = xgb.XGBClassifier()
loaded_model.load_model(MODEL_PATH)

In [None]:
# Predict one class per row of the prepared feature frame with the loaded model.
# The result is mapped to category labels further down, so it is expected to
# hold the integer class ids 0-4 — TODO confirm against the training notebook.
y_pred = loaded_model.predict(X_final)

In [None]:
# Wrap the raw predictions in a DataFrame. Later cells read the predictions
# from the column labelled 0 (the default label pandas gives an unnamed column).
y_pred = pd.DataFrame(y_pred)
# Preview the first rows as the cell output.
y_pred.head()

In [None]:
# Build the submission table: one row per test sample, holding the sample's
# `row_index` and the predicted category label.
#
# This cell consumes column 0 of `y_pred` and then drops it, so the prediction
# cells must be re-run before this cell is executed a second time.

# Every prediction has to pair with exactly one input row. Assigning a Series
# aligns on index labels, which silently yields NaN or misplaced ids when the
# two frames differ in length or labels, so check the lengths explicitly.
if len(y_pred) != len(X_test_copy):
    raise ValueError(
        f"Prediction count ({len(y_pred)}) does not match the number of test rows "
        f"({len(X_test_copy)}); row_index cannot be attached safely."
    )

# Attach the ids by position (row i of the predictions <-> row i of the input),
# independent of whatever index labels either frame carries.
y_pred["row_index"] = X_test_copy["row_index"].to_numpy()

# Class id -> human-readable category label.
category_mapping = {
    0: "Very Low",
    1: "Low",
    2: "Average",
    3: "High",
    4: "Very High"
}

# Translate the predicted class ids into labels; ids missing from the mapping
# would become NaN, so fail loudly instead of writing an incomplete file.
predicted_labels = y_pred[0].map(category_mapping)
unmapped = predicted_labels.isna()
if unmapped.any():
    raise ValueError(
        f"Predictions contain class ids without a label: {y_pred.loc[unmapped, 0].unique().tolist()}"
    )
y_pred["piezo_groundwater_level_category"] = predicted_labels

# Keep only `row_index` and the label column; the numeric class id is no longer needed.
y_pred = y_pred.drop(columns=[0])

In [None]:
# Print the feature frame's summary (dtypes, non-null counts) again, this time
# after the mean imputation applied earlier.
X_final.info()

In [None]:
# Print the submission table's summary (columns, dtypes, non-null counts) as a
# final check before it is written to disk.
y_pred.info()

In [None]:
# Write the submission table to a CSV file in the working directory.
# index=False leaves the DataFrame index out, so the file holds only the
# frame's own columns.
y_pred.to_csv("y_test_2.csv", index=False)