In [None]:
import mlflow
import joblib
import pickle
import os
from hydra import compose, initialize
from hydra.utils import instantiate
import pandas as pd
from ProcessData.dataset import Dataset
from ProcessData.data_splitter import data_splitter
from Evaluate.pips import get_pips_margin
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.calibration import CalibratedClassifierCV

from sklearn.calibration import calibration_curve
from sklearn.utils import estimator_html_repr
from sklearn.metrics import brier_score_loss

from Model.SklearnPipeline import CustomPipeline

from my_logger.custom_logger import  logger

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


In [None]:
# Compose the Hydra config inside a context manager. Called as a plain function,
# `initialize` sets Hydra's global state and raises if this cell is executed a
# second time in the same kernel; the context form scopes the initialization to
# this block, so the cell can be re-run safely.
with initialize(config_path="conf/", version_base=None):
    cfg = compose(config_name="config.yaml")

In [None]:

# Build the preprocessing pipeline described by the `data_pipeline` config node.
data_pipeline = instantiate(cfg.data_pipeline)

# Read the source CSV named in the config and keep the untouched frame under its
# own name; the pipeline is handed a copy of it.
raw_df = pd.read_csv(cfg.Data_Source)
df = data_pipeline.apply(raw_df.copy())

# Wrap the processed frame together with the project's splitter; later cells
# read the X_train / y_train / X_oot / y_oot attributes of this object.
dataset = Dataset(data=df, data_splitter=data_splitter)

In [None]:
# Columns to eyeball in the training features.
# NOTE(review): 'next_close_price1' and 'diff' are selectable from X_train here;
# if they describe the following period, confirm they are not used as model
# inputs (possible look-ahead leakage).
preview_columns = ['Datum', 'next_close_price1', 'close_price1', 'diff']
dataset.X_train[preview_columns]

In [None]:
# Display the `y_train` attribute of the Dataset object.
# Presumably this is the training target matching X_train; confirm in ProcessData.dataset.
dataset.y_train

In [None]:
# Relative location of the pickled model artifact.
model_name = 'deploy/model.pkl'

# The base directory is the parent of the resolved path of 'scoring_script.py'.
# That name is a relative path, so it is resolved against the process's current
# working directory (following a symlink if one exists there).
current_directory = os.path.dirname(os.path.realpath('scoring_script.py'))
model_path = os.path.join(current_directory, model_name)
logger.info(f'Get model from {model_path}')

# Stop early with an explicit error when the artifact is missing.
model_file_present = os.path.exists(model_path)
if not model_file_present:
    raise FileNotFoundError(f"Model file '{model_name}' not found in the current directory.")

# Deserialize the model. pickle executes arbitrary code while loading, so this
# file must come from a trusted source.
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)
print(type(model))

In [None]:
# Score the `X_oot` split (presumably "out-of-time"; confirm in ProcessData) with the
# loaded model. Later cells read column 1 of the result via `pred[:, 1]`.
pred = model.predict_proba(dataset.X_oot)

In [None]:
# Display the raw predict_proba output for a quick visual check.
pred

In [None]:
# Density of the column-1 predicted probabilities over the whole X_oot split.
# `fill=True` is the current spelling of the deprecated `shade=True` argument.
sns.kdeplot(pred[:, 1], fill=True)
plt.xlabel('Probability')
plt.ylabel('Density')
plt.title('Density Plot of Predicted Probabilities')
plt.show()

In [None]:


# Split the column-1 predicted probabilities by the observed label in y_oot.
# The filter lives in the comprehension itself (no 0 placeholder that is later
# stripped out), so predictions that are exactly 0.0 stay in their group.
pred_1 = [p for p, y in zip(pred[:, 1], dataset.y_oot) if y > 0.5]
pred_0 = [p for p, y in zip(pred[:, 1], dataset.y_oot) if y < 0.5]

# Overlay one density per label group. `fill=True` is the current spelling of
# the deprecated `shade=True`; the labels feed the legend so the two curves can
# be told apart.
sns.kdeplot(pred_0, fill=True, label='y_oot < 0.5')
sns.kdeplot(pred_1, fill=True, label='y_oot > 0.5')
plt.xlabel('Probability')
plt.ylabel('Density')
plt.title('Density Plot of Predicted Probabilities')
plt.legend()
plt.show()
