In [None]:
from plotly.offline import init_notebook_mode, iplot

import plotly.graph_objs as go
from plotly.subplots import make_subplots
import plotly.io as pio
# Use the 'plotly_white' template as the default for plotly figures.
pio.templates.default = 'plotly_white'

import numpy as np
import pandas as pd
import pickle

In [None]:
# Root directory under which the processed CSV archives are stored.
path='/mnt/cephfs/ml_data/mc_2021/'

# "Target" frame: the ProcessedTrainRealNoised training table, restricted to
# rows whose 'edepR' value is strictly below 17.2.
data_target = (
    pd.read_csv(f'{path}processed_data/ProcessedTrainRealNoised/ProcessedTrain.csv.gz')
    .loc[lambda frame: frame['edepR'] < 17.2]
)

# "Source" frame: the ProcessedTrainReal 1M table, with the same 'edepR' cut.
data_source = (
    pd.read_csv(f'{path}processed_data/ProcessedTrainReal/ProcessedTrain_1M.csv.gz')
    .loc[lambda frame: frame['edepR'] < 17.2]
)

In [None]:
# Number of rows drawn from each dataset before plotting.
N = 20_000

In [None]:
# Down-sample both frames to at most N rows each.
# Fixed seeds make the sample (and therefore the figure below) reproducible
# across kernel restarts; distinct seeds avoid picking the same row positions
# in both frames. The size is clamped to the frame length because
# DataFrame.sample raises ValueError when asked for more rows than exist.
SAMPLE_SEED = 42
data_target = data_target.sample(min(N, len(data_target)), random_state=SAMPLE_SEED)
data_source = data_source.sample(min(N, len(data_source)), random_state=SAMPLE_SEED + 1)

In [None]:
# Overlaid per-feature histograms comparing the sampled source and target frames.
ncols = 9

# Features are paired by column position, so only positions present in both
# frames can be plotted.
n_features = min(data_source.shape[1], data_target.shape[1])
# Ceiling division: just enough rows to hold every feature (at least one row so
# make_subplots still receives a valid grid when there are no features).
nrows = max(1, -(-n_features // ncols))

fig = make_subplots(
    rows=nrows, cols=ncols,
    subplot_titles=[str(column) for column in data_source.columns[:n_features]],
    horizontal_spacing=0.06
)


def _feature_histogram(values, name, color, xbins, showlegend):
    """Build one semi-transparent, probability-normalised histogram trace.

    Traces that share a ``name`` also share a legend group, so the single
    legend entry toggles that dataset in every subplot.
    """
    return go.Histogram(
        x=values,
        name=name,
        legendgroup=name,
        showlegend=showlegend,
        histnorm='probability',
        xbins=xbins,
        marker=dict(color=color),
        opacity=0.6,
    )


for idx in range(n_features):
    # Map the flat feature index onto 1-based (row, col) subplot coordinates.
    row, col = divmod(idx, ncols)
    feature_target = data_target.iloc[:, idx]
    feature_source = data_source.iloc[:, idx]

    # Both histograms use the same 75 bins spanning the combined value range,
    # so the two distributions are directly comparable.
    mini = min(feature_target.min(), feature_source.min())
    maxi = max(feature_target.max(), feature_source.max())
    if maxi > mini:
        xbins = dict(start=mini, end=maxi, size=(maxi - mini) / 75)
    else:
        # Constant (or all-NaN) feature: a zero bin width is invalid, so let
        # plotly choose the bins itself.
        xbins = None

    # Only the first subplot contributes legend entries, avoiding duplicates.
    showlegend = idx == 0
    fig.add_trace(
        _feature_histogram(feature_target, "Target", 'darkred', xbins, showlegend),
        row=row + 1, col=col + 1
    )
    fig.add_trace(
        _feature_histogram(feature_source, "Source", 'darkblue', xbins, showlegend),
        row=row + 1, col=col + 1
    )

xaxis = dict(
    showline=True,
    ticks='outside',
    mirror=True,
    linecolor='grey',
    showgrid=True,
    gridcolor='grey',
    gridwidth=0.01,
)

yaxis = dict(
    showline=True,
    ticks='outside',
    mirror=True,
    linecolor='grey',
    showgrid=True,
    gridcolor='grey',
    gridwidth=0.01,
    zeroline=True,
    zerolinecolor='grey',
    zerolinewidth=0.01
)

# Apply the axis styling to every subplot, independent of how many there are.
fig.update_xaxes(**xaxis)
fig.update_yaxes(**yaxis)

fig.update_layout(
    title='Features distributions',
    width=3000,
    # 270 px per subplot row (2700 px for a 10-row grid).
    height=270 * nrows,
    barmode='overlay',
    font = dict(
        family="Times New Roman",
        size=16,
        color='black'
    ),
    legend = dict(
        orientation="h",
        yanchor="bottom",
        y=1.02,
        xanchor="right",
        x=1
    )
)

fig.show()
# pio.write_image(fig, 'plots/features_distributions_nn.pdf',
#                 width=1000, height=900)

In [None]:
from xgboost import XGBRegressor


In [None]:
# Load the pickled models into `models`: index 0 is the "ideal" file,
# index 1 is the "real" file.
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# load model files from a trusted location.
models = []
for model_file in (
    "/home/arsde/J21_SYSU/models/16_features_max_depth_10/xgb_energy_ideal_opt_16.dat",
    "/home/arsde/J21_SYSU/models/16_features_max_depth_10/xgb_energy_real_opt_16.dat",
):
    # The context manager closes each file handle as soon as it has been read.
    with open(model_file, "rb") as model_handle:
        models.append(pickle.load(model_handle))

In [None]:
import xgboost as xgb

In [None]:
# Create an empty Booster and fill it from the saved "real" model file.
# NOTE(review): this same file is read with pickle.load in an earlier cell;
# confirm that Booster.load_model accepts it in that format.
real_model_file = "/home/arsde/J21_SYSU/models/16_features_max_depth_10/xgb_energy_real_opt_16.dat"
model_xgb_2 = xgb.Booster()
model_xgb_2.load_model(real_model_file)


In [None]:
# Create an unfitted XGBRegressor and fill it from the saved "real" model file.
# NOTE(review): this same file is read with pickle.load in an earlier cell;
# confirm that XGBRegressor.load_model accepts it in that format.
real_model_file = "/home/arsde/J21_SYSU/models/16_features_max_depth_10/xgb_energy_real_opt_16.dat"
model2 = xgb.XGBRegressor()
model2.load_model(real_model_file)
