In [None]:
import pandas as pd
import json
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
import random
import numpy as np
from joblib import dump, load
from sklearn.linear_model import LinearRegression
import os
import gc
from tqdm import tqdm
import lightgbm as lgb
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso

from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor, ExtraTreesRegressor

def rmse(y_pred_test, y_test):
    """Return the root-mean-squared error between predictions and targets.

    Note the argument order: predictions first, ground truth second (the
    reverse of scikit-learn's own ``(y_true, y_pred)`` convention).

    Parameters
    ----------
    y_pred_test : array-like
        Predicted values.
    y_test : array-like
        Ground-truth values, same shape as ``y_pred_test``.

    Returns
    -------
    float
        The RMSE. For multi-output targets this is the unweighted mean of the
        per-output RMSE values, which is what ``squared=False`` used to return.
    """
    # `squared=False` was deprecated in scikit-learn 1.4 and removed in 1.6,
    # so compute the per-output MSE and take the root ourselves; this works
    # on both old and new releases.
    per_output_mse = mean_squared_error(y_test, y_pred_test, multioutput="raw_values")
    return np.mean(np.sqrt(per_output_mse))


In [None]:
# Load previously computed experiment results if they exist.
# `is_run` is True when the results file could not be loaded (i.e. the
# experiments still have to be run) and False when `exp_df` is available.
try:
    exp_df = pd.read_csv('./final_exp.csv')
    is_run = False
except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
    # Only a missing / unreadable / malformed results file means "not run yet".
    # A bare `except:` would also hide KeyboardInterrupt and programming
    # errors (e.g. a NameError), so those are now allowed to propagate.
    is_run = True

In [None]:
import os
import tqdm
import imageio
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

from PIL import Image
from matplotlib import font_manager
from matplotlib.offsetbox import OffsetImage, AnnotationBbox, OffsetBox

In [None]:
def set_seaborn_style(font_family, background_color, grid_color, text_color):
    """Apply the notebook's chart theme through ``sns.set_style``.

    Parameters
    ----------
    font_family : value assigned to ``font.family``.
    background_color : colour used for both the axes and the figure face.
    grid_color : colour used for grid lines and the axes edge.
    text_color : colour used for axis labels, general text and tick labels.
    """
    theme = {}

    # Surfaces share the background colour.
    theme.update({key: background_color for key in ("axes.facecolor", "figure.facecolor")})

    # Grid lines and the axes edge share the grid colour; the grid is drawn
    # and kept below the plotted artists.
    theme.update({key: grid_color for key in ("grid.color", "axes.edgecolor")})
    theme.update({"axes.grid": True, "axes.axisbelow": True})

    # Every piece of text uses the same colour and font family.
    theme.update({
        key: text_color
        for key in ("axes.labelcolor", "text.color", "xtick.color", "ytick.color")
    })
    theme["font.family"] = font_family

    # Tick marks are switched off on all four sides.
    theme.update({
        key: False
        for key in ("xtick.bottom", "xtick.top", "ytick.left", "ytick.right")
    })

    # Only the bottom spine stays visible.
    theme.update({
        "axes.spines.left": False,
        "axes.spines.bottom": True,
        "axes.spines.right": False,
        "axes.spines.top": False,
    })

    sns.set_style(theme)

In [None]:
# Chart palette (hex colours) used by the plotting helpers in this notebook.
background_color, grid_color = "#2F195F", "#582FB1"
bar_color, text_color = "#835ED4", "#eee"

In [None]:
def show_available_fonts():
    """Return the sorted, de-duplicated names of the fonts Matplotlib has registered.

    The names are read from ``font_manager.fontManager.ttflist``.
    """
    return sorted({entry.name for entry in font_manager.fontManager.ttflist})

In [None]:
# font_family = "PT Mono"
# set_seaborn_style(font_family, background_color, grid_color, text_color)

In [None]:
def create_bar_chart(cols, vals, color):
    """Draw a horizontal seaborn bar plot and return what ``sns.barplot`` returns.

    Parameters
    ----------
    cols : values placed on the y axis (one bar per entry).
    vals : bar lengths, placed on the x axis.
    color : single colour applied to every bar.
    """
    # Fixed look of the bars: horizontal, full colour saturation, 0.75 wide.
    bar_options = {
        "orient": "h",
        "saturation": 1,
        "color": color,
        "width": 0.75,
    }
    return sns.barplot(x=vals, y=cols, **bar_options)

In [None]:
def format_axes(ax):
    """Set tick-label size and padding on both axes of ``ax``.

    Both axes use label size 10; the x axis gets a padding of 16 and the
    y axis a padding of 8. ``ax`` is modified in place; nothing is returned.
    """
    for axis_name, padding in (("x", 16), ("y", 8)):
        ax.tick_params(axis_name, labelsize=10, pad=padding)

#     plt.xticks(
#         ticks=ax.get_xticks()[:-1],
#         labels=["{}K".format(int(x / 1000)) for x in ax.get_xticks()[:-1]]
#     )

In [None]:
# def add_bar_icons(ax, cols, vals, background_color, zoom, pad):
#     for index in range(len(cols)):
#         name = cols[index]
#         value = vals[index]
#         try:
#             icon = plt.imread(f"/kaggle/input/datn-icon-v2/{index + 1}.png")
#         except:
#             icon = plt.imread(f"/kaggle/input/datn-7/{index + 1}.png")
#         image = OffsetImage(icon, zoom=zoom, interpolation="lanczos", resample=True, visible=True)
#         image.image.axes = ax
#         ax.add_artist(AnnotationBbox(
#             image, (value, index), frameon=True,
#             xycoords="data",
#             bboxprops={
#                 "facecolor": "#fff",
#                 "linewidth": 1,
#                 "edgecolor": background_color,
#                 "boxstyle": "circle, pad={}".format(pad),
#             }
#         ))

# def add_icon(ax, icon_name, x, y):
#     icon = plt.imread("./icons/{}.png".format(icon_name))
#     image = OffsetImage(icon, zoom=0.13, interpolation="lanczos", resample=True, visible=True)
#     image.image.axes = ax

#     ax.add_artist(AnnotationBbox(
#         image, (x, y), frameon=False,
#         xycoords="axes fraction",
#     ))

In [None]:
def create_image_from_figure(fig):
    """Rasterise a Matplotlib figure into an RGB ``PIL.Image`` and close it.

    Parameters
    ----------
    fig : matplotlib.figure.Figure
        Figure to render. Its canvas must provide ``buffer_rgba()``
        (Agg-based canvases do).

    Returns
    -------
    PIL.Image.Image
        The rendered figure in ``"RGB"`` mode.
    """
    # Operate on `fig` itself rather than on pyplot's "current figure", which
    # is not guaranteed to be the figure that was passed in.
    fig.tight_layout()
    fig.canvas.draw()

    # `canvas.tostring_rgb()` was deprecated in Matplotlib 3.8 and removed in
    # 3.10. `buffer_rgba()` exposes the rendered pixels as a
    # (height, width, 4) buffer, so no manual reshape from
    # `get_width_height()` is needed either.
    rgba = np.asarray(fig.canvas.buffer_rgba())

    # Convert (and thereby copy) before closing the figure so the returned
    # image does not depend on the canvas buffer staying alive.
    image = Image.fromarray(rgba).convert("RGB")
    plt.close(fig)

    return image

def add_padding_to_chart(chart, left, top, right, bottom, background):
    """Return a new RGB image with ``chart`` pasted onto a padded canvas.

    The canvas is ``left + right`` wider and ``top + bottom`` taller than
    ``chart`` and is filled with ``background``; ``chart`` is pasted with its
    top-left corner at ``(left, top)``.
    """
    dims = chart.size
    canvas_size = (dims[0] + left + right, dims[1] + top + bottom)

    padded = Image.new("RGB", canvas_size, background)
    padded.paste(chart, (left, top))
    return padded

In [None]:

# for city in ['hcm', 'hn']:
#     for version in [0, 1, 2, 4, 5]:
#         for col in ['rmse', 'max_error', 'explained_variance_score']:
#             tmp_df = exp_df[(exp_df['version'] == version) & (exp_df['city'] == city)]
#             tmp_df = tmp_df.sort_values(by = ['rmse'])
#             if col == 'explained_variance_score':
#                 tmp_df = tmp_df.sort_values(by = ['rmse'], ascending = False)
#             model_name_list = tmp_df.model_name.tolist()
#             val_list = tmp_df[col].tolist()

#             fig = plt.figure(figsize=(18, 12))
#             ax = create_bar_chart(model_name_list, val_list, bar_color)
#             format_axes(ax)
# #             add_bar_icons(ax, model_name_list, val_list, background_color, zoom=0.06, pad=0.1)
#             plt.title(f"Data {city.title()} City - Feature Set Version {version} - Metric {col.title()}")
#             plt.tight_layout()
#             plt.savefig(f'city_{city}_version_{version}_metric_{col}.png')
#             plt.show()

In [None]:
# Split the experiment results into one frame per value of the `city` column.
hn_df, hcm_df = (exp_df[exp_df['city'] == city_code] for city_code in ('hn', 'hcm'))

In [None]:
# gc.collect()

In [None]:
from tqdm import tqdm

In [None]:
# Preview only the first rows instead of dumping the whole frame into the
# notebook output.
hcm_df.head()

In [None]:
from pandas.plotting import table

In [35]:
# Build a wide table of one metric for the 'hcm' results:
# one row per model, one column per feature-set version.
metric = 'explained_variance_score'
model_names = [
    'abr', 'cat', 'etr', 'gbr', 'knr', 'la', 'lgbm', 'linear', 'mlp', 'rf',
    'ridge', 'xgb', 'bagging', 'BKPrice Post Processing',
]
versions = [0, 1, 2, 4, 5]

tmp = []
for model_name in tqdm(model_names):
    tmp_df = hcm_df[hcm_df['model_name'] == model_name]
    obj = {"model_name": model_name}
    for version in versions:
        metric_list = tmp_df.loc[tmp_df['version'] == version, metric].tolist()
        # Each (model, version) pair must appear exactly once in the results.
        assert len(metric_list) == 1, (
            f"expected exactly one '{metric}' row for model={model_name!r}, "
            f"version={version}, found {len(metric_list)}"
        )
        obj[version] = metric_list[0]
    tmp.append(obj)
# NOTE: the per-iteration gc.collect() was removed. Nothing large is allocated
# inside the loop, and the recorded run (~2.3 s per iteration for a trivial
# filter) was most likely dominated by that forced collection.

# Versions 0 and 4 are still validated above but are left out of the table,
# exactly as before; drop() returns a new frame instead of mutating in place.
data = pd.DataFrame(tmp).drop(columns=[0, 4])



100%|██████████| 14/14 [00:32<00:00,  2.30s/it]


In [None]:
# Leave version 2 out of the table as well. `errors="ignore"` keeps this cell
# idempotent: re-running it after the column is already gone no longer raises
# a KeyError the way `del data[2]` did.
data = data.drop(columns=[2], errors="ignore")

In [36]:
# Display the metric table: one row per model, one column per kept version.
data

Unnamed: 0,model_name,1,2,5
0,abr,0.64498,0.64403,0.637875
1,cat,0.801008,0.798371,0.800706
2,etr,0.797821,0.797105,0.799692
3,gbr,0.803848,0.804318,0.802558
4,knr,0.37882,0.37882,0.407918
5,la,0.678565,0.678565,0.678568
6,lgbm,0.818274,0.818347,0.818304
7,linear,0.688179,0.688431,0.688343
8,mlp,0.715732,0.713783,0.713698
9,rf,0.81609,0.81417,0.814717


In [34]:
# Export the metric table as a LaTeX tabular with one decimal per value.
# - The previous `formatters={"name": str.upper}` referred to a column that
#   does not exist (the column is called "model_name"), so it never had any
#   effect; it is removed, which leaves the generated LaTeX unchanged.
# - print() shows the raw LaTeX (real newlines, single backslashes) so it can
#   be copied directly, instead of the escaped string repr.
# NOTE(review): "{:.1f}" is very coarse for metrics in the 0-1 range such as
# explained_variance_score - confirm the intended precision.
print(data.to_latex(index=False, float_format="{:.1f}".format))

'\\begin{tabular}{lrr}\n\\toprule\nmodel_name & 1 & 5 \\\\\n\\midrule\nabr & 97.8 & 116.9 \\\\\ncat & 63.7 & 63.8 \\\\\netr & 64.2 & 63.9 \\\\\ngbr & 63.3 & 63.6 \\\\\nknr & 113.8 & 111.2 \\\\\nla & 81.1 & 81.1 \\\\\nlgbm & 60.9 & 60.9 \\\\\nlinear & 80.0 & 80.0 \\\\\nmlp & 77.4 & 76.5 \\\\\nrf & 61.2 & 61.5 \\\\\nridge & 80.0 & 80.0 \\\\\nxgb & 60.6 & 60.6 \\\\\nbagging & 60.2 & 60.2 \\\\\nBKPrice Post Processing & 44.9 & 44.7 \\\\\n\\bottomrule\n\\end{tabular}\n'

In [None]:
# Line chart of the selected metric: one line per feature-set version,
# one x position per model.

# Define font sizes
SIZE_DEFAULT = 14
SIZE_LARGE = 16

# rc defaults are set BEFORE the figure is created: some of them (e.g. the
# axes label size) are read when the Axes is built, so setting them afterwards
# only took effect when the cell was run a second time.
plt.rc("font", family="Roboto", weight="normal", size=SIZE_DEFAULT)  # default font
plt.rc("axes", titlesize=SIZE_LARGE, labelsize=SIZE_LARGE)  # axes title / x-y labels
plt.rc("xtick", labelsize=SIZE_DEFAULT)  # fontsize of the tick labels
plt.rc("ytick", labelsize=SIZE_DEFAULT)  # fontsize of the tick labels

fig, ax = plt.subplots(figsize=(18, 8))

x = list(range(len(data)))

# Version 0 is drawn as the dashed baseline; every other version keeps the
# colour it had in the original palette.
BASELINE_VERSION = 0
VERSION_COLORS = {1: "#2B2F42", 2: "#8D99AE", 4: "#EF233C", 5: "#EF298C"}

# Earlier cells drop some version columns from `data`, so indexing all of
# 0/1/2/4/5 unconditionally raised a KeyError. Plot only the versions that are
# actually present.
if BASELINE_VERSION in data.columns:
    ax.plot(
        x,
        data[BASELINE_VERSION].tolist(),
        label="Baseline",
        color="lightgray",
        linestyle="--",
        linewidth=2,
    )

for version, color in VERSION_COLORS.items():
    if version in data.columns:
        ax.plot(x, data[version].tolist(), label=f"Version{version}", color=color, linewidth=3)

# Hide the all but the bottom spines (axis lines)
for side in ("right", "left", "top"):
    ax.spines[side].set_visible(False)

# Only show ticks on the left and bottom spines
ax.yaxis.set_ticks_position("left")
ax.xaxis.set_ticks_position("bottom")
ax.spines["bottom"].set_bounds(min(x), max(x))

# Label the y axis with the metric that was actually tabulated instead of a
# hard-coded "MAX ERROR" ('max_error' still renders as "MAX ERROR").
ax.set_ylabel(metric.replace("_", " ").upper())
ax.legend()

ax.set_xticks(x)
ax.set_xticklabels(data['model_name'].tolist(), rotation='vertical')

# Make room for the vertical model names so they are not clipped in the file.
fig.tight_layout()

# -------------------BEGIN-CHANGES------------------------
fig.savefig("great.png", dpi=300)
# --------------------END CHANGES------------------------