In [None]:
import os
import gc
import time
import warnings
from datetime import datetime

from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from numpy import iinfo, finfo, int8, int16, int32, int64, float32, float64

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import losses
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras import models
from tensorflow.keras import activations

from sklearn.model_selection import KFold
from sklearn.model_selection._split import _BaseKFold, indexable, _num_samples
from sklearn.utils.validation import _deprecate_positional_args

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.io as pio
from IPython.display import HTML

# Render every plotly figure with the dark template.
pio.templates.default = "plotly_dark"

# Options handed to plotly when a figure is shown.
plot_config = {
    'scrollZoom': True,
    'displayModeBar': True,
    'displaylogo': False,
}

# Seaborn look-and-feel plus the ten matplotlib cycle aliases "C0".."C9".
sns.set(style="ticks", font_scale=1.2, palette='deep', color_codes=True)
colors = ["C{}".format(cycle_pos) for cycle_pos in range(10)]

# Default plotly colour sequence (hex codes).
default_color_list = [
    '#1f77b4',  # muted blue
    '#ff7f0e',  # safety orange
    '#2ca02c',  # cooked asparagus green
    '#d62728',  # brick red
    '#9467bd',  # muted purple
    '#8c564b',  # chestnut brown
    '#e377c2',  # raspberry yogurt pink
    '#7f7f7f',  # middle gray
    '#bcbd22',  # curry yellow-green
    '#17becf',  # blue-teal
]

# One global seed shared by numpy and TensorFlow.
GLOBAL_RANDOM_SEED = 2022
np.random.seed(GLOBAL_RANDOM_SEED)
tf.random.set_seed(GLOBAL_RANDOM_SEED)

# Silence all warnings from here on and cap how many DataFrame columns
# pandas displays.
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', 50)


In [None]:
class ReduceMemoryUsage():
    """Reduce the memory footprint of a pandas DataFrame by downcasting columns.

    Every column of the DataFrame is scanned for its minimum and maximum
    value. When that range fits inside a narrower numpy type, the column is
    cast to that type: integer columns are cast to the narrowest of
    int8 / int16 / int32 that holds them, floating-point columns are cast to
    float32. For example, a float64 column whose values lie in [l, h], with
    l and h both inside the float32 range [-k, +k], is stored as float32.

    Columns that are not plain numpy integer or float columns (object,
    datetime, timedelta, bool, pandas extension types, ...) are left
    untouched, and a column is never cast to a type that is not strictly
    smaller than the one it already has.

    Notes:
    ----------
        * The DataFrame passed to the constructor is modified in place; the
          object returned by reduce_memory_usage() is that same DataFrame.
        * Casting float64 to float32 only checks the value range, so the
          stored values may lose precision.
        * Range checks are strict: a column that touches the exact limit of
          a type (e.g. 127 for int8) is given the next wider type.

    @Parameters:
    ----------
        data_table: {pandas DataFrame-like}
            The pandas DataFrame to shrink. It must be provided before
            reduce_memory_usage() is called.
        verbose: {bool-like}
            Whether to print details of the memory reduction. Only the
            literal value True enables the detailed output.

    @Return:
    ----------
        The memory-reduced DataFrame (returned by reduce_memory_usage()).

    @References:
    ----------
    [1] https://docs.scipy.org/doc/numpy/reference/generated/numpy.iinfo.html
    [2] https://wizardforcel.gitbooks.io/ts-numpy-tut/content/3.html
    """
    def __init__(self, data_table=None, verbose=True):
        self._data_table = data_table
        self._verbose = verbose

    def type_report(self, data_table):
        """Reporting basic characteristics of the tabular data data_table.

        Returns a DataFrame with one row per column of data_table and two
        columns: "types" (dtype name as a string) and "feature_name".
        """
        data_types = list(map(str, data_table.dtypes.values))
        basic_report = pd.DataFrame(data_types, columns=["types"])
        basic_report["feature_name"] = list(data_table.columns)
        return basic_report

    def reduce_memory_usage(self):
        """Downcast the stored DataFrame in place and return it."""
        memory_reduced_data = self.__reduce_memory()
        return memory_reduced_data

    @staticmethod
    def _pick_target_dtype(column, dtype):
        """Return the narrower numpy type to cast column to, or None to skip.

        column is the data of one feature and dtype is its current dtype.
        None is returned when the column is not a numpy integer/float
        column or when no strictly narrower type can hold its value range.
        """
        # Extension dtypes (category, nullable ints, strings, tz-aware
        # datetimes, ...) are not numpy dtypes and are left alone.
        if not isinstance(dtype, np.dtype):
            return None

        # dtype.kind is used instead of np.issubdtype(..., np.integer)
        # because numpy treats timedelta64 as a signed-integer subtype.
        if dtype.kind in "iu":
            candidates, limits = (int8, int16, int32, int64), iinfo
        elif dtype.kind == "f":
            candidates, limits = (float32,), finfo
        else:
            return None

        feature_min = column.min()
        feature_max = column.max()
        for candidate in candidates:
            # Candidates are ordered narrowest first; once one is no
            # smaller than the current type nothing can be gained.
            if np.dtype(candidate).itemsize >= dtype.itemsize:
                break
            bounds = limits(candidate)
            # A NaN min/max (empty or all-NaN column) fails both
            # comparisons, so such a column is never cast.
            if feature_min > bounds.min and feature_max < bounds.max:
                return candidate
        return None

    def __reduce_memory(self):
        """Scan every column, downcast where possible, and report the savings.

        The banner lines are always printed; the per-column progress and
        the before/after memory figures are printed only when verbose is
        True. A failure on one column is reported and does not stop the
        processing of the remaining columns.
        """
        print("\nReduce memory process:")
        print("-------------------------------------------")
        memory_before_reduced = self._data_table.memory_usage(
            deep=True).sum() / 1024**2
        if self._verbose is True:
            print("@Memory usage of data is {:.5f} MB.".format(
                memory_before_reduced))

        # Take the (name, dtype) pairs once, straight from the DataFrame,
        # so each column is paired with its own dtype even when column
        # names repeat.
        column_dtypes = list(self._data_table.dtypes.items())
        n_features = len(column_dtypes)

        # Scan each feature in data_table, reduce the memory usage for features
        for ind, (name, dtype) in enumerate(column_dtypes):
            try:
                target = self._pick_target_dtype(self._data_table[name], dtype)
                if target is not None:
                    self._data_table[name] = self._data_table[name].astype(target)
            except Exception as error_msg:
                # Best effort: report the problem and keep the column as is.
                print("\n--------ERROR INFORMATION---------")
                print(error_msg)
                print("Error on the {}".format(name))
                print("--------ERROR INFORMATION---------\n")
            if self._verbose is True:
                print("Processed {} feature({}), total is {}.".format(
                    ind + 1, name, n_features))

        memory_after_reduced = self._data_table.memory_usage(
            deep=True).sum() / 1024**2
        if self._verbose is True:
            print("@Memory usage after optimization: {:.5f} MB.".format(
                memory_after_reduced))
            print("@Decreased by {:.5f}%.".format(
                100 * (memory_before_reduced - memory_after_reduced) / memory_before_reduced))
        print("-------------------------------------------")
        return self._data_table


In [None]:
# Plot the cumulative sum of resp_1 .. resp_4 against ts_id on a 2x2 grid,
# one response per panel, using only the leading rows of train_df.
# Scroll-zoom is switched off for this figure.
plot_config = dict({'scrollZoom': False, 'displayModeBar': True, 'displaylogo': False})
HEAD_ROWS = 200000  # number of leading rows of train_df that are drawn

fig = make_subplots(rows=2, cols=2)

# The x values are the same for every panel, so they are read once.
ts_id = train_df["ts_id"].head(HEAD_ROWS).values

ind = 1
for row in range(1, 2+1):
    for col in range(1, 2+1):
        resp_name = "resp_{}".format(ind)
        resp_tmp = np.cumsum(train_df[resp_name].head(HEAD_ROWS).values)

        # The trace is named after the plotted column (resp_<ind>), so it
        # matches the y-axis title of its panel.
        fig.add_trace(go.Scatter(x=ts_id, y=resp_tmp,
                                 mode="lines", line_width=2,
                                 line_color=default_color_list[ind],
                                 name=resp_name), row=row, col=col)
        fig.update_yaxes(title_text=resp_name, ticks="outside",
                         row=row, col=col, automargin=True)
        ind += 1

# Layout and rendering are currently disabled; uncomment to display the figure.
# fig.update_layout(go.Layout(title="resp plot", width=900, height=800,
#                             showlegend=False))
# fig.show(config=plot_config)