In [1]:
import pandas as pd
import numpy as np
import scipy
from scipy.stats import lognorm, norm, poisson
from sklearn.preprocessing import MinMaxScaler, LabelEncoder, OneHotEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, make_scorer
import category_encoders as ce
from statsmodels.tsa.seasonal import seasonal_decompose
from copy import copy, deepcopy
import plotly
from plotly import tools
import plotly.graph_objs as go
from plotly.subplots import make_subplots
from copy import copy, deepcopy
import dask
import gplearn as gpl        
from gplearn.genetic import SymbolicRegressor, SymbolicTransformer
import zipfile

In [2]:
def plot_gp(mu, lb, ub, test_x, test_y, train_x=None, train_y=None, name='', samples={},
            layout='v', xaxis_title='Time', yaxis_title='Sales', fig_size=[1000,500], w=3, f=10):
    fig = make_subplots(rows=1, cols=1, subplot_titles=("Samples"))
    samples = {'sample '+str(i): s for i, s in enumerate(samples)} if not isinstance(samples, dict) else samples
    if train_x is not None:
        fig.add_trace(go.Scatter(x=train_x, y=train_y, mode='lines', name='History', line=dict(width=w), line_color='#1a76ff'))  # plot training data

    fig.add_trace(
        go.Scatter(x=test_x, y=ub, fill=None, mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))
    fig.add_trace(
        go.Scatter(x=test_x, y=lb, fill='tonexty', mode='lines', line_color='rgba(199, 19, 19, 0.3)',
                   fillcolor='rgba(249, 129, 37, 0.3)', showlegend=True, name='95% uncertainty interval'))

    fig.add_trace(go.Scatter(x=test_x, y=mu, line=dict(color='#c71313', width=w), mode='lines', name='Skyolia Forecast'))  # plot the mean
    fig.add_trace(go.Scatter(x=test_x, y=test_y, line=dict(color='#1a76ff', width=w), mode='lines', name='Observed'))
    for k, v in samples.items():
        fig.add_trace(go.Scatter(x=test_x, y=v, name=k, mode='lines', 
                                 line=dict(width=w)))  # plot samples
    fig.update_layout(title_text=name, paper_bgcolor='#343434', plot_bgcolor='#343434', xaxis_title=xaxis_title, yaxis_title=yaxis_title,
                          font=dict(family="Montserrat", color="#fff", size=f), title_x=0.5, hovermode="x")
    fig.update_xaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    fig.update_yaxes(showgrid=True, showline=False, gridcolor='#c9c9c9', gridwidth=0.0005)
    return fig

def confidence_interval(mu, cov):
    std = np.sqrt(np.diag(cov)) #compute std
    uncertainty = 1.96 * std
    return mu, std, mu-uncertainty, mu+uncertainty

def order_quantity(mu, std, cu, co):
    cf = cu/(cu+co)
    return scipy.stats.norm.ppf(cf, loc=mu, scale=std)

def plot_cov(covs, cols, subplot_titles, labels=None):
    fig = make_subplots(rows=int(len(covs)/cols) + 1, cols=cols, subplot_titles=subplot_titles)
    height = (1000/cols)*2
    for i, cov in enumerate(covs):
        row, col = int(i / cols)+1, (i%cols)+1
        fig.add_trace(go.Heatmap(z=cov, x=labels, y=labels, colorscale='Greys'), row=row, col=col)
    fig.update_layout(title_text='Cov matrix', height=height)#, yaxis1=dict(domain=[0, 1]), yaxis1=dict(domain=[0, 1])
    return fig

def plot_ts_decomposition(df, index, obs, model="additive", features=False, period=None, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = seasonal_decompose(decompose, model=model, period=period)
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if features:
        features = [col for col in list(df.columns) if col not in [index, obs]]
        for col in features:
            fig.add_trace(go.Scatter(x=decompose.index, y=df[col].values, name=col, mode='lines'))
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

def plot_stl_decomposition(df, index, obs, model="additive", period=None, seasonal=7, samples=None):
    df.index = df[index]
    decompose = df[[index, obs]]
    decompose.index = df[index]
    decompose = decompose[[obs]]
    
    decomposition = STL(decompose, period=period, seasonal=seasonal).fit()
    trend, seasonal, residual = decomposition.trend, decomposition.seasonal, decomposition.resid
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=decompose.index, y=decompose.iloc[:,0], mode='lines', name='observed')) #plot the observed
    fig.add_trace(go.Scatter(x=decompose.index, y=trend.tolist(), mode='lines', name='trend')) #plot the trend
    fig.add_trace(go.Scatter(x=decompose.index, y=seasonal.tolist(), mode='lines', name='seasonal')) #plot the seasonal
    fig.add_trace(go.Scatter(x=decompose.index, y=residual.tolist(), mode='lines', name='residual')) #plot the residual
    if samples is not None:
        for i, s in enumerate(samples):
            fig.add_trace(go.Scatter(x=decompose.index, y=s, name='sample '+str(i), mode='lines')) #plot samples
    fig.update_layout(title_text='Decomposition')
    return fig, trend.dropna().values, seasonal.dropna().values, residual.dropna().values

In [38]:
df = pd.read_csv('/home/skyolia/JupyterProjects/data/time_series/nestle.csv', sep=';')
df.rename(columns={"PERIOD_TAG": "Date", 'numeric_distribution_selling_promotion': 'promo',
                  'numeric_distribution_selling_promotion_hyperparmarkets': 'hyp_promo'}, inplace=True)
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
df = df.loc[(df['product_group'] == 'Product_17')]
df = df.drop(columns=[col for col in df.columns if len(df[col].unique()) == 1])
df

Unnamed: 0,Date,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2
11,2016-08-07,485,485,0,0.0,0.000000,0.0,0.0
40,2016-08-14,321,321,393,6.1,0.000000,0.0,0.0
71,2016-08-21,623,623,345,5.5,0.000000,0.0,0.0
102,2016-08-28,422,422,375,5.9,0.000000,0.0,0.0
134,2016-09-04,218,218,596,10.4,61.849711,1.0,1.0
...,...,...,...,...,...,...,...,...
6679,2019-12-01,1763,1902,637,0.2,0.000000,0.0,0.0
6721,2019-12-08,1140,1382,797,0.2,0.000000,0.0,0.0
6763,2019-12-15,552,726,685,0.6,26.589595,0.0,1.0
6805,2019-12-22,835,835,2756,52.8,88.439306,0.0,1.0


In [39]:
'''df['dayofweek'] = df['Date'].dt.dayofweek 
df['sin_dayofweek'] = np.sin(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df['cos_dayofweek'] = np.cos(2*np.pi*df['dayofweek']/np.max(df['dayofweek']))
df.drop(columns=['dayofweek'], inplace=True)'''

df['dayofyear'] = df['Date'].dt.dayofyear
df['sin_dayofyear'] = np.sin(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df['cos_dayofyear'] = np.cos(2*np.pi*df['dayofyear']/np.max(df['dayofyear']))
df.drop(columns=['dayofyear'], inplace=True)
df

Unnamed: 0,Date,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,sin_dayofyear,cos_dayofyear
11,2016-08-07,485,485,0,0.0,0.000000,0.0,0.0,-0.601624,-0.798779
40,2016-08-14,321,321,393,6.1,0.000000,0.0,0.0,-0.693281,-0.720667
71,2016-08-21,623,623,345,5.5,0.000000,0.0,0.0,-0.774884,-0.632103
102,2016-08-28,422,422,375,5.9,0.000000,0.0,0.0,-0.845249,-0.534373
134,2016-09-04,218,218,596,10.4,61.849711,1.0,1.0,-0.903356,-0.428892
...,...,...,...,...,...,...,...,...,...,...
6679,2019-12-01,1763,1902,637,0.2,0.000000,0.0,0.0,-0.493776,0.869589
6721,2019-12-08,1140,1382,797,0.2,0.000000,0.0,0.0,-0.385663,0.922640
6763,2019-12-15,552,726,685,0.6,26.589595,0.0,1.0,-0.271958,0.962309
6805,2019-12-22,835,835,2756,52.8,88.439306,0.0,1.0,-0.154309,0.988023


In [40]:
output_col = ['SellOut']
time_col = 'Date'
to_remove = ['dispatches_SellIn', 'orders_SellIn']
categorical = []
binary = ['type_promo_1', 'type_promo_2']
numerical = [col for col in df.columns if col not in categorical + binary + to_remove + output_col + [time_col]]
df[numerical] = df[numerical].apply(pd.to_numeric,1)
fig = go.Figure(data=go.Heatmap(z=df[numerical+output_col].corr(),x=numerical+output_col,y=numerical+output_col))
fig.show()

In [41]:
train = df[df['Date'] < '2019-07-01']
test = df[df['Date']>='2019-07-01']
features = categorical + numerical + binary

X_train, X_test = train[features], test[features]
Y_train, Y_test = train[output_col] + 1e-15, test[output_col] + 1e-15
T_train, T_test = train[time_col], test[time_col]

y_scaler = MinMaxScaler(feature_range=(0, 1))
Y_train, Y_test = y_scaler.fit_transform(Y_train).ravel() + 1e-15, Y_test.values.ravel() + 1e-15

MS = MinMaxScaler(feature_range=(-1, 1))
scaled_train = MS.fit_transform(X_train[numerical])
scaled_test = MS.transform(X_test[numerical])
X_train[numerical], X_test[numerical] = scaled_train, scaled_test

encoder = ce.CatBoostEncoder(verbose=1, cols=categorical)
encoder.fit(X_train[categorical], Y_train)
X_train[categorical] = encoder.transform(X_train[categorical])
X_test[categorical] = encoder.transform(X_test[categorical])

X_train.shape, X_test.shape, Y_train.shape, Y_test.shape



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



((152, 6), (26, 6), (152,), (26,))

In [42]:
X_train

Unnamed: 0,promo,hyp_promo,sin_dayofyear,cos_dayofyear,type_promo_1,type_promo_2
11,-1.000000,-1.00000,-0.601630,-0.798813,0.0,0.0
40,-0.758416,-1.00000,-0.693288,-0.720699,0.0,0.0
71,-0.782178,-1.00000,-0.774891,-0.632134,0.0,0.0
102,-0.766337,-1.00000,-0.845257,-0.534401,0.0,0.0
134,-0.588119,0.27381,-0.903364,-0.428918,1.0,1.0
...,...,...,...,...,...,...
5587,-0.996040,-1.00000,0.486277,-0.873842,0.0,0.0
5629,-0.996040,-1.00000,0.377711,-0.925960,0.0,0.0
5671,-0.996040,-1.00000,0.263668,-0.964651,0.0,0.0
5713,-0.996040,-1.00000,0.145801,-0.989351,0.0,0.0


In [44]:
fig, trend, seasonal, residual = plot_ts_decomposition(train, time_col, output_col[0], features=True)
fig.show()
print(np.mean(trend), np.var(trend), np.std(trend))
print(np.mean(seasonal), np.var(seasonal), np.std(seasonal))
print(np.mean(residual), np.var(residual), np.std(residual))

560.3686538461537 7965.0737004807615 89.24726158533248
-2.1397783011522917 44411.71276043721 210.74086637488517
2.036982248520733 34752.19240199704 186.41939921048194


In [None]:
def power_f(x, d):
    d = np.round(np.clip(d, 1, 2)).astype(int)
    return np.power(x, d)

def _tanh(x, a, b):
    return np.tanh(a*x+b)

power = gpl.functions.make_function(power_f, 'pow', 2, wrap=True)
tanh = gpl.functions.make_function(_tanh, 'tanh', 3, wrap=True)
function_set = ['add', 'sub', 'mul', 'div', 'sqrt', power, 'log', 'min', 'max', 'sin', 'cos', 'abs', tanh, 'inv', 'neg']

gp = SymbolicTransformer(generations=1000, population_size=2000,
                         hall_of_fame=100, n_components=5, const_range=(-5, 5),
                         function_set=function_set, metric='spearman',
                         parsimony_coefficient=0.0005,
                         max_samples=0.9, verbose=1,
                         random_state=0, n_jobs=-1)

gp.fit(X_train, Y_train)
print(gp._program)

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    14.72         0.177603        6         0.470043        0.0422796     25.30m
   1     9.40         0.312159       43         0.496002         0.070378     21.94m
   2     9.51         0.330037       43         0.508023        0.0916666     24.16m
   3    11.40         0.329153       51         0.535767        0.0963128     27.81m
   4    18.21         0.312048       65         0.535525        0.0840574     33.78m
   5    32.37         0.315726       44         0.568504        0.0741837     38.95m
   6    44.93         0.337795       56         0.576138         0.182605     46.93m
   7    44.75         0.337719       72         0.584536        0.0286298     56.93m
   8    45.66         0.352174       57         0.610823        0.0543371  

  94    84.22         0.482377       83         0.721849         0.155785    254.99m
  95    84.60         0.481695       75         0.723903         0.170791    234.16m
  96    84.91         0.489067      163         0.725097         0.270878    247.94m
  97    85.60         0.489243      166         0.724689         0.252714    258.37m
  98    85.44         0.484584       92         0.727358          0.18644    257.90m
  99    84.15         0.480821       81         0.721987          0.17032    258.75m
 100    85.11         0.485946       87         0.721256         0.152512    264.80m
 101    84.19         0.491364      158         0.738527         0.223509    248.54m
 102    87.12         0.487696       98         0.725523         0.171886    250.42m
 103    85.90         0.487214      101         0.729154          0.29225    256.06m
 104    85.55         0.493978       89         0.727186         0.273184    265.99m
 105    85.83         0.494816       93          0.73963        0

 191    88.56         0.491054       98         0.726098         0.313908    251.49m
 192    87.43         0.489303      105         0.743086         0.315044    251.62m
 193    88.81         0.496044       89         0.742951         0.147386    243.91m
 194    89.39         0.492429       95         0.740556         0.182811    245.78m
 195    90.48         0.500304       92         0.744585         0.310223    263.14m
 196    90.34         0.500814      116          0.73091          0.41124    242.95m
 197    90.56         0.491624      104         0.740039         0.177906    252.28m
 198    91.62         0.501593       89         0.734839         0.426106    248.56m
 199    91.68            0.503      131         0.740742         0.190744    252.32m
 200    91.54         0.495555      101         0.738455         0.189804    247.93m
 201    90.78         0.493031       93         0.732214         0.267058    271.26m
 202    90.47         0.492397      127         0.740691         

 288   108.30         0.523519      102         0.765596         0.330933    274.76m
 289   108.34         0.524118      109         0.768174         0.104037    240.70m
 290   107.36            0.518      131         0.762734         0.234582    243.99m
 291   107.24         0.533696       96         0.771945         0.349824    241.02m
 292   109.00          0.52404      101          0.77444        0.0757212    235.42m
 293   107.40         0.521625      100         0.758544         0.293874    255.15m
 294   105.23          0.51612      111         0.759126         0.316552    244.33m
 295   105.84         0.527019      103         0.771761         0.333274    279.81m
 296   106.46         0.528933      108         0.772203        0.0903471    241.57m
 297   104.58         0.525378      116         0.768519         0.360073    244.52m
 298   102.85         0.514255      110         0.773351         0.276293    232.28m
 299   104.61         0.524008      110         0.765546         

 385   106.22         0.527139      104         0.765547         0.271264    211.80m
 386   108.40         0.522763      104         0.758762         0.403671    206.60m
 387   108.78         0.531561      112         0.774966         0.342136    212.36m
 388   108.58         0.518454      115           0.7734         0.314382    218.40m
 389   109.36         0.520764      148         0.773879         0.027727    212.99m
 390   110.34         0.533724       95         0.758739          0.20113    216.44m
 391   108.47         0.526365      119         0.767578         0.212205    264.07m
 392   108.29         0.515839       97         0.769462        0.0687687    212.93m
 393   107.25         0.527764      128         0.766915         0.276759    208.23m
 394   106.82         0.525378      106         0.768689         0.216751    200.65m
 395   107.32         0.522214      115         0.767018         0.296593    204.71m
 396   107.83         0.523298      156         0.766001         

 482   113.24         0.534235      123         0.785121         0.198468    185.74m
 483   113.84         0.529806      155         0.777167         0.178322    190.93m
 484   113.56         0.528682      127         0.775959          0.27081    190.99m
 485   111.89         0.533185      113         0.772146         0.384996    190.49m
 486   111.44         0.532449      115         0.768025         0.270675    191.26m
 487   109.42         0.520614      106         0.771994         0.267716    187.43m
 488   111.57         0.538063      120         0.768475         0.244717    186.66m
 489   112.29         0.530937      122         0.766581         0.213906    184.38m
 490   109.56         0.524718      124         0.767877         0.209475    186.77m
 491   109.34          0.52975      132         0.771073          0.25266    175.93m
 492   110.07         0.525971      104         0.775582         0.277773    181.26m
 493   109.95         0.530572      107         0.768658         

 579   110.24         0.530967      110         0.779071         0.119331    156.08m
 580   108.01         0.524177      176         0.773657         0.335521    154.54m
 581   107.40          0.53321      116         0.771016         0.384173    147.79m
 582   107.56         0.519277       97         0.765047         0.366874    152.81m
 583   107.84         0.524032      155         0.773498         0.213868    154.50m
 584   107.93         0.525817      107         0.767269         0.370002    154.53m
 585   106.20         0.525415      100         0.772196         0.309182    153.61m
 586   105.45         0.514783      120         0.779314        0.0181329    155.19m
 587   105.73         0.519845      112         0.771347          0.14365    144.78m
 588   105.66         0.522339      105         0.763947         0.415818    149.27m
 589   106.40         0.519732      106         0.770575         0.305866    144.37m
 590   105.49          0.51327      107           0.7683         

 676   103.47         0.513466      113         0.765626         0.492884    112.49m
 677   102.68         0.513185      106          0.78327          0.22302    116.78m
 678   103.87         0.532476       97         0.765189         0.429512    112.64m
 679   102.55         0.515885      110          0.79027           0.1064    116.42m
 680   103.62         0.517267      104         0.786281        0.0768595    115.20m
 681   102.04          0.50477      110          0.76702         0.403136    109.34m
 682   104.40         0.518129      101         0.770882         0.318987    110.36m
 683   102.83         0.509716      101         0.765216         0.308332    107.76m
 684   104.16         0.519713      100         0.768642        0.0379771    151.79m
 685   105.46         0.517388      109         0.770265         0.376854    109.14m
 686   104.75         0.516762      108         0.780724        0.0548008    108.96m
 687   103.81         0.521389       97         0.772112         

 773   106.26          0.52894      134         0.760652         0.190662     83.89m
 774   106.34         0.531163      142         0.770193         0.294899     86.38m
 775   106.32         0.526096      104         0.774743        0.0330631     84.86m
 776   107.78         0.522612      143         0.766372         0.293286     83.10m
 777   107.16         0.535119      125         0.789548           0.1509     83.89m
 778   107.31         0.529253      106         0.776164         0.170689     79.87m
 779   105.93         0.527069      120         0.774546         0.127214     82.08m
 780   107.88         0.526999      117         0.771005         0.363156     79.14m
 781   107.35         0.523343      119          0.77182         0.100344     82.27m
 782   107.78         0.529039      243          0.78508        0.0776653     79.92m
 783   107.03          0.52186      113          0.77862        0.0762422     78.53m
 784   105.80         0.511204      120         0.772345         

In [14]:
train_feat = gp.transform(X_train)
new_train = np.hstack((X_train, train_feat))
test_feat = gp.transform(X_test)
new_test = np.hstack((X_test, test_feat))

array([[ 4.12354032e-01, -1.00000000e+00, -1.00000000e+00, ...,
        -3.55295829e-01, -3.55295829e-01, -3.55295829e-01],
       [ 2.15729997e-02, -1.00000000e+00, -1.00000000e+00, ...,
        -9.72457636e-04, -9.72457636e-04, -9.72457636e-04],
       [ 4.49998020e-02, -1.00000000e+00, -1.00000000e+00, ...,
        -4.23127083e-03, -4.23127083e-03, -4.23127083e-03],
       ...,
       [ 2.15729997e-02, -9.97874601e-01, -1.00000000e+00, ...,
        -9.72457636e-04, -9.72457636e-04, -9.72457636e-04],
       [ 4.49998020e-02, -9.95749203e-01, -1.00000000e+00, ...,
        -4.23127083e-03, -4.23127083e-03, -4.23127083e-03],
       [ 8.22482218e-02, -9.61742827e-01, -1.00000000e+00, ...,
        -1.41352226e-02, -1.41352226e-02, -1.41352226e-02]])

In [21]:
train

Unnamed: 0,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,sin_dayofyear,cos_dayofyear
2,2016-08-07,Product_04,3976,3976,0,0.0,0.0,0.0,0.0,-0.601624,-0.798779
8,2016-08-07,Product_11,363,363,0,0.0,0.0,0.0,0.0,-0.601624,-0.798779
11,2016-08-07,Product_17,485,485,0,0.0,0.0,0.0,0.0,-0.601624,-0.798779
20,2016-08-07,Product_32,224,224,0,0.0,0.0,0.0,0.0,-0.601624,-0.798779
30,2016-08-14,Product_04,8845,8845,3752,71.9,0.0,0.0,0.0,-0.693281,-0.720667
...,...,...,...,...,...,...,...,...,...,...,...
5728,2019-06-23,Product_32,2479,2896,1117,0.6,0.0,0.0,0.0,0.145799,-0.989314
5742,2019-06-30,Product_04,7462,7462,5698,0.7,0.0,0.0,0.0,0.025818,-0.999667
5749,2019-06-30,Product_11,182,727,286,0.1,0.0,0.0,0.0,0.025818,-0.999667
5755,2019-06-30,Product_17,554,554,703,0.2,0.0,0.0,0.0,0.025818,-0.999667


In [32]:
train_feat = gp.transform(X_train)
train_feat = pd.DataFrame(train_feat, columns=['feat_'+str(i) for i in range(train_feat.shape[1])])
new_train = pd.concat((train.reset_index(), train_feat), axis=1)

test_feat = gp.transform(X_test)
test_feat = pd.DataFrame(test_feat, columns=['feat_'+str(i) for i in range(test_feat.shape[1])])
new_test = pd.concat((test.reset_index(), test_feat), axis=1)
new_train

Unnamed: 0,index,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,...,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9
0,2,2016-08-07,Product_04,3976,3976,0,0.0,0.0,0.0,0.0,...,-0.861628,-0.146508,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296
1,8,2016-08-07,Product_11,363,363,0,0.0,0.0,0.0,0.0,...,-0.045078,-0.000021,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972
2,11,2016-08-07,Product_17,485,485,0,0.0,0.0,0.0,0.0,...,-0.094029,-0.000190,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231
3,20,2016-08-07,Product_32,224,224,0,0.0,0.0,0.0,0.0,...,-0.171861,-0.001163,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135
4,30,2016-08-14,Product_04,8845,8845,3752,71.9,0.0,0.0,0.0,...,-0.861628,-0.146508,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
603,5728,2019-06-23,Product_32,2479,2896,1117,0.6,0.0,0.0,0.0,...,-0.171861,-0.001163,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135
604,5742,2019-06-30,Product_04,7462,7462,5698,0.7,0.0,0.0,0.0,...,-0.861628,-0.146508,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296
605,5749,2019-06-30,Product_11,182,727,286,0.1,0.0,0.0,0.0,...,-0.045078,-0.000021,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972
606,5755,2019-06-30,Product_17,554,554,703,0.2,0.0,0.0,0.0,...,-0.094029,-0.000190,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231


In [33]:
df = pd.concat((new_train, new_test))
df

Unnamed: 0,index,Date,product_group,dispatches_SellIn,orders_SellIn,SellOut,promo,hyp_promo,type_promo_1,type_promo_2,...,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9
0,2,2016-08-07,Product_04,3976,3976,0,0.0,0.000000,0.0,0.0,...,-0.861628,-0.146508,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296
1,8,2016-08-07,Product_11,363,363,0,0.0,0.000000,0.0,0.0,...,-0.045078,-0.000021,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972,-0.000972
2,11,2016-08-07,Product_17,485,485,0,0.0,0.000000,0.0,0.0,...,-0.094029,-0.000190,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231
3,20,2016-08-07,Product_32,224,224,0,0.0,0.000000,0.0,0.0,...,-0.171861,-0.001163,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135
4,30,2016-08-14,Product_04,8845,8845,3752,71.9,0.000000,0.0,0.0,...,-0.861628,-0.146508,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,6820,2019-12-22,Product_32,1982,1982,1540,0.3,0.000000,0.0,0.0,...,-0.171861,-0.001163,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135,-0.014135
100,6834,2019-12-29,Product_04,4510,4510,5162,0.3,0.000000,0.0,0.0,...,-0.861628,-0.146508,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296,-0.355296
101,6841,2019-12-29,Product_11,362,362,361,4.1,62.427746,1.0,1.0,...,-0.072814,-0.000034,-0.001571,-0.001571,-0.001571,-0.001571,-0.001571,-0.001571,-0.001571,-0.001571
102,6847,2019-12-29,Product_17,385,385,672,3.2,0.000000,0.0,0.0,...,-0.094029,-0.000190,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231,-0.004231


In [37]:
columns=['feat_'+str(i) for i in range(train_feat.shape[1])]
fig = go.Figure(data=go.Heatmap(z=df[columns+numerical+output_col].corr(),x=columns+numerical+output_col,y=columns+numerical+output_col))
fig.show()

In [34]:
df.to_csv('gfg_augmented_data.csv', index=False)