In [12]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm.notebook import tqdm
from IPython.display import clear_output

import os
import random
import datetime

from prophet import Prophet

In [50]:
def make_ts(data, window_size=2, method = 'smart'):
    '''
    input:
    method - linear or ma
    data - data with columns sellout and nth_month
    window_size - window size for moving average
    
    if method = linear or smart then window_size do nothing
    '''
    sellouts = data.sellout.reset_index(drop=True)
    months = data.nth_month.reset_index(drop=True)
    sellouts_before_fill = []
    for i in range(len(sellouts)):
        try:
            n_missing_values = months[i+1] - months[i] - 1
            sellouts_before_fill += [sellouts[i]] + [np.nan]*n_missing_values
        except:
             sellouts_before_fill += [sellouts[i]]

    months = np.arange(min(months), max(months)+1)  
    sellouts_before_fill = pd.Series(sellouts_before_fill, index = months)
    new_sellouts = fill_missing_values(sellouts_before_fill.copy(), window_size, method)
    
    # plt.title(data.sku_name.unique()[0]) #always alone sku_name
    # plt.plot(new_sellouts, marker = '.', color = 'green')
    # plt.plot(sellouts_before_fill, marker = 'o', color = 'blue')
    # plt.legend(['filled data','source data'])
    # plt.show()
    
    return new_sellouts
    
    
        
def fill_missing_values(ts, window_size, method):
    if method == 'ma':
        linear_interpol(ts, window_size)
        for i in ts.index:
            if np.isnan(ts[i]):
                ts[i] = ts[i-window_size:i].mean()
    elif method == 'linear':
        ts.interpolate(method='linear', inplace=True)
    elif method == 'smart':
        ts = smart_filling(ts)
    return ts
            
    
def linear_interpol(ts, min_row):
    while True:
        if np.isnan(ts[:min_row]).sum() == 0:
            break
        else:
            start = end =-1
            for i in ts.index:
                if start == -1 and np.isnan(ts[i]):
                    start = i - 1
                elif start!=-1 and not np.isnan(ts[i]):
                    end = i+1
                if start!=-1 and end!=-1:
                    ts[start:end].interpolate(method='linear', inplace=True)
                    break
                    
def smart_filling(ts):
    '''
    mod=0 nothing
    mod=1 filling
    first loop - for pattern
    second loop - for peaks moments
    in the end linear filling default intervals
    '''
    mod=0
    index_to_skip=-1
    
    for i in ts.index:
        try:
            if mod == 1 and i!=index_to_skip:
                index1 = random.choice(non_na_elements_for_fill) 
                index2 = random.choice(non_na_elements_for_fill) 
                ts[i] = (ts[index1] + ts[index2])/2
                if not np.isnan(ts[i+1]):
                    mod=0

            elif np.isnan(ts[i]) and ts[i+1]/ts[i-1] > 1.5 and mod == 0:
                ts[i] = (ts[i-1] + ts[i-2])/2
                mod = 1
                non_na_elements_for_fill = []
                index_to_skip = i+1 
                for j in ts[ts>=0].index:
                    if j > i+1 and len(non_na_elements_for_fill)<3:
                        non_na_elements_for_fill.append(j)
        except:
            continue
            
    for i in ts.index:
        try:
            if mod == 1:
                index1 = random.choice(non_na_elements_for_fill) 
                index2 = random.choice(non_na_elements_for_fill) 
                ts[i] = (ts[index1] + ts[index2])/2
                if not np.isnan(ts[i+1]):
                    mod=0
            elif not np.isnan(ts[i]) and ts[i]/ts[i-1] > 1.5 and np.isnan(ts[i+1]) and mod == 0:
                mod=1
                non_na_elements_for_fill = []
                for j in ts[ts>=0].index:
                    if j > i and len(non_na_elements_for_fill)<3:
                        non_na_elements_for_fill.append(j)
        except:
            continue
    ts.interpolate(method='linear', inplace=True)
    return ts

In [102]:
def date_column(x):
    print(x)
    y = x.nth_month// 12
    m = x.nth_month % 12
    return pd.Timestamp(2016 + y, m, 1)

In [51]:
train = pd.read_csv(r"New_train.csv")
test = pd.read_csv(r"Test.csv")
train = train.drop(columns=['starting_inventory','sellin','onhand_inventory', 'leftover_inventory','CAT_GENDER_BOTH',
                            'CAT_GENDER_MEN','CAT_GENDER_WOMEN','prefix'])

In [85]:
sku_train = train[train.sku_name == 'YOSHWARDTERR']

In [104]:
ts = make_ts(sku_train)

In [103]:
ts = make_ts(sku_train)
ts = pd.DataFrame(data={'nth_month':np.arange(ts.index[0], ts.index[-1]+1), 'y':ts})
ts['ds'] = ts.apply(func = date_column)

7      7
8      8
9      9
10    10
11    11
12    12
13    13
14    14
15    15
16    16
17    17
Name: nth_month, dtype: int64


AttributeError: 'Series' object has no attribute 'nth_month'

In [101]:
ts

Unnamed: 0,nth_month,y
7,7,1013.0
8,8,868.285714
9,9,723.571429
10,10,578.857143
11,11,434.142857
12,12,289.428571
13,13,144.714286
14,14,0.0
15,15,337.666667
16,16,675.333333


In [27]:
def date_column(x):
    return f'{x.year}-{x.month}-01' 

In [16]:
date = datetime.date(2020, 5, 15)

In [18]:
type(date)

datetime.date

AttributeError: 'str' object has no attribute 'datetime'

In [11]:
train.

Unnamed: 0,sku_name,sellout,month,year
0,YOSHWARDTERR,1013,7,2016
1,YOSHWARDTERR,0,2,2017
2,YOSHWARDTERR,1013,5,2017
3,YOSHUANEMARX,56728,4,2018
4,YOSHUANEMARX,90157,5,2018
...,...,...,...,...
43229,ABEAHAMASHL,25325,6,2021
43230,ABEAHAMASHL,38494,7,2021
43231,ABEAHAMASHL,56728,8,2021
43232,ABEAHAMASHL,65845,9,2021


In [31]:

# Python
df = pd.read_csv('https://raw.githubusercontent.com/facebook/prophet/main/examples/example_retail_sales.csv')
df.head()

Unnamed: 0,ds,y
0,1992-01-01,146376
1,1992-02-01,147079
2,1992-03-01,159336
3,1992-04-01,163669
4,1992-05-01,170068


In [34]:
m = Prophet()
m.fit(df)

INFO:prophet:Disabling weekly seasonality. Run prophet with weekly_seasonality=True to override this.
INFO:prophet:Disabling daily seasonality. Run prophet with daily_seasonality=True to override this.
  components = components.append(new_comp)


<prophet.forecaster.Prophet at 0x1d71c26ecd0>

In [37]:
future = m.make_future_dataframe(periods=365)

In [46]:
forecast = m.predict(future)

  components = components.append(new_comp)
  components = components.append(new_comp)


In [45]:
pd.Timestamp(2017, 1,1)

Timestamp('2017-01-01 00:00:00')