In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm

from sklearn.gaussian_process import GaussianProcessRegressor
import sklearn.gaussian_process.kernels as K

import time

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Window configuration shared by the calendar slices and the fitting loop.
end = 1941             # exclusive upper bound of the calendar row window
n = 140                # number of leading window rows used to fit each model
h = 28                 # number of trailing window rows that are predicted
start = end - h - n    # first calendar row of the window

## Training data

In [3]:
tr_last = 1913   # last day column that gets an explicit dtype below
max_lags = 57    # not used in the visible cells; kept in case other code reads it
start_day = 350  # first day column that gets an explicit dtype below

# The daily sales columns are labelled "d_<day>" (e.g. 'd_1774', 'd_1914'), so the
# dtype mapping must use that prefix. Bare day numbers such as "350" match no
# column, which left the float32 request without any effect.
numcols = [f"d_{day}" for day in range(start_day, tr_last + 1)]

dtype = {numcol: "float32" for numcol in numcols}

train = pd.read_csv("sales_train_evaluation.csv", dtype=dtype)

In [4]:
# One-column DataFrame with the row identifiers. Note that `ids` is re-assigned
# (as a Series) in the later cell that selects `training_data`.
ids = train[['id']]
#print(ids.shape)

#useless_slice = train.loc[:, 'd_1914':'d_1941']
#print(useless_slice.shape)

#df = pd.concat([ids, useless_slice], axis = 1)
#print(df.shape)

## Calendar 

In [5]:
# Column dtypes requested from read_csv. Columns typed "category" are replaced by
# integer codes further down.
CAL_DTYPES = dict(
    weekday="category",
    wm_yr_wk="int16",
    wday="int16",
    month="int16",
    year="int16",
    event_name_1="category",
    event_name_2="category",
    event_type_1="category",
    event_type_2="category",
    snap_CA="float32",
    snap_TX="float32",
    snap_WI="float32",
)

calendar = pd.read_csv("calendar.csv", dtype=CAL_DTYPES)

# Parse the 'date' strings into datetime values.
calendar["date"] = pd.to_datetime(calendar["date"])

# Swap every categorical column for its int16 category code. Missing values are
# coded as -1, so subtracting the column minimum moves them to 0 (shifting the real
# categories up by one); a column whose minimum code is already 0 is unchanged.
categorical_columns = [name for name, kind in CAL_DTYPES.items() if kind == "category"]
for name in categorical_columns:
    codes = calendar[name].cat.codes.astype("int16")
    calendar[name] = codes - codes.min()

# Encoded event columns for calendar rows [start, end), one inner list per row.
event_columns = calendar.loc[:, 'event_name_1':'event_type_2']
events = event_columns.iloc[start:end].to_numpy().tolist()

In [6]:
# Sanity check: the [start:end] slice should yield n + h = 168 calendar rows.
print(len(events))

168


In [7]:
# SNAP flags for calendar rows [start, end), one list per state. Each entry is a
# single-element list ([flag]) so it can be concatenated onto a per-day feature row.
snap_CA = calendar['snap_CA'].iloc[start:end].to_numpy().reshape(-1, 1).tolist()
snap_TX = calendar['snap_TX'].iloc[start:end].to_numpy().reshape(-1, 1).tolist()
snap_WI = calendar['snap_WI'].iloc[start:end].to_numpy().reshape(-1, 1).tolist()

In [8]:
# Sanity check: same window as `events`, so n + h = 168 entries are expected.
print(len(snap_CA))

168


# Second 30490 rows of submission

In [9]:
# Select the n day columns used for fitting. The bounds are derived from the window
# configuration instead of being hard-coded; with start=1773, end=1941 and h=28 they
# evaluate to 'd_1774' and 'd_1913', the literals used previously.
first_train_col = f"d_{start + 1}"
last_train_col = f"d_{end - h}"
training_data = train.loc[:, first_train_col:last_train_col]

# The fitting loop takes the first n feature rows as inputs, so the target window
# must be exactly n columns wide.
assert training_data.shape[1] == n, "training window width must equal n"

# Row identifiers and store labels, looked up per series in the fitting loop.
ids = train.loc[:, 'id']
store_ids = train.loc[:, 'store_id']
training_data.shape

(30490, 140)

In [13]:
# Rational-quadratic kernel built with its constructor defaults. This one object is
# passed to every GaussianProcessRegressor created in the fitting loop.
kernel = K.RationalQuadratic()

In [14]:
# Accumulator for the output rows: the fitting loop appends one
# [id, prediction, prediction, ...] list per series.
matrix = []

In [None]:
%%time
for i in tqdm(range(len(training_data))):
    print(i)
    t1 = time.time()
    
    x = [ [i] for i in range(start, end)]
    x = [x + y for x, y in zip(x, events)]
    store_id = store_ids.loc[0]

    if store_id in ['CA_1', 'CA_2', 'CA_3', 'CA_4']:
        x = [x + y for x, y in zip(x, snap_CA)]
    elif store_id in ['TX_1', 'TX_2', 'TX_3']:
        x = [x + y for x, y in zip(x, snap_TX)]
    elif store_id in ['WI_1', 'WI_2', 'WI_3']:
        x = [x + y for x, y in zip(x, snap_WI)]
    
    X = x[:n]
    
    Y = training_data.loc[i, :].tolist()
    
    
    gp = GaussianProcessRegressor(kernel=kernel, n_restarts_optimizer=9)

    gp.fit(X, Y)

    Y_pred = gp.predict(x, return_std=False)
    
    predictions = Y_pred[n:]
    
    row = [ids[i]] + predictions.tolist()

    matrix.append(row)
    t2 = time.time()

frame = pd.DataFrame(matrix, columns = ["id"] + ["F" + str(i) for i in range (1, 29)])
frame.shape
frame.to_csv('out6.csv', index=False)


  0%|          | 0/30490 [00:00<?, ?it/s]

0


  0%|          | 1/30490 [00:00<5:55:02,  1.43it/s]

1


  0%|          | 2/30490 [00:01<7:15:27,  1.17it/s]

2


  0%|          | 3/30490 [00:03<7:53:50,  1.07it/s]

3


  0%|          | 4/30490 [00:03<6:30:00,  1.30it/s]

4


  0%|          | 5/30490 [00:04<6:14:54,  1.36it/s]

5


  0%|          | 6/30490 [00:04<6:18:46,  1.34it/s]

6


  0%|          | 7/30490 [00:05<5:15:40,  1.61it/s]

7


  0%|          | 8/30490 [00:05<4:43:45,  1.79it/s]

8


  0%|          | 9/30490 [00:06<5:14:22,  1.62it/s]

9


  0%|          | 10/30490 [00:07<5:31:33,  1.53it/s]

10


  0%|          | 11/30490 [00:08<6:23:27,  1.32it/s]

11


  0%|          | 12/30490 [00:08<6:49:00,  1.24it/s]

12


  0%|          | 13/30490 [00:09<6:43:37,  1.26it/s]

13


  0%|          | 14/30490 [00:10<6:26:42,  1.31it/s]

14


  0%|          | 15/30490 [00:11<6:02:28,  1.40it/s]

15


  0%|          | 16/30490 [00:11<5:48:08,  1.46it/s]

16


  0%|          | 17/30490 [00:11<4:48:20,  1.76it/s]

17


  0%|          | 18/30490 [00:13<6:02:24,  1.40it/s]

18


  0%|          | 19/30490 [00:13<4:55:11,  1.72it/s]

19


  0%|          | 20/30490 [00:13<4:54:52,  1.72it/s]

20


  0%|          | 21/30490 [00:14<5:39:11,  1.50it/s]

21


  0%|          | 22/30490 [00:15<5:34:48,  1.52it/s]

22


  0%|          | 23/30490 [00:15<4:51:21,  1.74it/s]

23


  0%|          | 24/30490 [00:16<4:43:56,  1.79it/s]

24


  0%|          | 25/30490 [00:17<5:10:42,  1.63it/s]

25


  0%|          | 26/30490 [00:17<5:21:23,  1.58it/s]

26


  0%|          | 27/30490 [00:18<5:30:30,  1.54it/s]

27


  0%|          | 28/30490 [00:19<5:53:04,  1.44it/s]

28


  0%|          | 29/30490 [00:20<6:11:58,  1.36it/s]

29


  0%|          | 30/30490 [00:20<6:04:47,  1.39it/s]

30


  0%|          | 31/30490 [00:21<6:55:21,  1.22it/s]

31


  0%|          | 32/30490 [00:22<6:31:00,  1.30it/s]

32


  0%|          | 33/30490 [00:23<6:16:12,  1.35it/s]

33


  0%|          | 34/30490 [00:23<6:22:20,  1.33it/s]

34


  0%|          | 35/30490 [00:24<6:24:28,  1.32it/s]

35


  0%|          | 36/30490 [00:25<6:00:02,  1.41it/s]

36


  0%|          | 37/30490 [00:25<5:31:30,  1.53it/s]

37


  0%|          | 38/30490 [00:26<5:38:25,  1.50it/s]

38


  0%|          | 39/30490 [00:27<5:44:40,  1.47it/s]

39


  0%|          | 40/30490 [00:27<5:31:58,  1.53it/s]

40


  0%|          | 41/30490 [00:28<6:02:08,  1.40it/s]

41


  0%|          | 42/30490 [00:29<6:27:36,  1.31it/s]

42


  0%|          | 43/30490 [00:29<5:42:47,  1.48it/s]

43


  0%|          | 44/30490 [00:30<5:41:52,  1.48it/s]

44


  0%|          | 45/30490 [00:31<5:22:49,  1.57it/s]

45


  0%|          | 46/30490 [00:31<5:33:50,  1.52it/s]

46


  0%|          | 47/30490 [00:32<6:21:20,  1.33it/s]

47


  0%|          | 48/30490 [00:33<5:43:14,  1.48it/s]

48


  0%|          | 49/30490 [00:33<5:02:25,  1.68it/s]

49


  0%|          | 50/30490 [00:34<5:19:23,  1.59it/s]

50


  0%|          | 51/30490 [00:35<5:14:43,  1.61it/s]

51


  0%|          | 52/30490 [00:36<6:05:14,  1.39it/s]

52


  0%|          | 53/30490 [00:36<5:16:55,  1.60it/s]

53


  0%|          | 54/30490 [00:37<5:15:55,  1.61it/s]

54


  0%|          | 55/30490 [00:37<5:08:55,  1.64it/s]

55


  0%|          | 56/30490 [00:38<5:47:16,  1.46it/s]

56


  0%|          | 57/30490 [00:39<6:25:35,  1.32it/s]

57


  0%|          | 58/30490 [00:39<5:29:17,  1.54it/s]

58


  0%|          | 59/30490 [00:40<4:47:20,  1.77it/s]

59


  0%|          | 60/30490 [00:40<5:15:28,  1.61it/s]

60


  0%|          | 61/30490 [00:41<5:50:39,  1.45it/s]

61


  0%|          | 62/30490 [00:42<5:52:38,  1.44it/s]

62


  0%|          | 63/30490 [00:43<5:29:49,  1.54it/s]

63


  0%|          | 64/30490 [00:43<6:14:20,  1.35it/s]

64


  0%|          | 65/30490 [00:44<6:29:44,  1.30it/s]

65


  0%|          | 66/30490 [00:45<6:37:07,  1.28it/s]

66


  0%|          | 67/30490 [00:46<6:17:14,  1.34it/s]

67


  0%|          | 68/30490 [00:47<6:20:24,  1.33it/s]

68


  0%|          | 69/30490 [00:47<5:48:09,  1.46it/s]

69


  0%|          | 70/30490 [00:48<5:30:29,  1.53it/s]

70


  0%|          | 71/30490 [00:49<7:12:11,  1.17it/s]

71


  0%|          | 72/30490 [00:50<6:26:39,  1.31it/s]

72


  0%|          | 73/30490 [00:50<6:51:31,  1.23it/s]

73


  0%|          | 74/30490 [00:51<6:51:56,  1.23it/s]

74


  0%|          | 75/30490 [00:52<7:00:42,  1.20it/s]

75


  0%|          | 76/30490 [00:53<6:11:41,  1.36it/s]

76


  0%|          | 77/30490 [00:54<6:46:43,  1.25it/s]

77


  0%|          | 78/30490 [00:54<6:45:40,  1.25it/s]

78


  0%|          | 79/30490 [00:55<6:37:41,  1.27it/s]

79


  0%|          | 80/30490 [00:56<5:53:20,  1.43it/s]

80


  0%|          | 81/30490 [00:56<5:33:50,  1.52it/s]

81


  0%|          | 82/30490 [00:57<6:26:30,  1.31it/s]

82


  0%|          | 83/30490 [00:58<6:14:21,  1.35it/s]

83


  0%|          | 84/30490 [00:59<6:16:21,  1.35it/s]

84


  0%|          | 85/30490 [01:00<6:47:43,  1.24it/s]

85


  0%|          | 86/30490 [01:01<7:12:27,  1.17it/s]

86


  0%|          | 87/30490 [01:01<7:19:06,  1.15it/s]

87


  0%|          | 88/30490 [01:02<5:42:35,  1.48it/s]

88


  0%|          | 89/30490 [01:03<7:32:46,  1.12it/s]

89


  0%|          | 90/30490 [01:04<7:02:01,  1.20it/s]

90


  0%|          | 91/30490 [01:05<6:49:18,  1.24it/s]

91


  0%|          | 92/30490 [01:05<7:07:05,  1.19it/s]

92


  0%|          | 93/30490 [01:06<6:57:26,  1.21it/s]

93


  0%|          | 94/30490 [01:07<5:57:07,  1.42it/s]

94


  0%|          | 95/30490 [01:07<6:12:30,  1.36it/s]

95


  0%|          | 96/30490 [01:08<5:31:53,  1.53it/s]

96


  0%|          | 97/30490 [01:09<5:28:10,  1.54it/s]

97


  0%|          | 98/30490 [01:09<4:58:57,  1.69it/s]

98


  0%|          | 99/30490 [01:10<5:45:36,  1.47it/s]

99


  0%|          | 100/30490 [01:10<5:11:51,  1.62it/s]

100


  0%|          | 101/30490 [01:11<5:23:01,  1.57it/s]

101


  0%|          | 102/30490 [01:12<5:10:20,  1.63it/s]

102


  0%|          | 103/30490 [01:12<5:13:11,  1.62it/s]

103


  0%|          | 104/30490 [01:13<4:51:00,  1.74it/s]

104


  0%|          | 105/30490 [01:13<5:01:36,  1.68it/s]

105


  0%|          | 106/30490 [01:14<4:49:43,  1.75it/s]

106


  0%|          | 107/30490 [01:15<4:57:28,  1.70it/s]

107


  0%|          | 108/30490 [01:15<5:06:27,  1.65it/s]

108


  0%|          | 109/30490 [01:16<5:04:13,  1.66it/s]

109


  0%|          | 110/30490 [01:16<5:20:02,  1.58it/s]

110


  0%|          | 111/30490 [01:17<5:04:33,  1.66it/s]

111


  0%|          | 112/30490 [01:18<5:19:01,  1.59it/s]

112


  0%|          | 113/30490 [01:19<5:52:35,  1.44it/s]

113


  0%|          | 114/30490 [01:19<5:38:32,  1.50it/s]

114


  0%|          | 115/30490 [01:20<5:52:24,  1.44it/s]

115


  0%|          | 116/30490 [01:21<6:04:06,  1.39it/s]

116


  0%|          | 117/30490 [01:21<5:18:36,  1.59it/s]

117


  0%|          | 118/30490 [01:22<4:57:00,  1.70it/s]

118


  0%|          | 119/30490 [01:22<5:10:37,  1.63it/s]

119


## Writing dataframe to .csv

### First part:

In [None]:
#print(df.shape)

#columns = ["id"] + ["F" + str(i) for i in range (1, 29)]
#df.columns = columns

### Combining:

In [None]:
#df = pd.concat([df, frame])

#df.shape

In [None]:
#frame.to_csv('out.csv', index=False)

In [None]:
#print("ab")