# Study room

## Layer Norm for 1d data

In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


In [2]:
features = torch.randn(2, 3)

In [3]:
features

tensor([[-0.3726,  0.5572,  0.0804],
        [-1.3519, -0.9464,  1.0852]])

In [5]:
layer_norm = nn.LayerNorm(3)

In [6]:
layer_norm(features)

tensor([[-1.2141,  1.2350, -0.0210],
        [-0.8887, -0.5084,  1.3971]], grad_fn=<NativeLayerNormBackward>)

In [9]:
batch_norm = nn.BatchNorm1d(3)

In [10]:
batch_norm(features)

tensor([[ 1.0000,  1.0000, -1.0000],
        [-1.0000, -1.0000,  1.0000]], grad_fn=<NativeBatchNormBackward>)

## Make feature columns

In [1]:
import pandas as pd

In [2]:
test = pd.read_csv("../../inputs/example_test.csv")

In [3]:
test.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Columns: 303 entries, row_id to f_299
dtypes: float64(300), int64(2), object(1)
memory usage: 21.4+ KB


In [4]:
test

Unnamed: 0,row_id,time_id,investment_id,f_0,f_1,f_2,f_3,f_4,f_5,f_6,...,f_290,f_291,f_292,f_293,f_294,f_295,f_296,f_297,f_298,f_299
0,1220_1,1220,1,0.874944,0.457875,0.962572,0.372061,0.004509,0.971304,0.097477,...,0.514031,0.828523,0.437697,0.810764,0.693963,0.186403,0.569144,0.120386,0.419603,0.936966
1,1220_2,1220,2,0.554862,0.445478,0.714034,0.891988,0.703331,0.264478,0.600636,...,0.620919,0.30154,0.687464,0.400168,0.908198,0.272901,0.22243,0.748324,0.858802,0.362829
2,1221_0,1221,0,0.17507,0.918464,0.044379,0.505868,0.03716,0.169001,0.211365,...,0.39771,0.789382,0.238078,0.254068,0.795084,0.491928,0.376725,0.079489,0.995041,0.482283
3,1221_1,1221,1,0.355239,0.689773,0.452643,0.134863,0.794958,0.837197,0.036773,...,0.893571,0.322879,0.674692,0.11416,0.818483,0.287065,0.710537,0.550434,0.899869,0.928997
4,1221_2,1221,2,0.912778,0.462684,0.454067,0.491009,0.050547,0.250051,0.686295,...,0.031662,0.703427,0.861476,0.897927,0.659051,0.356148,0.37599,0.209798,0.175153,0.72716
5,1222_0,1222,0,0.678116,0.158996,0.058148,0.253677,0.984982,0.323493,0.30799,...,0.439604,0.487491,0.080697,0.53975,0.246645,0.435054,0.436709,0.315042,0.133657,0.731483
6,1222_1,1222,1,0.769912,0.060028,0.676047,0.85355,0.411155,0.747515,0.649063,...,0.419152,0.659254,0.198072,0.655397,0.428209,0.769999,0.960219,0.734675,0.028576,0.320976
7,1222_2,1222,2,0.738876,0.980038,0.696796,0.591012,0.882052,0.311484,0.729229,...,0.341144,0.758328,0.010728,0.276859,0.979913,0.486605,0.67014,0.515824,0.580929,0.139308
8,1223_0,1223,0,0.281445,0.571472,0.986828,0.021403,0.789577,0.005765,0.409379,...,0.357828,1.9e-05,0.50453,0.169174,0.216349,0.503643,0.052721,0.795965,0.909602,0.291668


In [31]:
features = test.columns[1:]
features

Index(['time_id', 'investment_id', 'f_0', 'f_1', 'f_2', 'f_3', 'f_4', 'f_5',
       'f_6', 'f_7',
       ...
       'f_290', 'f_291', 'f_292', 'f_293', 'f_294', 'f_295', 'f_296', 'f_297',
       'f_298', 'f_299'],
      dtype='object', length=302)

In [32]:
prev_features = list(map(lambda x: "p_" + x, features))
len(prev_features)

302

In [33]:
test_group = test[features].groupby("investment_id")

In [34]:
test_group.diff()

Unnamed: 0,time_id,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,f_8,...,f_290,f_291,f_292,f_293,f_294,f_295,f_296,f_297,f_298,f_299
0,,,,,,,,,,,...,,,,,,,,,,
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,1.0,-0.519705,0.231898,-0.509929,-0.237198,0.790449,-0.134107,-0.060703,0.467385,0.255965,...,0.379541,-0.505644,0.236995,-0.696604,0.124521,0.100662,0.141392,0.430047,0.480267,-0.007969
4,1.0,0.357916,0.017206,-0.259968,-0.400979,-0.652784,-0.014427,0.085659,-0.030138,-0.120817,...,-0.589257,0.401887,0.174012,0.497759,-0.249147,0.083247,0.153559,-0.538525,-0.68365,0.364331
5,1.0,0.503046,-0.759468,0.01377,-0.25219,0.947822,0.154492,0.096625,-0.107346,-0.578049,...,0.041894,-0.301891,-0.157381,0.285681,-0.54844,-0.056874,0.059984,0.235553,-0.861383,0.2492
6,1.0,0.414673,-0.629745,0.223404,0.718687,-0.383803,-0.089682,0.61229,-0.08493,-0.725127,...,-0.474419,0.336375,-0.47662,0.541236,-0.390274,0.482934,0.249682,0.184242,-0.871293,-0.608021
7,1.0,-0.173901,0.517355,0.242729,0.100003,0.831505,0.061433,0.042934,-0.150542,0.447207,...,0.309482,0.054901,-0.850748,-0.621069,0.320862,0.130456,0.294151,0.306025,0.405777,-0.587852
8,1.0,-0.396671,0.412476,0.92868,-0.232274,-0.195405,-0.317728,0.101389,0.268093,0.369165,...,-0.081776,-0.487472,0.423832,-0.370576,-0.030296,0.068589,-0.383988,0.480923,0.775945,-0.439816


In [35]:
test_ids = test.copy()

In [37]:
test_ids.set_index("investment_id")

Unnamed: 0_level_0,row_id,time_id,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,...,f_290,f_291,f_292,f_293,f_294,f_295,f_296,f_297,f_298,f_299
investment_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,1220_1,1220,0.874944,0.457875,0.962572,0.372061,0.004509,0.971304,0.097477,0.24608,...,0.514031,0.828523,0.437697,0.810764,0.693963,0.186403,0.569144,0.120386,0.419603,0.936966
2,1220_2,1220,0.554862,0.445478,0.714034,0.891988,0.703331,0.264478,0.600636,0.697096,...,0.620919,0.30154,0.687464,0.400168,0.908198,0.272901,0.22243,0.748324,0.858802,0.362829
0,1221_0,1221,0.17507,0.918464,0.044379,0.505868,0.03716,0.169001,0.211365,0.791252,...,0.39771,0.789382,0.238078,0.254068,0.795084,0.491928,0.376725,0.079489,0.995041,0.482283
1,1221_1,1221,0.355239,0.689773,0.452643,0.134863,0.794958,0.837197,0.036773,0.713466,...,0.893571,0.322879,0.674692,0.11416,0.818483,0.287065,0.710537,0.550434,0.899869,0.928997
2,1221_2,1221,0.912778,0.462684,0.454067,0.491009,0.050547,0.250051,0.686295,0.666958,...,0.031662,0.703427,0.861476,0.897927,0.659051,0.356148,0.37599,0.209798,0.175153,0.72716
0,1222_0,1222,0.678116,0.158996,0.058148,0.253677,0.984982,0.323493,0.30799,0.683907,...,0.439604,0.487491,0.080697,0.53975,0.246645,0.435054,0.436709,0.315042,0.133657,0.731483
1,1222_1,1222,0.769912,0.060028,0.676047,0.85355,0.411155,0.747515,0.649063,0.628536,...,0.419152,0.659254,0.198072,0.655397,0.428209,0.769999,0.960219,0.734675,0.028576,0.320976
2,1222_2,1222,0.738876,0.980038,0.696796,0.591012,0.882052,0.311484,0.729229,0.516416,...,0.341144,0.758328,0.010728,0.276859,0.979913,0.486605,0.67014,0.515824,0.580929,0.139308
0,1223_0,1223,0.281445,0.571472,0.986828,0.021403,0.789577,0.005765,0.409379,0.952,...,0.357828,1.9e-05,0.50453,0.169174,0.216349,0.503643,0.052721,0.795965,0.909602,0.291668


In [43]:
for i, row in test.iterrows():
    print(row[["time_id", "f_0"]])

time_id        1220
f_0        0.874944
Name: 0, dtype: object
time_id        1220
f_0        0.554862
Name: 1, dtype: object
time_id       1221
f_0        0.17507
Name: 2, dtype: object
time_id        1221
f_0        0.355239
Name: 3, dtype: object
time_id        1221
f_0        0.912778
Name: 4, dtype: object
time_id        1222
f_0        0.678116
Name: 5, dtype: object
time_id        1222
f_0        0.769912
Name: 6, dtype: object
time_id        1222
f_0        0.738876
Name: 7, dtype: object
time_id        1223
f_0        0.281445
Name: 8, dtype: object


In [5]:
import sys

sys.path.append("..")
sys.path.append("../../inputs")

In [9]:
import ubiquant

In [10]:
env = ubiquant.make_env()  # initialize the environment
iter_test = env.iter_test()  # an iterator which loops over the test set and sample submission

In [14]:
store = pd.DataFrame(columns=test.columns)

In [15]:
store.info

<bound method DataFrame.info of Empty DataFrame
Columns: [row_id, time_id, investment_id, f_0, f_1, f_2, f_3, f_4, f_5, f_6, f_7, f_8, f_9, f_10, f_11, f_12, f_13, f_14, f_15, f_16, f_17, f_18, f_19, f_20, f_21, f_22, f_23, f_24, f_25, f_26, f_27, f_28, f_29, f_30, f_31, f_32, f_33, f_34, f_35, f_36, f_37, f_38, f_39, f_40, f_41, f_42, f_43, f_44, f_45, f_46, f_47, f_48, f_49, f_50, f_51, f_52, f_53, f_54, f_55, f_56, f_57, f_58, f_59, f_60, f_61, f_62, f_63, f_64, f_65, f_66, f_67, f_68, f_69, f_70, f_71, f_72, f_73, f_74, f_75, f_76, f_77, f_78, f_79, f_80, f_81, f_82, f_83, f_84, f_85, f_86, f_87, f_88, f_89, f_90, f_91, f_92, f_93, f_94, f_95, f_96, ...]
Index: []

[0 rows x 303 columns]>

In [None]:
for test_df, sample_prediction_df in iter_test:

    
    