# overview

In [1]:
# default package
import logging
import sys
import os 
import pathlib

In [53]:
# third party package
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import pandas_profiling as pdp
import torch
from torch import nn
from torchsummary import summary

In [3]:
# my package
sys.path.append(os.path.join(pathlib.Path().resolve(),"../"))

In [4]:
# reload settings
%load_ext autoreload
%autoreload 2

In [5]:
# logger
logger=logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [6]:
# graph settings
sns.set()
%matplotlib inline

In [7]:
def cd_project_root_()->None:
    current=pathlib.Path().cwd()
    if current.stem=="notebooks":
        os.chdir(current.parent)
    logger.info(f"current directory: {pathlib.Path().cwd()}")

cd_project_root_()

INFO:__main__:current directory: /workspaces/load_to_goal/Kaggle/kaggle-google-brain


In [55]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## EDA

In [8]:
df_train=pd.read_csv("./data/raw/train.csv")
df_test=pd.read_csv("./data/raw/test.csv")
df_sample=pd.read_csv("./data/raw/sample_submission.csv")

In [12]:
df_train.loc[df_train["breath_id"]==1,["u_in","pressure"]]

Unnamed: 0,u_in,pressure
0,0.083334,5.837492
1,18.383041,5.907794
2,22.509278,7.876254
3,22.808822,11.742872
4,25.355850,12.234987
...,...,...
75,4.974474,6.399909
76,4.978481,6.610815
77,4.981847,6.329607
78,4.984683,6.540513


In [16]:
len(df_train.groupby("breath_id").groups)

75450

In [17]:
75450*80

6036000

In [20]:
len(df_train["breath_id"].unique())

75450

In [22]:
len(df_test["breath_id"].unique())

50300

In [23]:
df_test

Unnamed: 0,id,breath_id,R,C,time_step,u_in,u_out
0,1,0,5,20,0.000000,0.000000,0
1,2,0,5,20,0.031904,7.515046,0
2,3,0,5,20,0.063827,14.651675,0
3,4,0,5,20,0.095751,21.230610,0
4,5,0,5,20,0.127644,26.320956,0
...,...,...,...,...,...,...,...
4023995,4023996,125748,20,10,2.530117,4.971245,1
4023996,4023997,125748,20,10,2.563853,4.975709,1
4023997,4023998,125748,20,10,2.597475,4.979468,1
4023998,4023999,125748,20,10,2.631134,4.982648,1


## network

In [34]:
r_map = {5: 0, 20: 1, 50: 2}
c_map = {10: 0, 20: 1, 50: 2}
df_train["R"] = df_train["R"].map(r_map)
df_train["C"] = df_train["C"].map(c_map)

In [36]:
tensor=torch.tensor(df_train.loc[df_train["breath_id"]==1,["R","C"]].values).view(80,-1)
r_emb = nn.Embedding(3, 2, padding_idx=0)
out=r_emb(tensor[:,0])

In [68]:
out2=torch.cat((out,out),1).view(1,80,-1)

In [69]:
out2.shape

torch.Size([1, 80, 4])

In [70]:
layer=nn.Linear(4,5)

In [71]:
layer(out2).shape

torch.Size([1, 80, 5])

In [125]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer1=nn.Linear(4,5)
        self.lstm = nn.LSTM(
            5,
            5,
            dropout=0.2,
            batch_first=True,
            bidirectional=True,
        )
        self.layer2=nn.Linear(10,10)
        self.layer3=nn.Linear(10,1)
    
    def forward(self,x):
        out=self.layer1(x)
        out=self.lstm(out)[0]
        out=self.layer2(out)
        out=self.layer3(out)
        return out

In [126]:
model=Model().to(device)
out2=out2.to(device)

In [127]:
model.forward(out2).shape

torch.Size([1, 80, 1])

In [123]:
summary(model, (1,10, 4))

RuntimeError: input must have 3 dimensions, got 4

## check

In [132]:
a=b=1

In [130]:
b

1