In [1]:
import pandas as pd
import pandas_datareader.data as pdr
import datetime
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim
import numpy as np
import argparse
from copy import deepcopy # Add Deepcopy for args
from sklearn.metrics import mean_absolute_error
import ssl

import seaborn as sns 
import matplotlib.pyplot as plt

# Record the PyTorch version this notebook was executed with.
print(torch.__version__)
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): %pylab populates the interactive namespace from numpy and
# matplotlib (star-import style), which can shadow names defined in this
# notebook. `pylab` is not imported explicitly above, so the rcParams line
# below presumably relies on this magic -- confirm before removing it.
%pylab inline
# Default size for every figure created afterwards (matplotlib reads
# figsize as width x height).
pylab.rcParams['figure.figsize'] = (15, 9)

1.7.1
Populating the interactive namespace from numpy and matplotlib


# 커스텀 데이터셋 만들기
- Dataset: i번째 레코드 값을 주는 역할을 함 (데이터 요청을 받으면 해당 데이터를 꺼내줌)
- DataLoader: 배치를 만들어주는 역할 (chunking)
- DataLoader에 Dataset을 넣어주면 배치 사이즈대로 데이터를 뽑아줌
- 커스텀 데이터셋은 `__len__`과 `__getitem__`을 구현하므로 길이 조회(`len`) 및 인덱싱이 가능함

In [4]:
class dummySet(Dataset):
    """Toy dataset of consecutive integers for demonstrating Dataset/DataLoader.

    Sample ``i`` pairs the feature row ``[2*i, 2*i + 1]`` with the label ``i``.

    Args:
        num_data: Number of samples to generate (an integer).

    Raises:
        TypeError: If ``num_data`` is not an integer.
    """

    def __init__(self, num_data):
        # range() used to reject non-integers with TypeError; np.arange would
        # silently accept floats, so keep that contract explicit.
        if not isinstance(num_data, (int, np.integer)):
            raise TypeError(
                f"num_data must be an integer, got {type(num_data).__name__}"
            )
        # np.arange builds the integer ranges directly (no intermediate Python
        # list) and keeps an integer dtype even for num_data == 0, whereas
        # np.array([]) falls back to float64.
        self.x = np.arange(num_data * 2).reshape(-1, 2)
        self.y = np.arange(num_data)

    def __len__(self):
        """Return the number of samples (rows of ``x``)."""
        return len(self.x)

    def __getitem__(self, idx):
        """Return the ``(features, label)`` pair stored at position ``idx``."""
        return self.x[idx], self.y[idx]

In [5]:
# Instantiate a 100-sample toy dataset, then show its feature array followed
# by the first (features, label) record fetched through indexing.
dataset = dummySet(num_data=100)
print(dataset.x)
print(dataset[0])

[[  0   1]
 [  2   3]
 [  4   5]
 [  6   7]
 [  8   9]
 [ 10  11]
 [ 12  13]
 [ 14  15]
 [ 16  17]
 [ 18  19]
 [ 20  21]
 [ 22  23]
 [ 24  25]
 [ 26  27]
 [ 28  29]
 [ 30  31]
 [ 32  33]
 [ 34  35]
 [ 36  37]
 [ 38  39]
 [ 40  41]
 [ 42  43]
 [ 44  45]
 [ 46  47]
 [ 48  49]
 [ 50  51]
 [ 52  53]
 [ 54  55]
 [ 56  57]
 [ 58  59]
 [ 60  61]
 [ 62  63]
 [ 64  65]
 [ 66  67]
 [ 68  69]
 [ 70  71]
 [ 72  73]
 [ 74  75]
 [ 76  77]
 [ 78  79]
 [ 80  81]
 [ 82  83]
 [ 84  85]
 [ 86  87]
 [ 88  89]
 [ 90  91]
 [ 92  93]
 [ 94  95]
 [ 96  97]
 [ 98  99]
 [100 101]
 [102 103]
 [104 105]
 [106 107]
 [108 109]
 [110 111]
 [112 113]
 [114 115]
 [116 117]
 [118 119]
 [120 121]
 [122 123]
 [124 125]
 [126 127]
 [128 129]
 [130 131]
 [132 133]
 [134 135]
 [136 137]
 [138 139]
 [140 141]
 [142 143]
 [144 145]
 [146 147]
 [148 149]
 [150 151]
 [152 153]
 [154 155]
 [156 157]
 [158 159]
 [160 161]
 [162 163]
 [164 165]
 [166 167]
 [168 169]
 [170 171]
 [172 173]
 [174 175]
 [176 177]
 [178 179]
 [180 181]

In [6]:
# Wrap the dataset in a loader that yields sequential (unshuffled) batches of
# three samples each.
dataloader = DataLoader(
    dataset,
    batch_size=3,
    shuffle=False,
)

In [7]:
# Peek at the first batch only: each item yielded by the loader is a
# [features, labels] pair of tensors; stop after one iteration.
for batch in dataloader:    
    print(batch)
    break

[tensor([[0, 1],
        [2, 3],
        [4, 5]], dtype=torch.int32), tensor([0, 1, 2], dtype=torch.int32)]


In [8]:
# Sequential loader with batches of three; drop_last=True discards the final
# batch when its size differs from batch_size.
dataloader = DataLoader(
    dataset,
    batch_size=3,
    shuffle=False,
    drop_last=True,
)

In [9]:
# Walk through every batch, unpacking features (X) and labels (y), and print
# both the tensors and their shapes to show how samples are grouped.
for X, y in dataloader:    
    print(X,y)
    print(X.shape, y.shape)    

tensor([[0, 1],
        [2, 3],
        [4, 5]], dtype=torch.int32) tensor([0, 1, 2], dtype=torch.int32)
torch.Size([3, 2]) torch.Size([3])
tensor([[ 6,  7],
        [ 8,  9],
        [10, 11]], dtype=torch.int32) tensor([3, 4, 5], dtype=torch.int32)
torch.Size([3, 2]) torch.Size([3])
tensor([[12, 13],
        [14, 15],
        [16, 17]], dtype=torch.int32) tensor([6, 7, 8], dtype=torch.int32)
torch.Size([3, 2]) torch.Size([3])
tensor([[18, 19],
        [20, 21],
        [22, 23]], dtype=torch.int32) tensor([ 9, 10, 11], dtype=torch.int32)
torch.Size([3, 2]) torch.Size([3])
tensor([[24, 25],
        [26, 27],
        [28, 29]], dtype=torch.int32) tensor([12, 13, 14], dtype=torch.int32)
torch.Size([3, 2]) torch.Size([3])
tensor([[30, 31],
        [32, 33],
        [34, 35]], dtype=torch.int32) tensor([15, 16, 17], dtype=torch.int32)
torch.Size([3, 2]) torch.Size([3])
tensor([[36, 37],
        [38, 39],
        [40, 41]], dtype=torch.int32) tensor([18, 19, 20], dtype=torch.int32)
torch.S