# Custom Dataset

```
class CustomDataset(torch.utils.data.Dataset):
    def __init__(self):
        # 데이터셋의 전처리를 해주는 부분

    def __len__(self):
        # 데이터셋의 길이. 즉, 총 샘플의 수를 적어주는 부분

    def __getitem__(self, idx):
        # 데이터셋에서 특정 1개의 샘플을 가져오는 함수
```

In [10]:
import os
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision.io import read_image

class CustomImageDataset(Dataset):
    """Image dataset driven by an annotations CSV and a directory of images.

    Column 0 of the annotations table is joined onto ``img_dir`` to locate
    each image file; column 1 is used as that image's label.
    """

    def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
        """Read the annotations table and store the configuration.

        Args:
            annotations_file: Passed straight to ``pandas.read_csv``.
            img_dir: Directory the file names in column 0 are relative to.
            transform: Optional callable applied to each loaded image.
            target_transform: Optional callable applied to each label.
        """
        self.img_labels = pd.read_csv(annotations_file)
        self.img_dir = img_dir
        self.transform, self.target_transform = transform, target_transform

    def __len__(self):
        """Return the number of rows in the annotations table."""
        return len(self.img_labels)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a dict with ``"image"`` and ``"label"`` keys."""
        file_name = self.img_labels.iloc[idx, 0]
        picture = read_image(os.path.join(self.img_dir, file_name))
        target = self.img_labels.iloc[idx, 1]

        # Transforms are optional; each is applied only when one was supplied.
        if self.transform:
            picture = self.transform(picture)
        if self.target_transform:
            target = self.target_transform(target)

        return {"image": picture, "label": target}

In [11]:
import torch
import torch.nn.functional as fn
class CustomDataset(Dataset):
    """In-memory regression dataset of (features, target) pairs.

    With no arguments the dataset holds the five built-in samples (three
    input values and one target value each). Passing ``x_data`` and
    ``y_data`` lets the same class wrap any other equally sized pair of
    sequences.
    """

    def __init__(self, x_data=None, y_data=None):
        """Store the feature rows and target rows.

        Args:
            x_data: Sequence of feature rows (each a sequence of numbers).
                Defaults to the built-in five-sample table.
            y_data: Sequence of target rows, one per feature row.
                Defaults to the built-in targets.

        Raises:
            ValueError: If only one of ``x_data``/``y_data`` is given, or if
                they do not contain the same number of rows.
        """
        if (x_data is None) != (y_data is None):
            raise ValueError("x_data and y_data must be provided together")

        if x_data is None:
            # Fresh lists per instance so instances never share mutable state.
            x_data = [[73, 80, 75],
                      [93, 88, 93],
                      [89, 91, 90],
                      [96, 98, 100],
                      [73, 66, 70]]
            y_data = [[152], [185], [180], [196], [142]]

        if len(x_data) != len(y_data):
            raise ValueError(
                "x_data and y_data must have the same length, got "
                "{} and {}".format(len(x_data), len(y_data))
            )

        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        """Return the number of samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return sample ``idx`` as a pair of float32 tensors ``(x, y)``."""
        # torch.tensor with an explicit dtype replaces the legacy
        # torch.FloatTensor constructor, which treats a bare int argument as
        # a size and returns uninitialised memory.
        x = torch.tensor(self.x_data[idx], dtype=torch.float32)
        y = torch.tensor(self.y_data[idx], dtype=torch.float32)
        return x, y

In [14]:
# Wrap the toy dataset in a loader that yields shuffled mini-batches of two samples.
dataset = CustomDataset()
dataloader = DataLoader(
    dataset,
    shuffle=True,
    batch_size=2,
)

In [15]:
# One linear layer mapping 3 input features to 1 output, optimised with plain SGD.
model = torch.nn.Linear(in_features=3, out_features=1)
optimizer = torch.optim.SGD(params=model.parameters(), lr=1e-5)

In [16]:
# Train for epochs + 1 passes over the loader (epoch indices 0..epochs inclusive),
# taking one optimizer step per mini-batch and logging the cost after each step.
epochs = 20
for epoch in range(epochs + 1):
    for batch_idx, samples in enumerate(dataloader):
        x_train, y_train = samples

        # Forward pass and mean-squared-error cost for this mini-batch.
        prediction = model(x_train)
        cost = fn.mse_loss(prediction, y_train)

        # Clear old gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        print(
            'Epoch {:4d}/{} Batch {}/{} Cost : {:.6f}'.format(
                epoch,
                epochs,
                batch_idx + 1,
                len(dataloader),
                cost.item(),
            )
        )

Epoch    0/20 Batch 1/3 Cost : 5765.828125
Epoch    0/20 Batch 2/3 Cost : 4290.398438
Epoch    0/20 Batch 3/3 Cost : 848.614990
Epoch    1/20 Batch 1/3 Cost : 180.873672
Epoch    1/20 Batch 2/3 Cost : 84.239883
Epoch    1/20 Batch 3/3 Cost : 30.212414
Epoch    2/20 Batch 1/3 Cost : 4.488306
Epoch    2/20 Batch 2/3 Cost : 0.675307
Epoch    2/20 Batch 3/3 Cost : 5.896562
Epoch    3/20 Batch 1/3 Cost : 1.317400
Epoch    3/20 Batch 2/3 Cost : 0.112673
Epoch    3/20 Batch 3/3 Cost : 1.020151
Epoch    4/20 Batch 1/3 Cost : 0.145985
Epoch    4/20 Batch 2/3 Cost : 0.054036
Epoch    4/20 Batch 3/3 Cost : 3.633440
Epoch    5/20 Batch 1/3 Cost : 0.825133
Epoch    5/20 Batch 2/3 Cost : 1.662374
Epoch    5/20 Batch 3/3 Cost : 0.021213
Epoch    6/20 Batch 1/3 Cost : 0.006478
Epoch    6/20 Batch 2/3 Cost : 1.446777
Epoch    6/20 Batch 3/3 Cost : 1.183705
Epoch    7/20 Batch 1/3 Cost : 0.108374
Epoch    7/20 Batch 2/3 Cost : 2.107424
Epoch    7/20 Batch 3/3 Cost : 0.028723
Epoch    8/20 Batch 1/3 Cost

In [18]:
# Run the trained model on a single three-feature input row and show the raw output.
new_var = torch.FloatTensor([[73, 80, 75]])
pred_y = model(new_var)
print(pred_y)

tensor([[150.2986]], grad_fn=<AddmmBackward>)
