<a href="https://colab.research.google.com/github/NozdryakovaMarina/labpy_5/blob/main/lab_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [9]:
import os
import csv
import random
from typing import Any, Tuple, List

import torch
import torch.optim as optim
from torch import nn
from torch.utils.data import Dataset, DataLoader
import torchvision
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import cv2


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
class ImageDataset(Dataset):
  def __init__(self, path_to_annotation_file: str, transform: Any=None, train: bool=False, test: bool=False, valid: bool=False) -> None:
    self.path_to_annotation_file = path_to_annotation_file
    self.data = self.get_data(path_to_annotation_file, train, test, valid)
    self.transform = transform

  def get_data(self, path_to_annotation_file: str, train: bool=False, test: bool=False, valid: bool=False) -> pd.DataFrame:
    data = pd.read_csv(path_to_annotation_file, usecols=(0, 2), names = ('path_to_iamge', 'label'))
    data = data.sample(frac=1).reset_index(drop=True)
    data['label'] = data['label'].apply(lambda l: 0 if l == 'polarbear' else 1)
    train_size = int(0.8 * data.shape[0])
    test_valid_size = int(0.1 * data.shape[0])
    if train == True:
      data = data.iloc[:train_size, :]
    elif test == True:
      data = data.iloc[train_size:train_size + test_valid_size, :]
    elif valid == True:
      data = data.iloc[train_size + test_valid_size:, :]
    return data

  def __len__(self) -> int:
    return len(self.data)

  def __getitem__(self, index: int) -> Tuple[torch.tensor, int]:
    path_to_image = self.data.iloc[index, 0]
    image = cv2.cvtColor(cv2.imread(path_to_image), cv2.COLOR_BGR2RGB)
    label = self.data.iloc[index, 1]

    if self.transform:
      image = self.transform(image)

    return image, label


In [10]:
transforms = torchvision.transforms.Compose([torchvision.transforms.ToTensor(),
                                                    torchvision.transforms.Resize((224, 224)),
                                                    torchvision.transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
                                                    torchvision.transforms.RandomHorizontalFlip(p=0.5)])

In [11]:
train_data = ImageDataset(path_to_annotation_file='/content/drive/MyDrive/lab_py5/annotation.csv', transform=transforms, train = True)
test_data = ImageDataset(path_to_annotation_file='/content/drive/MyDrive/lab_py5/annotation.csv', transform=transforms, test = True)
valid_data = ImageDataset(path_to_annotation_file='/content/drive/MyDrive/lab_py5/annotation.csv', transform=transforms, valid = True)

In [12]:
len(train_data), len(test_data), len(valid_data)

(1763, 220, 221)

In [13]:
train_data.data['label'].value_counts()

1    1763
Name: label, dtype: int64

In [14]:
test_data.data['label'].value_counts()

1    220
Name: label, dtype: int64

In [15]:
valid_data.data['label'].value_counts()

1    221
Name: label, dtype: int64