# BiteMe | Train

This notebook includes the most important part of the project - the modelling. The notebook tests methodologies for training, and in it the chosen algorithm is decided. Validation also o

In [1]:
# Basic imports
import pandas as pd
import numpy as np
import os
import sys

# Data visualisation
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn

# Modelling imports
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

# Image processing
import cv2
import albumentations as A
import imgaug as ia
import imgaug.augmenters as iaa

# Local imports
sys.path.append("..")
from helpers import read_images
from constants import ROWS, COLS, CHANNELS, TEST_SIZE, SEED, N_SPLITS, VERBOSE

# Default size (width, height) for every matplotlib figure drawn in this notebook.
plt.rcParams["figure.figsize"] = (14, 8)

# Seed NumPy's global RNG and imgaug's RNG with the shared project SEED so that
# random steps driven by either library repeat across kernel restarts.
np.random.seed(SEED)
ia.seed(SEED)

In [7]:
# Define directories
# All paths are relative to the notebook's working directory.
base_dir_path = "../"

data_dir_path = os.path.join(base_dir_path, "data")
# Root of the preprocessed data. This name was previously used below without
# ever being assigned, which raised a NameError on a fresh kernel.
# NOTE(review): assumes metadata.csv sits directly under data/preprocessed - confirm.
data_preprocessed_dir_path = os.path.join(data_dir_path, "preprocessed")
data_preprocessed_train_dir_path = os.path.join(data_preprocessed_dir_path, "train")

# Directory listings, kept for inspection by later cells.
data_dir = os.listdir(data_dir_path)
data_preprocessed_train_dir = os.listdir(data_preprocessed_train_dir_path)

# Per-image metadata table (image name, path, label and train/test split).
metadata_preprocessed_path = os.path.join(data_preprocessed_dir_path, "metadata.csv")
metadata = pd.read_csv(metadata_preprocessed_path)

metadata.head()

Unnamed: 0,img_name,img_path,label,split
0,7059b14d2aa03ed6c4de11afa32591995181d31c.jpg,../data/cleaned/none/7059b14d2aa03ed6c4de11afa...,none,train
1,ea1b100b581fcdb7ddfae52cc62347a99e304ba4.jpg,../data/cleaned/none/ea1b100b581fcdb7ddfae52cc...,none,train
2,1a1442990ff143b7560e5757d9f76d37ab007f48.jpg,../data/cleaned/none/1a1442990ff143b7560e5757d...,none,test
3,6eac051b9c45ff6821ec8675216f371711b7cea9.jpg,../data/cleaned/none/6eac051b9c45ff6821ec86752...,none,train
4,fc72767f8520df9b2b83941077dc0ee013eb9399.jpg,../data/cleaned/none/fc72767f8520df9b2b8394107...,none,train


In [9]:
# Read the preprocessed training images at the project-wide resolution.
# write_images=False, so no output directory is supplied.
img_array = read_images(
    data_dir_path=data_preprocessed_train_dir_path,
    rows=ROWS,
    cols=COLS,
    channels=CHANNELS,
    verbose=VERBOSE,
    write_images=False,
    output_data_dir_path=None,
)

Reading images from: ../data/preprocessed/train
Rows set to 512
Columns set to 512
Channels set to 3
Writing images is set to: False
Reading images...


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 191.04it/s]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████| 26/26 [00:00<00:00, 102.07it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 21/21 [00:00<00:00, 71.99it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 56.42it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 45.17it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 22/22 [00:00<00:00, 38.34it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:00<00:00, 33.15it/s]
100%|█

Image reading complete.
Image array shape: (192, 512, 512, 3)


## Set Parameters

In [None]:
# Run settings gathered in one dict so later cells read them from a single place.
# Image geometry, split size, seed and fold count come from the project
# constants module; batch size is set here.
# NOTE(review): `augs` is not defined in any cell above this one - the
# augmentation pipeline must be created before this cell is run.
config = {
    "rows": ROWS,
    "cols": COLS,
    "channels": CHANNELS,
    "test_size": TEST_SIZE,  # the comma here was missing (SyntaxError)
    "seed": SEED,
    "n_splits": N_SPLITS,
    "batch_size": 8,
    "augs": augs,
}

In [None]:
# Preview the stratified folds: print the positional row indices that fall in
# the training part of each fold, separated by a dashed rule.
# The fold count comes from the shared N_SPLITS constant (also recorded in
# `config`) instead of a hard-coded 3, so this preview matches the real setup.
# NOTE(review): `metadata` holds rows marked both "train" and "test" in its
# `split` column, so these folds span all rows - confirm that is intended.
skf = StratifiedKFold(n_splits=N_SPLITS)
for train_index, test_index in skf.split(metadata.index, metadata["label"]):
    print(train_index)
    print("-" * 40)