# Readme
This is the prediction notebook. Links to the related notebooks and code:
1. Data exploration in [here](./00_recognize_data.ipynb)
2. Image augmentation [notebook](./02_image_augmentation.ipynb), [class code](./utils/augmentation.py)
3. Model training for the period before 2012 [here](./03_augmented_data_model_before_2012.ipynb)
4. Model training for the period from 2012 onward [here](./05_augmented_data_model_after_2012.ipynb)

In [1]:
import os
import pandas as pd
import numpy as np
import warnings
from tqdm import tqdm
import cv2
import json
from typing import Callable
from tensorflow.keras.models import load_model

warnings.filterwarnings("ignore")

# Configuration

In [2]:
# Directories used by the notebook, all resolved against the current
# working directory.
PATHS = {
    name: os.path.join(os.getcwd(), *subdirs)
    for name, subdirs in {
        'data': ['data'],
        'models': ['models'],
        'train': ['data', 'train'],
        'test': ['data', 'test'],
    }.items()
}


def load_datafile_path(file: str) -> str:
    """Return the path of *file* inside the data directory."""
    return os.path.join(PATHS['data'], file)


def load_modelfile_path(file: str) -> str:
    """Return the path of *file* inside the models directory."""
    return os.path.join(PATHS['models'], file)


def load_train_image_path(file: str) -> str:
    """Return the path of *file* inside the training-images directory."""
    return os.path.join(PATHS['train'], file)


def load_test_image_path(file: str) -> str:
    """Return the path of *file* inside the test-images directory."""
    return os.path.join(PATHS['test'], file)

# Load data

In [3]:
# Load the test-set metadata and split it at 2012: each time range is scored
# by its own model further down.
test_df = pd.read_feather(load_datafile_path('test.ftr'))
# .copy() makes each subset an independent DataFrame, so adding the 'pred'
# column later does not go through pandas' SettingWithCopy path.
test_df_before = test_df[test_df['year'] < 2012].copy()
test_df_after = test_df[test_df['year'] >= 2012].copy()

# Predictions

## Before 2012

In [4]:
def load_images_names(df: pd.DataFrame) -> list:
    """Return the bare file name of every entry in ``df['example_path']``.

    Everything up to and including the last ``/`` of each path is dropped;
    a path without any ``/`` is returned unchanged.
    """
    return [path.rpartition('/')[2] for path in df['example_path']]


def load_images(images_names: list, path_func: Callable) -> np.ndarray:
    """Read every named image with OpenCV and pack the results into an array.

    Args:
        images_names: File names of the images to read.
        path_func: Callable that maps a file name to the path handed to
            ``cv2.imread``.

    Returns:
        ``np.array`` built from the loaded images, in the order of
        ``images_names``.

    Raises:
        OSError: If ``cv2.imread`` cannot read one of the images.
    """
    images = []
    for name in tqdm(images_names):
        path = path_func(name)
        image = cv2.imread(path)
        # cv2.imread reports failure by returning None instead of raising;
        # stop here with the offending path rather than failing later on a
        # malformed array.
        if image is None:
            raise OSError(f'Could not read image: {path}')
        images.append(image)

    return np.array(images)


def calculate_predicted_labels(predictions: list) -> list:
    """Turn per-sample model outputs into class labels.

    For each row the position of the largest score is found with
    ``np.argmax``: position 0 yields label 0 and any other position yields
    label 2.

    Args:
        predictions: Sequence of per-sample score vectors (a list of lists or
            an array whose rows are samples).

    Returns:
        A list of plain Python ints (0 or 2), one per row of ``predictions``.
    """
    # Emit built-in ints for both labels so the result has a single,
    # JSON-friendly element type instead of a mix of np.int64 and int.
    return [2 if np.argmax(scores) else 0 for scores in predictions]

In [5]:
# Collect the file names of the pre-2012 test rows, then load those images
# from the test-images directory.
names_before = load_images_names(df=test_df_before)
images_before = load_images(
    images_names=names_before,
    path_func=load_test_image_path,
)

100%|██████████| 377/377 [00:00<00:00, 466.88it/s]


In [6]:
# Load the saved model 'augmented_6k_before_2012' from the models directory.
model = load_model(load_modelfile_path('augmented_6k_before_2012'))
# Run the model on the pre-2012 test images.
predictions_before = model.predict(images_before)
# Reduce each prediction row to a single label via calculate_predicted_labels.
predicted_labels = calculate_predicted_labels(predictions_before)
# Store the labels next to the pre-2012 rows in a new 'pred' column.
test_df_before['pred'] = predicted_labels



## After 2012

In [7]:
# Collect the file names of the test rows from 2012 onward, then load those
# images from the test-images directory.
names_after = load_images_names(df=test_df_after)
images_after = load_images(
    images_names=names_after,
    path_func=load_test_image_path,
)

100%|██████████| 258/258 [00:00<00:00, 467.81it/s]


In [8]:
# Load the saved model 'augmented_6k_after_2012' from the models directory
# (this rebinds `model`, replacing the pre-2012 model).
model = load_model(load_modelfile_path('augmented_6k_after_2012'))
# Run the model on the test images from 2012 onward.
predictions_after = model.predict(images_after)
# Reduce each prediction row to a single label via calculate_predicted_labels.
predicted_labels = calculate_predicted_labels(predictions_after)
# Store the labels next to the 2012-and-later rows in a new 'pred' column.
test_df_after['pred'] = predicted_labels



# Merge

In [9]:
# Stack both year ranges, order the rows by their index label, and keep only
# the predicted label keyed by that index. The result is a nested dict of the
# form {'pred': {index: label}}.
predictions = (
    pd.concat([test_df_before, test_df_after])
    .reset_index()
    .sort_values(by='index')
    .loc[:, ['index', 'pred']]
    .set_index('index')
    .to_dict(orient='dict')
)

In [10]:
# Write the merged predictions to 'predictions.json' (a relative path, so it
# lands in the current working directory).
with open('predictions.json', 'w') as out_file:
    json.dump(predictions, fp=out_file)