In [None]:
%load_ext autoreload
%autoreload 2

import os
import sys

from collections import Counter
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pickle

# Put the parent directory on the import path so the `src` package imported
# below can be resolved. The path is relative, so this assumes the kernel's
# working directory is one level below the project root -- TODO confirm.
sys.path.append('./../')
from src.dataset import VehiclePredictorDataset
from src.utils import plot_images

## Config variables (can be moved to yml later)

In [None]:
# Paths are relative to the kernel's working directory.
root_path = './../'
data_path = os.path.join(root_path, 'data')
dataset_path = os.path.join(data_path, 'VMMRdb')

# The label file is written by the "Dumping target labels" cell further down in
# this notebook, so it does not exist on the first run against a clean data
# directory. Loading it unconditionally would abort "Restart & Run All" with
# FileNotFoundError before the file could ever be created, so fall back to None.
target_labels_path = os.path.join(data_path, 'make_model_most_common_100.pkl')
if os.path.exists(target_labels_path):
    # NOTE(security): pickle.load can execute arbitrary code embedded in the
    # file. Only load pickles that were produced by this project.
    with open(target_labels_path, 'rb') as f:
        target_make_model_labels = pickle.load(f)
else:
    target_make_model_labels = None
    print(f"{target_labels_path} not found; run the label-dumping cell below to create it.")

## Init the Dataset

In [None]:
# Build the dataset over the directory configured in `dataset_path` and report
# how many items it exposes.
vp_dataset = VehiclePredictorDataset(root_dir=dataset_path)
num_images = len(vp_dataset)  # reused below as the population size for random sampling
print(f"num_images: {num_images}")

## Plot a few example images

In [None]:
# Show a reproducible random sample of dataset items with their decoded labels.
num_samples = 20
np.random.seed(42)  # fixed seed so the same examples are drawn on every run

# Sample without replacement so the gallery never shows the same image twice,
# and cap the sample size so a dataset smaller than `num_samples` does not raise.
indices = np.random.choice(num_images, min(num_samples, num_images), replace=False)

# Index the dataset once per sample and split the result afterwards. Indexing
# twice (once for the image, once for the target) doubles the per-item work and,
# if __getitem__ is stochastic (e.g. random transforms -- not visible here),
# could pair an image with a target from a different call.
samples = [vp_dataset[i] for i in indices]
images = [sample[0] for sample in samples]
targets = [sample[1] for sample in samples]

# Each target is indexed by 'make' / 'model' / 'year'; the dataset's idx_to_*
# lookups turn those values into the labels shown in the title.
titles = [
    f"make: {vp_dataset.idx_to_make[target['make']]}\n"
    f"model: {vp_dataset.idx_to_model[target['model']]}\n"
    f"year: {vp_dataset.idx_to_year[target['year']]}"
    for target in targets
]
plot_images(images, titles)

In [None]:
# Wrap each of the dataset's count tables in a Counter so that helpers such as
# `.most_common()` are available. Attributes are read in the order listed.
count_attribute_names = (
    'make_counts',
    'make_model_counts',
    'make_model_year_counts',
    'year_counts',
)
make_counts, make_model_counts, make_model_year_counts, year_counts = [
    Counter(getattr(vp_dataset, attribute_name))
    for attribute_name in count_attribute_names
]

### Dumping target labels for later use

In [None]:
# Persist the keys of the k most frequent make/model entries for each k below.
# One loop replaces three copy-pasted comprehension + dump stanzas, so adding or
# changing a size is a one-place edit.
top_k_sizes = (100, 200, 300)

# most_common(k) yields (key, count) pairs ordered by descending count; only
# the key is kept.
most_common_make_models = {
    k: [make_model for make_model, _count in make_model_counts.most_common(k)]
    for k in top_k_sizes
}

# Keep the original per-size names bound in case other cells refer to them.
make_model_most_common_100 = most_common_make_models[100]
make_model_most_common_200 = most_common_make_models[200]
make_model_most_common_300 = most_common_make_models[300]

# Writes data/make_model_most_common_<k>.pkl for every k, overwriting any
# existing file of the same name.
for k, labels in most_common_make_models.items():
    with open(os.path.join(data_path, f'make_model_most_common_{k}.pkl'), 'wb') as f:
        pickle.dump(labels, f)

## Exploratory Data Analysis

In [None]:
# Bare last expression: the notebook renders the dataset object's repr as the cell output.
vp_dataset