## **1. Dataset Preprocessing**

In [9]:
import os
from matplotlib.transforms import Bbox
import numpy as np
import pandas as pd
import pytesseract
import cv2
from PIL import Image
import matplotlib.pyplot as plt

from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [10]:
# Latitude/longitude extent that make_bounding_boxes uses as the origin and
# scale of the labelling grid (presumably decimal degrees — TODO confirm).
LATITUDE_MIN = 42.617
LATITUDE_MAX = 46.550
LONGITUDE_MIN = 13.490
LONGITUDE_MAX = 19.451

In [11]:
# CSV file that add_labels reads the raw samples from.
DATA_CSV_PATH = '../input/lm-dataset/data.csv'

In [12]:
# Root image directory: walked by list_files and passed as `directory` to
# every flow_from_dataframe call.
DATA_FOLDERS_PATH = '../input/lm-dataset/data/data/'

In [13]:

def make_bounding_boxes(n_rows, n_cols):
    """Build an ``n_rows`` x ``n_cols`` grid of ``[left, right, down, up]`` boxes.

    The cell size along each axis is the configured extent divided by
    ``n - 1``; because ``n`` cells of that size are generated, the last
    column ends one cell past LONGITUDE_MAX and the last row ends one cell
    below LATITUDE_MIN.  Row 0 starts at LATITUDE_MAX and column 0 starts at
    LONGITUDE_MIN.

    Returns:
        A list of rows, each a list of ``[left, right, down, up]`` lists.
    """
    cell_width = (LONGITUDE_MAX - LONGITUDE_MIN) / (n_cols - 1)
    cell_height = (LATITUDE_MAX - LATITUDE_MIN) / (n_rows - 1)

    def cell(row_idx, col_idx):
        # Edges of the cell at (row_idx, col_idx).
        return [
            LONGITUDE_MIN + col_idx * cell_width,        # left
            LONGITUDE_MIN + (col_idx + 1) * cell_width,  # right
            LATITUDE_MAX - (row_idx + 1) * cell_height,  # down
            LATITUDE_MAX - row_idx * cell_height,        # up
        ]

    return [[cell(r, c) for c in range(n_cols)] for r in range(n_rows)]

def _find_group(BBs, longitude, latitude):
    """Return the row-major index of the first box containing the point, or None."""
    group = 0
    for row in BBs:
        for left, right, down, up in row:
            if left <= longitude <= right and down <= latitude <= up:
                return group
            group += 1
    return None


def add_labels(n_rows, n_cols):
    """Label every sample of DATA_CSV_PATH with its grid cell and save the result.

    The samples are read from DATA_CSV_PATH, an existing ``group`` column is
    discarded, and a new ``group`` column (row-major cell index in the grid
    built by make_bounding_boxes) is inserted at position 3.  The labelled
    table is written to ``data_processed.csv``.

    Neighbouring cells share their edges, so a point lying exactly on an edge
    matches more than one cell; the first matching cell wins, which keeps
    exactly one label per sample.

    Raises:
        ValueError: if a sample lies outside every cell of the grid.
    """
    BBs = make_bounding_boxes(n_rows, n_cols)
    df = pd.read_csv(DATA_CSV_PATH)
    df = df.drop(['group'], axis=1, errors='ignore')

    group_col = []
    for longitude, latitude in zip(df['longitude'], df['latitude']):
        group = _find_group(BBs, longitude, latitude)
        if group is None:
            # Without a label the column would be shorter than the frame and
            # df.insert would fail with an unrelated length error.
            raise ValueError(
                'sample at longitude={}, latitude={} is outside the grid'.format(longitude, latitude)
            )
        group_col.append(group)

    df.insert(3, 'group', group_col, True)
    df.to_csv("data_processed.csv", index=False)

def calculate_group_center(n_rows, n_cols):
    """Add each group's mean coordinates to ``data_processed.csv``.

    For every group id in ``range(n_rows * n_cols)`` the mean longitude and
    latitude of its samples are computed and stored in the
    ``center_longitude`` / ``center_latitude`` columns (inserted at positions
    5 and 4).  The updated table overwrites ``data_processed.csv`` and a
    per-group summary (columns ``center_longitude``, ``center_latitude``,
    ``group`` plus the default integer index) is written to
    ``group_reference.csv``.  Groups without samples get NaN centres in the
    summary.
    """
    total = n_rows * n_cols
    df = pd.read_csv('data_processed.csv')
    df = df.drop(['center_longitude', 'center_latitude'], axis=1, errors='ignore')
    # Float placeholders: the columns are filled with float means below.
    df.insert(4, "center_latitude", 0.0, True)
    df.insert(5, "center_longitude", 0.0, True)

    # Rows are collected in a list and turned into a frame once;
    # DataFrame.append no longer exists in pandas 2.x.
    reference_rows = []
    for group in range(total):
        members = df['group'] == group
        longitude_mean = df.loc[members, 'longitude'].mean()
        latitude_mean = df.loc[members, 'latitude'].mean()
        df.loc[members, 'center_longitude'] = longitude_mean
        df.loc[members, 'center_latitude'] = latitude_mean
        reference_rows.append({
            'center_longitude': longitude_mean,
            'center_latitude': latitude_mean,
            'group': group,
        })

    df_ref = pd.DataFrame(
        reference_rows, columns=['center_longitude', 'center_latitude', 'group']
    )

    df.to_csv("data_processed.csv", index=False)
    df_ref.to_csv('group_reference.csv')

def preprocess_data(n_rows, n_cols):
    """Run the preprocessing steps in order: labelling, then group centres."""
    for step in (add_labels, calculate_group_center):
        step(n_rows, n_cols)

In [14]:
# Label the samples on an 11 x 11 grid and compute every group's centre.
preprocess_data(11, 11)

In [None]:
import Levenshtein
def list_files(dir):
    """Run OCR on a fixed crop of every file found under ``dir``.

    Each file is loaded with OpenCV, cropped to rows 625-639 and columns
    600-639, upscaled by a factor of two, converted to grayscale and passed
    to Tesseract (``--psm 6``).  Only alphanumeric characters of the
    recognised text are kept.

    Returns:
        A DataFrame with one row per file and the columns ``image`` (file
        path) and ``read`` (cleaned OCR text).

    Raises:
        ValueError: if a file cannot be decoded as an image.
    """
    rows = []
    # A 1x1 kernel is kept from the original pipeline.
    kernel = np.ones((1, 1), np.uint8)
    for root, _dirs, files in os.walk(dir):
        for name in files:
            filename = os.path.join(root, name)
            img = cv2.imread(filename, cv2.IMREAD_UNCHANGED)
            if img is None:
                # cv2.imread signals failure by returning None.
                raise ValueError('could not read image: ' + filename)
            img = img[625:640, 600:640]
            # Use OpenCV's own Lanczos flag; PIL's Image.LANCZOS has the value
            # 1, which OpenCV interprets as bilinear interpolation.
            img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_LANCZOS4)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            img = cv2.dilate(img, kernel, iterations=1)
            img = cv2.erode(img, kernel, iterations=1)

            read = pytesseract.image_to_string(img, config="--psm 6")
            read = ''.join(ch for ch in read if ch.isalnum())

            rows.append({'image': filename, 'read': read})

    # Built once from a list: cheaper than growing a frame row by row, and
    # the column order is fixed (a set of column names has no defined order).
    return pd.DataFrame(rows, columns=['image', 'read'])

def similar(a, b):
    """Return ``Levenshtein.ratio`` of the two strings ``a`` and ``b``."""
    ratio = Levenshtein.ratio(a, b)
    return ratio


def replace_with_similar(words):
    """Collapse near-duplicate words onto the earlier spelling, in place.

    Every later word whose similarity to an earlier word is at least 0.75
    (but not an exact match) is overwritten with that earlier word.

    Returns:
        The same list object that was passed in, after modification.
    """
    for i, current in enumerate(words):
        # A word compared with itself scores 1.0 and is never replaced, so
        # the inner scan starts at the next position.
        for j in range(i + 1, len(words)):
            similarity = similar(current, words[j])
            if 0.75 <= similarity < 1.0:
                words[j] = current
    return words


def most_frequent(List):
    """Return the most common element of ``List``.

    Ties are resolved in favour of the element that appears first.

    Raises:
        ValueError: if ``List`` is empty.
    """
    from collections import Counter

    counts = Counter(List)
    # max() returns the first maximal key and Counter preserves insertion
    # order, so the result is deterministic.
    return max(counts, key=counts.get)
# OCR every image, then map near-duplicate reads onto one spelling.
reads = list_files(DATA_FOLDERS_PATH)
# Distinct reads, most frequent first.
words_unedited = list(reads.groupby('read').count().sort_values('image', ascending = False).index)
w = words_unedited[:]
# replace_with_similar rewrites the list it is given, so `w` holds the
# normalised words whatever the call returns.
replace_with_similar(w)
words_edited = w
zipped = dict(zip(words_unedited, words_edited))
# Column-wise assignment: writing through reads.iloc[idx]['read'] is chained
# indexing and is not guaranteed to modify `reads`.
reads['read'] = reads['read'].map(zipped)

main = 'Google'
mistakes = ['beter']
folder_rows = []
# Consecutive blocks of four rows are treated as one folder.
for i, g in reads.groupby(reads.index // 4):
    folder = g['image'].values[0]
    if main in g['read'].values:
        replace = main
    else:
        replace = most_frequent(list(g['read'].values))
        if replace in mistakes:
            replace = main
    folder_rows.append({'folder': folder[:folder.rindex('/')], 'read': replace})
folders = pd.DataFrame(folder_rows, columns = ['folder', 'read'])

# Folders whose consolidated read is anything other than 'Google'.
non_google = folders[folders['read'] != 'Google'].reset_index(drop = True)
filtered = non_google

In [15]:
import tensorflow as tf
# Reset the global Keras state before any model is built.
tf.keras.backend.clear_session()

df = pd.read_csv('data_processed.csv')
# NOTE(review): read_pickle executes whatever the pickle contains; only load
# files from a trusted source.
reads = pd.read_pickle('../input/weight-update/filtered.pkl')
# Reduce every folder path to its last component.  The array returned by
# `.values` is rewritten element by element.
to_ignore = reads['folder'].values
for pos, folder_path in enumerate(to_ignore):
    to_ignore[pos] = folder_path[folder_path.rindex('/') + 1:]


In [16]:
# Keep only the samples whose uuid is not listed in to_ignore.
df = df[~df['uuid'].isin(to_ignore)]

In [17]:
# One row per group (its first occurrence in df), reduced to the centre
# coordinates and indexed by group while keeping 'group' as a column.
new = df.drop_duplicates(subset = ['group'])
group_reference = new[['center_latitude', 'center_longitude', 'group']]
group_reference = group_reference.set_index('group', drop = False)

In [18]:

# a: per-group count of non-null uuid values; b: the same counts as a dict
# (read as a global by find_nearest_group).
a = df.groupby('group').count()['uuid']
b = dict(a)
# Size threshold read as a global by replace_groups.
n = 100
def find_nearest_group(df_ref, current_group, center_latitude, n = 100):
    """Return the index label of the closest sufficiently large other group.

    Closeness is the absolute difference between ``center_latitude`` and the
    ``center_latitude`` column of ``df_ref``.  Only groups other than
    ``current_group`` with more than ``n`` occurrences are considered.

    Side effect: an ``occurrances`` column is written to ``df_ref``, filled
    from the module-level dict ``b``.
    """
    df_ref['occurrances'] = df_ref.group.map(b)
    is_other_group = df_ref.group != current_group
    is_large_enough = df_ref.occurrances > n
    candidates = df_ref[is_other_group & is_large_enough]
    latitude_gap = candidates['center_latitude'].sub(center_latitude).abs()
    return latitude_gap.idxmin()


def replace_groups(df, a):
    """Merge every undersized group into its nearest sufficiently large group.

    The rows are visited in a snapshot taken before any change.  When the
    group of a row currently has fewer than ``n`` samples (``n`` is the
    module-level threshold), all rows with that uuid are moved to the group
    returned by find_nearest_group and take over that group's centre from
    ``group_reference``.  The counts are recomputed after every move.

    Args:
        df: sample table with ``uuid``, ``group``, ``center_latitude`` and
            ``center_longitude`` columns; modified in place.
        a: Series of sample counts indexed by group.

    Returns:
        The modified ``df``.
    """
    # Columns are selected by name: in the positional layout index 5 is
    # center_longitude, which must not be compared with latitudes.
    snapshot = df[['uuid', 'group', 'center_latitude']].values
    for uuid, group, center_latitude in snapshot:
        num_of_occurances = a.loc[group]
        if num_of_occurances < n:
            replace_group = find_nearest_group(group_reference, group, center_latitude, n)
            same_sample = df.uuid == uuid
            df.loc[same_sample, 'group'] = replace_group
            df.loc[same_sample, 'center_latitude'] = group_reference.loc[replace_group]['center_latitude']
            df.loc[same_sample, 'center_longitude'] = group_reference.loc[replace_group]['center_longitude']
            # Refresh the counts so later rows see the effect of this move.
            a = df.groupby('group').count()['uuid']
    return df

# Merge undersized groups into larger ones.
df = replace_groups(df, a)

# Group ids become strings; the generators further down use 'group' as y_col
# with class_mode='categorical'.
df['group'] = df['group'].astype(str)
# Y is the label column; X is the full frame (it still contains 'group').
Y = df['group']
X = df
            

In [19]:
# Rebuild the per-group reference from the current df: one row per group
# (its first occurrence), indexed by group with 'group' kept as a column.
new = df.drop_duplicates(subset = ['group'])
group_reference = new[['center_latitude', 'center_longitude', 'group']]
group_reference = group_reference.set_index('group', drop = False)

### Train-validation-test split

In [20]:
from sklearn.model_selection import train_test_split

# First split: 25% of all samples become the validation set.
X_train, X_val, y_train, y_val = train_test_split(X, Y,
    test_size=0.25, shuffle = True, random_state = 42)

# Second split: 25% of the remaining samples become the test set.
X_train, X_test, y_train, y_test = train_test_split(X_train, y_train, 
    test_size=0.25, random_state = 42)


In [21]:
def expand_dataset(df):
    """Return one row per image: four images for every sample of ``df``.

    The first six columns of ``df`` are read by position as uuid, latitude,
    longitude, group, center_latitude and center_longitude.  Every sample
    yields four rows whose ``uuid`` is ``<uuid>/0.jpg``, ``<uuid>/90.jpg``,
    ``<uuid>/180.jpg`` and ``<uuid>/270.jpg``; the other values are copied.

    Returns:
        A new DataFrame with the columns of ``df`` and a fresh integer index.
    """
    image_names = ['0.jpg', '90.jpg', '180.jpg', '270.jpg']
    # Rows are gathered in a list and converted once: DataFrame.append no
    # longer exists in pandas 2.x and copied the frame on every call.
    rows = []
    for sample in df.values:
        for image_name in image_names:
            rows.append({
                'uuid': sample[0] + '/' + image_name,
                'latitude': sample[1],
                'longitude': sample[2],
                'group': sample[3],
                'center_latitude': sample[4],
                'center_longitude': sample[5],
            })

    return pd.DataFrame(rows, columns=df.columns)

In [22]:
# Expand every split from one row per sample to one row per image.
X_train_exp = expand_dataset(X_train)
X_test_exp = expand_dataset(X_test)
X_val_exp = expand_dataset(X_val)

In [23]:
# Plot the group distribution of the train set before balancing.
# 'group' is cast to int only for the histogram and back to str afterwards.
X_train_exp['group'] = X_train_exp['group'].astype(int)
X_train_exp.hist(column = 'group', figsize = (15, 10), bins = 121)
X_train_exp['group'] = X_train_exp['group'].astype(str)

In [None]:
# Check for leakage between the train set and the validation set: the inner
# merge on 'uuid' keeps only images that appear in both frames, so `check`
# should be empty when there is no leakage.
check = pd.merge(X_train_exp, X_val_exp, how = 'inner', on = ['uuid'])
check.head(10)

In [24]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def balance_data(df, target_size = 4000, random_state = None):
    """Oversample every small group of ``df`` up to ``target_size`` rows.

    Groups (by the ``group`` column) with fewer than ``target_size`` rows are
    shuffled, repeated as often as they fit and topped up with a random
    sample of their own rows so that they reach exactly ``target_size`` rows.
    Groups that already have at least ``target_size`` rows are kept as they
    are (they are not downsampled).

    Args:
        df: input frame; it is left untouched.
        target_size: number of rows every small group is grown to.
        random_state: optional seed forwarded to ``DataFrame.sample`` to make
            the result reproducible.

    Returns:
        A new DataFrame with a fresh integer index: the oversampled groups
        first, followed by the rows of all other groups in their original
        order.
    """
    oversampled_parts = []
    oversampled_groups = []

    for group_key, df_group in df.groupby('group'):
        current_size = len(df_group)
        if current_size >= target_size:
            continue

        df_label = df_group.sample(frac = 1, random_state = random_state)
        repeat, mod = divmod(target_size, current_size)
        oversampled_parts.extend([df_label] * repeat)
        # Top up with `mod` distinct rows to hit target_size exactly.
        oversampled_parts.append(df_group.sample(n = mod, random_state = random_state))
        oversampled_groups.append(group_key)

    # Select the remaining rows instead of dropping from `df` in place, so
    # the caller's frame is not modified.
    untouched = df[~df['group'].isin(oversampled_groups)]
    return pd.concat(oversampled_parts + [untouched], ignore_index = True, axis = 0)



In [25]:
# All images that belong to a certain group are duplicated until the target
# size has been reached: the default (4000) for the train set, 1000 for the
# validation and test sets.
X_train_bal = balance_data(X_train_exp)
X_val_bal = balance_data(X_val_exp, 1000)
X_test_bal = balance_data(X_test_exp, 1000)


In [None]:
# Plot the group distribution of the train set after balancing.
# 'group' is cast to int only for the histogram and back to str afterwards.
X_train_bal['group'] = X_train_bal['group'].astype(int)
X_train_bal.hist(column = 'group', figsize = (15, 10), bins = 121)
X_train_bal['group'] = X_train_bal['group'].astype(str)

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Image size and batch size for the generators without suffix ...
target_size = (260, 260)
batch_size = 32
# ... and for the *_B0 generators.
target_size_B0 = (224, 224)
batch_size_B0 = 64
df['group'] = df['group'].astype(str)

# Defining the parameters for augmentation that will be used on the train set
# NOTE(review): per the Keras documentation `cval` only applies to
# fill_mode='constant', so it presumably has no effect with 'reflect' — confirm.
datagen = ImageDataGenerator(
    rotation_range = 45,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    shear_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    fill_mode = 'reflect', cval = 125
)
# No augmentation for the validation images.
test_datagen = ImageDataGenerator()

# Augmented, shuffled train generator at 260x260.
train_generator = datagen.flow_from_dataframe(
    dataframe = X_train_bal,
    x_col = 'uuid',
    y_col = 'group',
    target_size = target_size,
    directory = DATA_FOLDERS_PATH,
    class_mode = 'categorical',
    batch_size = batch_size,
    shuffle = True
)
# Unshuffled validation generator at 260x260.
validation_generator = test_datagen.flow_from_dataframe(
    dataframe = X_val_bal,
    x_col = 'uuid',
    y_col = 'group',
    target_size = target_size,
    directory = DATA_FOLDERS_PATH,
    class_mode = 'categorical',
    batch_size = batch_size,
    shuffle = False
)

# Same data at 224x224 with batch size 64.
train_generator_B0 = datagen.flow_from_dataframe(
    dataframe = X_train_bal,
    x_col = 'uuid',
    y_col = 'group',
    target_size = target_size_B0,
    directory = DATA_FOLDERS_PATH,
    class_mode = 'categorical',
    batch_size = batch_size_B0,
    shuffle = True
)
validation_generator_B0 = test_datagen.flow_from_dataframe(
    dataframe = X_val_bal,
    x_col = 'uuid',
    y_col = 'group',
    target_size = target_size_B0,
    directory = DATA_FOLDERS_PATH,
    class_mode = 'categorical',
    batch_size = batch_size_B0,
    shuffle = False
)


#### Downloading weight updates for EfficientNetB0 and B2

In [None]:
# Download and unpack the noisy-student EfficientNet-B0 checkpoint, then run the conversion script to write efficientnetb0_notop.h5.
!wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/noisystudent/noisy_student_efficientnet-b0.tar.gz
!tar -xf noisy_student_efficientnet-b0.tar.gz
!python ../input/weight-update/efficientnet_weight_update_util.py --model b0 --notop --ckpt ./noisy_student_efficientnet-b0/model.ckpt --o efficientnetb0_notop.h5

In [None]:
# Download and unpack the noisy-student EfficientNet-B2 checkpoint, then run the conversion script to write efficientnetb2_notop.h5.
!wget https://storage.googleapis.com/cloud-tpu-checkpoints/efficientnet/noisystudent/noisy_student_efficientnet-b2.tar.gz
!tar -xf noisy_student_efficientnet-b2.tar.gz
!python ../input/weight-update/efficientnet_weight_update_util.py --model b2 --notop --ckpt ./noisy_student_efficientnet-b2/model.ckpt --o efficientnetb2_notop.h5

## **2. Setting up the models**

In [None]:
from tensorflow.keras.applications import InceptionResNetV2, EfficientNetB0, EfficientNetB2, MobileNetV3Large
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout, Input
from tensorflow.keras.models import Model, model_from_json
#input_shape = (260,260,3)

def get_model(input_shape, type_ = 'B2'):
    """Return the headless backbone selected by ``type_``.

    Args:
        input_shape: image shape the backbone is built for.
        type_: ``'B2'`` for EfficientNetB2 (weights from
            ``efficientnetb2_notop.h5``), ``'B0'`` for EfficientNetB0
            (weights from ``efficientnetb0_notop.h5``); any other value
            selects MobileNetV3Large with its default weights.

    Returns:
        The Keras model without its classification top.
    """
    if type_ == 'B2':
        return EfficientNetB2(weights='efficientnetb2_notop.h5', include_top=False, input_shape = input_shape)
    if type_ == 'B0':
        return EfficientNetB0(weights='efficientnetb0_notop.h5', include_top=False, input_shape = input_shape)
    # input_shape is forwarded here as well so that all three backbones are
    # built for the requested image size.
    return MobileNetV3Large(include_top=False, input_shape = input_shape)

In [None]:
def init_model(input_shape, type_ = 'B2', n_classes = 35):
    """Build a classifier: backbone, global average pooling and a dense head.

    Args:
        input_shape: image shape passed to get_model.
        type_: backbone selector passed to get_model.
        n_classes: number of softmax outputs (35 by default).

    Returns:
        An uncompiled Keras ``Model`` whose input is the backbone's input.
    """
    base_model = get_model(input_shape, type_)

    #Trainable has been set to True because of regularization
    base_model.trainable = True
    x = GlobalAveragePooling2D()(base_model.output)
    x = Dense(1024, activation = 'relu')(x)
    x = Dropout(0.5)(x)
    outputs = Dense(n_classes, activation='softmax')(x)
    return Model(inputs=base_model.input, outputs=outputs)





In [None]:
# Batch sizes: 32 for the 260x260 generators, 64 for the *_B0 generators.
batch_size = 32
batch_size_B0 = 64

In [None]:
from tensorflow.keras import regularizers
import os 
import tempfile
def add_regularization(model, regularizer = regularizers.l2(0.0001)):
    """Return a copy of ``model`` with ``regularizer`` on every kernel.

    The regularizer is set on each layer that has a ``kernel_regularizer``
    attribute.  The model is then rebuilt from its JSON configuration and the
    weights are restored by layer name from a temporary file, so that the new
    attribute values are part of the rebuilt model.

    Args:
        model: Keras model to regularize.
        regularizer: a ``regularizers.Regularizer``; for any other value the
            model is returned unchanged.

    Returns:
        The rebuilt model; it has to be compiled again by the caller.
    """
    if not isinstance(regularizer, regularizers.Regularizer):
        return model

    for layer in model.layers:
        if hasattr(layer, 'kernel_regularizer'):
            layer.kernel_regularizer = regularizer

    model_json = model.to_json()

    # A private temporary directory avoids clashes on a shared file name and
    # is removed again, weights file included, when the block is left.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_weights_path = os.path.join(tmp_dir, 'tmp_weights.h5')
        model.save_weights(tmp_weights_path)

        model = model_from_json(model_json)
        model.load_weights(tmp_weights_path, by_name = True)
    return model

In [None]:
# Default backbone ('B2') at 260x260.
input_shape = (260, 260, 3)
train_model = init_model(input_shape)
opt = tf.keras.optimizers.Adam()


#Regularization is added to fight overfitting
train_model = add_regularization(train_model)

# add_regularization returns a rebuilt model, so compilation happens after it.
train_model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
train_model

In [None]:
# "MobileNet" is not 'B2' or 'B0', so get_model takes its MobileNetV3Large branch.
input_shape = (224, 224, 3)
train_model_MbNet = init_model(input_shape, "MobileNet")
opt = tf.keras.optimizers.Adam()



train_model_MbNet = add_regularization(train_model_MbNet)

# add_regularization returns a rebuilt model, so compilation happens after it.
train_model_MbNet.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
train_model_MbNet

In [None]:
# EfficientNetB0 backbone at 224x224.
input_shape = (224, 224, 3)
train_model_B0 = init_model(input_shape, 'B0')
opt = tf.keras.optimizers.Adam()



train_model_B0 = add_regularization(train_model_B0)

# add_regularization returns a rebuilt model, so compilation happens after it.
train_model_B0.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])
train_model_B0

##### This is commented out, but provides the option to load already trained models

In [None]:
#train_model.load_weights('../input/weight-update/weight_improvement-05-0.43.h5')

In [None]:
#train_model_B0.load_weights('../input/weight-update/B0-06-0.43.h5')

In [None]:
#train_model_MbNet.load_weights('../input/weight-update/weight_improvement-08-0.38-mobilenet.h5')

## **3. Training the models**

In [None]:
#Checkpoints are added so that the best performing models can be saved
from tensorflow.keras.callbacks import ModelCheckpoint
# File name pattern: epoch number and validation accuracy are filled in.
filepath = 'weight_improvement-{epoch:02d}-{val_accuracy:.2f}.h5'
# Saves the full model (save_weights_only = False) when val_accuracy improves.
checkpoint = ModelCheckpoint(filepath, monitor = 'val_accuracy', verbose = 1, save_best_only = True, save_weights_only = False, mode = 'max')
callbacks_list = [checkpoint]

In [None]:
#Models are trained one after another on 10 epochs each
models = [(train_model, train_generator, validation_generator), (train_model_B0, train_generator_B0, validation_generator_B0),
          (train_model_MbNet, train_generator_B0, validation_generator_B0)]
# File name prefix per model, in the same order as `models`.
model_tags = ['B2', 'B0', 'MobileNet']

# Training history of every model, keyed by its tag.
histories = {}
for tag, (net, train_gen, val_gen) in zip(model_tags, models):
    # Each model gets its own checkpoint callback and file prefix: a shared
    # save_best_only callback would keep comparing against the best score of
    # the previously trained model and reuse its file names.
    model_checkpoint = ModelCheckpoint(tag + '-{epoch:02d}-{val_accuracy:.2f}.h5', monitor = 'val_accuracy',
                                       verbose = 1, save_best_only = True, save_weights_only = False, mode = 'max')
    history = net.fit(train_gen, validation_data=val_gen, steps_per_epoch=len(train_gen),
                      validation_steps=len(val_gen), epochs=10, callbacks = [model_checkpoint])
    histories[tag] = history

## **4. Getting the predictions**

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [26]:
#Generating images for testing
# NOTE: this rebinds `datagen` to a generator without augmentation.
datagen = ImageDataGenerator()
# shuffle = False keeps the predictions in the row order of X_test_bal.
test_generator = datagen.flow_from_dataframe(
    dataframe = X_test_bal,
    x_col = 'uuid',
    y_col = 'group',
    target_size = (260, 260),
    directory = DATA_FOLDERS_PATH,
    class_mode = 'categorical',
    batch_size = 32,
    shuffle = False
)

In [None]:
#This maps classes to their actual labels
# class_indices maps label -> class index; it is inverted to index -> label.
label_map = test_generator.class_indices
label_map = dict((v, k) for k, v in label_map.items())
label_map

In [None]:
#Generating images for testing but with different target size for EfficientNetB0 and MobileNet models
# Same frame and order as test_generator, at 224x224 with batch size 64.
test_generator_B0 = datagen.flow_from_dataframe(
    dataframe = X_test_bal,
    x_col = 'uuid',
    y_col = 'group',
    target_size = (224, 224),
    directory = DATA_FOLDERS_PATH,
    class_mode = 'categorical',
    batch_size = 64,
    shuffle = False
)

In [None]:
# Rewind both test generators so that prediction starts at the first batch.
# (No separate MobileNet generator exists: that model is fed
# test_generator_B0 as well.)
test_generator.reset()
test_generator_B0.reset()
# One prediction array per model; the MobileNet predictions are scaled by 0.9.
y_preds = [train_model.predict(test_generator), train_model_MbNet.predict(test_generator_B0) * 0.9, train_model_B0.predict(test_generator_B0)]

In [None]:
# Stack the per-model predictions and average them over the model axis.
# NOTE(review): the divisor is a fixed 3; if one model's output was scaled
# beforehand the rows of y_pred need not sum to 1 — confirm this is intended.
y_preds = np.array(y_preds)
y_pred = np.sum(y_preds, axis = 0) / 3



## **5. Evaluation**

In [None]:
from sklearn.metrics import confusion_matrix, classification_report
# Predicted class index per test image.
Y_pred = np.argmax(y_pred, axis = 1)
cf_matrix = confusion_matrix(test_generator_B0.classes, Y_pred)
# NOTE(review): the report rows are named after the class indices 0..34, not
# after the group labels the indices stand for — confirm this is intended.
target_names = [str(i) for i in range(35)]
print(classification_report(test_generator_B0.classes, Y_pred, target_names=target_names))

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt
# Heat map of the confusion matrix.
plt.figure(figsize = (10, 10))
ax = sns.heatmap(cf_matrix, cmap = 'Reds')
ax.set_xlabel('Predicted')
ax.set_ylabel('Actual')
plt.show()

In [None]:
from math import radians, degrees, sin, cos, asin, acos, sqrt

def great_circle(lat_1, lon_1, lat_2, lon_2):
    """Return the great-circle distance between two points on a sphere of radius 6371.

    Args:
        lat_1, lon_1: latitude and longitude of the first point, in degrees.
        lat_2, lon_2: latitude and longitude of the second point, in degrees.

    Returns:
        The distance in the unit of the radius (kilometres for 6371).
    """
    lon_1, lat_1, lon_2, lat_2 = map(radians, [lon_1, lat_1, lon_2, lat_2])

    cos_angle = sin(lat_1) * sin(lat_2) + cos(lat_1) * cos(lat_2) * cos(lon_1 - lon_2)
    # Rounding can push the value marginally outside [-1, 1] (for example for
    # two identical points), which would make acos raise a domain error.
    cos_angle = max(-1.0, min(1.0, cos_angle))
    return 6371 * acos(cos_angle)

In [None]:
def get_weights(w):
    """Pair every value of ``w`` with the label of its position.

    Returns a list of ``(label, value)`` tuples in the order of ``w``; the
    labels come from the module-level ``label_map``.
    """
    return [(label_map[position], value) for position, value in enumerate(w)]


In [None]:
import itertools
def get_closest_groups(group, gr_ref, n = 4):
    """Return the ``n`` groups whose centres are closest to that of ``group``.

    Args:
        group: group id; ``str(group)`` must be an index label of ``gr_ref``.
        gr_ref: frame indexed by group id with ``center_latitude`` and
            ``center_longitude`` columns.
        n: maximum number of neighbours to return.

    Returns:
        A dict ``{group id as str: distance}`` ordered from the closest group
        outwards.  It is empty when ``gr_ref`` holds no other group.
    """
    origin = gr_ref.loc[str(group)]
    group_lat = origin['center_latitude']
    group_lon = origin['center_longitude']

    distances = {}
    for idx in gr_ref.index:
        if int(group) == int(idx):
            continue
        other = gr_ref.loc[idx]
        distances[str(idx)] = great_circle(
            group_lat, group_lon, other['center_latitude'], other['center_longitude']
        )

    # sorted() is stable, so equally distant groups keep their index order.
    ordered = sorted(distances.items(), key=lambda item: item[1])
    return dict(ordered[:n])
# Example: the closest neighbours of group 24 (requires '24' to be present
# in group_reference).
n_closest = get_closest_groups(24, group_reference)
n_closest

In [None]:
# Class index -> label, obtained by inverting the generator's class_indices.
label_map = test_generator.class_indices
label_map = dict((v, k) for k, v in label_map.items())
label_map

In [None]:
# Most probable class per image, translated to labels through label_map,
# next to the true labels of the test generator.
predicted_class_indices = np.argmax(y_pred, axis = 1)
predictions = [label_map[k] for k in predicted_class_indices]
real = [label_map[k] for k in test_generator.classes]

In [None]:
# NOTE(review): a credential is hard-coded here.  It should be rotated and
# supplied through the GOOGLE_MAPS_API_KEY environment variable only; the
# literal is kept as a fallback so that existing runs keep working.
API_KEY = os.environ.get('GOOGLE_MAPS_API_KEY', 'AIzaSyDA4dcO2U2_Bs2UFDC80bdSq54LmR247SM')
# Start and end of the metadata request URL; the location, the radius and
# the key are inserted when the request is made.
URL_PREFIX = 'https://maps.googleapis.com/maps/api/streetview/metadata?size=600x300&'
URL_SUFIX = '&fov=110&pitch=38&key='
import requests

#This function is used to get the nearest coordinates to the predicted ones that are available on google street view
def get_street_view_coordinates(coordinates, radius = 5000):
    """Look up the location the metadata endpoint reports for ``coordinates``.

    Args:
        coordinates: ``(latitude, longitude)`` pair.
        radius: value of the ``radius`` request parameter.

    Returns:
        ``(lat, lng)`` from the response when its status is ``'OK'``,
        otherwise ``False``.
    """
    coords = str(coordinates[0]) + ',' + str(coordinates[1])
    url = URL_PREFIX + 'location=' + coords + '&radius=' + str(radius) + URL_SUFIX + API_KEY
    # A timeout keeps an unresponsive server from blocking the run forever.
    res = requests.get(url, timeout = 10)
    payload = res.json()
    # Only an 'OK' response carries a location; every other status (no
    # results, quota exceeded, request denied, ...) is reported as False.
    if payload.get('status') != 'OK':
        return False
    location = payload['location']
    return (location['lat'], location['lng'])

In [None]:
def calculate_predicted_coordinates(weights, gr_ref, n = 4, street_view = True):
    """Turn ranked group probabilities into one latitude/longitude estimate.

    The best-ranked group and its ``n`` closest groups are kept, their
    probabilities are rescaled to sum to 1, and the group centres are
    averaged with these probabilities as weights.

    Args:
        weights: ``(group label, probability)`` pairs, best group first.
        gr_ref: frame indexed by group id with ``group``, ``center_latitude``
            and ``center_longitude`` columns; it is not modified.
        n: number of neighbouring groups taken into account.
        street_view: when true, the estimate is replaced by the location
            returned by get_street_view_coordinates (radius 10000), if any.

    Returns:
        A ``(latitude, longitude)`` tuple.
    """
    # Normalised group ids ('24'), computed on the side so that the caller's
    # frame stays as it is.
    group_ids = gr_ref['group'].map(lambda g: str(int(g)))
    top_group = weights[0][0]
    closest = get_closest_groups(top_group, gr_ref, n)

    new_probabilities = [
        [group, probability]
        for group, probability in weights
        if group in closest or group == top_group
    ]

    factor = 1 / sum(probability for _, probability in new_probabilities)

    result_longitude = 0
    result_latitude = 0
    for group, probability in new_probabilities:
        share = probability * factor
        center = gr_ref[group_ids == group]
        result_longitude += center['center_longitude'].values[0] * share
        result_latitude += center['center_latitude'].values[0] * share

    if street_view:
        snapped = get_street_view_coordinates((result_latitude, result_longitude), 10000)
        if snapped:
            return snapped

    return (result_latitude, result_longitude)

In [None]:
# Test rows ordered by image path with fully identical rows removed; the
# original index labels are kept.
sorted_ = X_test_bal.sort_values(['uuid'])
sorted_ = sorted_.drop_duplicates()


In [None]:
# Display the per-group reference table.
group_reference

In [None]:
from sklearn.preprocessing import scale
def calculate_grouped_predictions(df,predictions, n = 4):
    """Average the predictions of every ``n`` consecutive rows into coordinates.

    The rows of ``df`` are walked in order; the index label of each row
    selects a row of the module-level ``y_pred``.  After every ``n`` rows the
    collected probability vectors are averaged, ranked and converted to
    coordinates with calculate_predicted_coordinates (``street_view=False``,
    using the module-level ``group_reference``).  A trailing block of fewer
    than ``n`` rows is ignored.

    Args:
        df: frame whose index labels are positions in ``y_pred``.
        predictions: not used; kept for compatibility with existing calls.
        n: number of consecutive rows that form one block.

    Returns:
        A list with one coordinate tuple per complete block; each tuple is
        also printed.
    """
    final_predictions = []
    current_predictions = []
    for index in df.index:
        current_predictions.append(y_pred[index])
        if len(current_predictions) < n:
            continue

        # Mean over the block: divide by the block size, not by a fixed 4.
        averaged_predictions = np.sum(current_predictions, axis = 0) / n
        weights = get_weights(averaged_predictions)
        weights.sort(key=lambda x:x[1], reverse = True)
        pred = calculate_predicted_coordinates(weights, group_reference,street_view = False)
        final_predictions.append(pred)
        print(pred)
        current_predictions = []

    return final_predictions
    
        
        
        
# One coordinate estimate per block of four consecutive rows of sorted_.
final_predictions = calculate_grouped_predictions(sorted_, predictions)

In [None]:
# Reduce sorted_ to one row per sample: 'only_uuid' is the part of the image
# path before the first '/', and only the first row of each is kept.
sorted_['only_uuid'] = sorted_['uuid'].str.split('/').str[0]
sorted_ = sorted_.drop_duplicates(subset='only_uuid', keep='first')
# A fresh 0..N-1 index, used as position into final_predictions afterwards.
sorted_.reset_index(inplace = True, drop = True)
sorted_

In [None]:
# Great-circle distance between the predicted and the real coordinates of
# every sample.
errors = []

for index, row in sorted_.iterrows():
    # Positions 1 and 2 of the row are read as (latitude, longitude).
    real_coordinates = (row.values[1], row.values[2])
    print(real_coordinates)
    predicted_coordinates = final_predictions[index]
    error = great_circle(predicted_coordinates[0], predicted_coordinates[1], real_coordinates[0], real_coordinates[1])
    errors.append(error)

print(max(errors))
    
    

In [None]:
# Convert the list of errors to a NumPy array.
errors = np.array(errors)

In [None]:
from scipy import stats
# Summary statistics of the errors from scipy.stats.describe.
stats.describe(errors)

In [None]:
# Minimum, maximum, standard deviation and mean of the errors.
print('Min: ', errors.min())
print('Max: ', errors.max())
print('Std: ', errors.std())
print('Mean: ', errors.mean())

In [None]:
# Histogram of the errors with bin edges 0, 20, 40, 60 and 80; errors above
# the last edge are not drawn.
fig = plt.figure(figsize = (8, 8))
bins = [i for i in range(0, 100, 20)]
plt.hist(errors, bins = bins)


In [None]:
# Box plot of the errors.
plt.figure(figsize = (10, 10))
sns.boxplot(data = errors)