# Import Libraries

In [None]:
import tensorflow
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Dense, Conv2D, MaxPool2D, Dropout, Flatten, MaxPooling2D, GlobalAveragePooling2D
from keras.models import Sequential
from keras.preprocessing.image import ImageDataGenerator
from keras.losses import categorical_crossentropy
from keras.callbacks import EarlyStopping
from tensorflow.keras.applications import EfficientNetB0
from google.colab import drive
from keras.utils.traceback_utils import include_frame

# Fix TensorFlow's global random seed so TensorFlow-side randomness is repeatable between runs.
# NOTE(review): only TensorFlow is seeded here; no NumPy / Python seed is set in the visible code.
tensorflow.random.set_seed(42)



# Connect to drive

In [None]:
# Mount Google Drive at /content/drive/ (`drive` comes from google.colab, so this cell is Colab-only).
drive.mount('/content/drive/')

# Extract the dataset zip from Google Drive

In [None]:
from zipfile import ZipFile

# Archive location, relative to the current working directory.
file_name = 'drive/MyDrive/datasets/data.zip'

# Bind the archive to `archive` rather than `zip`, so the builtin zip() is not
# shadowed for every cell that runs afterwards.
with ZipFile(file_name, 'r') as archive:
    archive.extractall()  # no target path given: extracts into the current working directory
    print('Done')

# Import EDA Data

In [None]:
# Helpers

#                ['antelope_duiker', 'bird', 'blank', 'civet_genet', 'hog', 'leopard', 'monkey_prosimian', 'rodent']
# Classification:           1           2        3           4          5        6              7               8
def merge_animals(df):
    """Collapse one-hot class columns into a single 1-based class code.

    Args:
        df (pd.DataFrame): Indicator columns only, one column per class.
            Every row must contain exactly one non-zero entry.

    Returns:
        pd.DataFrame: A frame with the same index as ``df`` and a single
        column, ``'animal_classification'``, holding the 1-based position
        of the row's non-zero column (first column -> 1). ``df`` itself is
        not modified.

    Raises:
        ValueError: If any row does not have exactly one non-zero entry.
    """
    hot = df.to_numpy() != 0
    hits_per_row = hot.sum(axis=1)

    # np.nonzero lists hits in row-major order, so its column indices line up
    # with the rows only when each row contributes exactly one hit. Without
    # this check, a row with two hits plus a row with none would keep the
    # total length right while silently shifting labels onto the wrong rows.
    if not (hits_per_row == 1).all():
        bad_rows = df.index[hits_per_row != 1].tolist()
        raise ValueError(
            'merge_animals expects exactly one non-zero indicator per row; '
            f'{len(bad_rows)} row(s) violate this (first index labels: {bad_rows[:5]})'
        )

    codes = np.nonzero(hot)[1] + 1  # column position -> 1-based class code
    return pd.DataFrame({'animal_classification': codes}, index=df.index)

def plot_metrics(model_fit):
    """Show one train-vs-validation line chart per tracked metric.

    Args:
        model_fit: Object whose ``history`` mapping contains the keys
            'accuracy', 'precision', 'recall' and their 'val_'-prefixed
            counterparts, each a per-epoch sequence of values.

    Draws a separate figure for accuracy, precision and recall (in that
    order) and displays each with ``plt.show()``.
    """
    curves = model_fit.history
    for metric_name in ('accuracy', 'precision', 'recall'):
        # training curve first, then the validation curve on the same axes
        plt.plot(curves[metric_name], label='Train')
        plt.plot(curves[f'val_{metric_name}'], label='Test')
        plt.ylabel(metric_name)
        plt.xlabel('Epochs')
        plt.legend()
        plt.show()

In [None]:
# read the three CSV tables from the extracted data/ folder
test_features = pd.read_csv('data/test_features.csv')
train_features = pd.read_csv('data/train_features.csv')
train_labels = pd.read_csv('data/train_labels.csv')

# attach the labels to the training features via the shared 'id' column
train = train_features.merge(train_labels, on='id')

# Make Validation And Train Data

In [None]:
# combine train df's on id
train = pd.merge(left=train_features, right=train_labels, on='id')

# One-hot label columns. Their order defines the 1-based codes produced by
# merge_animals (first column -> 1), so the code -> name map is derived from
# this single list instead of being typed out a second time.
class_names = ['antelope_duiker', 'bird', 'blank', 'civet_genet', 'hog', 'leopard', 'monkey_prosimian', 'rodent']

# merge_animals returns a single-column DataFrame; pick the column explicitly
# so a Series (not a DataFrame) is what gets mapped and assigned.
class_codes = merge_animals(train[class_names])['animal_classification']

# rename numeric observations to actual classifications
code_to_name = dict(enumerate(class_names, start=1))
train['animal_classification'] = class_codes.map(code_to_name)

# split 'filepath' on its last '/' into the folder part and the file name;
# splitting once from the right keeps this at two columns for nested folders
temp = train['filepath'].str.rsplit(pat='/', n=1, expand=True)
# rename split columns
temp.rename(columns={0: 'old_folder_location', 1: 'filename'}, inplace=True)

# concat columns and original df, then drop the now-redundant 'filepath'
train = pd.concat([train, temp], axis=1).drop(columns=['filepath'])


In [None]:
# Hold out every row recorded at these four sites as the validation set.
validation_sites = ['S0009', 'S0043', 'S0059', 'S0026']
is_validation = train['site'].isin(validation_sites)

# make validation set
validation_set = train[is_validation]
# make training set: every row that is not held out.
# A boolean row mask leaves column dtypes untouched and does not discard rows
# merely because one of their cells is missing.
train_set = train[~is_validation]

In [None]:
# relative frequency of each class in the validation set (fractions, because normalize=True)
validation_set['animal_classification'].value_counts(normalize=True)

# Set up Model Data

In [None]:
# image directory passed as `directory=` to both flow_from_dataframe calls below
train_path = 'data/train_features_img/'
# test-image directory; not referenced anywhere in the visible code
test_path = 'data/test_features_img'

In [None]:
# Image pipeline: a plain generator with no augmentation enabled
# (brightness_range=[.4, 1.2] was tried and is left switched off).
img_gen = ImageDataGenerator()

# options shared by the validation and training iterators
flow_kwargs = dict(
    directory=train_path,
    x_col='filename',
    y_col='animal_classification',
    target_size=(256, 256),
    class_mode='categorical',
    batch_size=32,
)

# one iterator per split; both read images from train_path by file name
val_generator = img_gen.flow_from_dataframe(validation_set, **flow_kwargs)
train_generator = img_gen.flow_from_dataframe(train_set, **flow_kwargs)

# Model Testing

In [None]:
# Imported in this cell so the explicitly named metric objects below are in scope.
from keras.metrics import Precision, Recall

# Baseline CNN: one conv + max-pool stage, flattened into a small dense classifier.
model = Sequential()
# layers
model.add(Conv2D(filters=16, kernel_size=(3, 3), activation='relu'))
model.add(MaxPool2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(units=100, activation='relu'))
# output layer: one softmax unit per class (8 classes)
model.add(Dense(units=8, activation='softmax'))

# Name the metric objects explicitly. Auto-generated metric names can pick up
# a numeric suffix (e.g. 'recall_1') when this cell is re-run in the same
# session, which would break plot_metrics' lookup of history['recall'] and
# history['precision'].
model.compile(
    loss=categorical_crossentropy,
    optimizer='adam',
    metrics=['accuracy', Recall(name='recall'), Precision(name='precision')],
)

# No batch_size argument: the generators already yield batches (batch_size=32
# in flow_from_dataframe), and Keras documents that batch_size must not be
# specified for generator / Sequence input.
history = model.fit(
    train_generator,
    epochs=50,
    validation_data=val_generator,
)

In [None]:
# plot train vs. validation curves for accuracy, precision and recall from the fit history
plot_metrics(history)