<a href="https://colab.research.google.com/github/Helerik/animal-movemet-models/blob/main/g2_models_rnn_tests.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing necessary packages/libraries/APIs

In [None]:
# Mount Google Drive at /content/drive; the data and log paths used by the
# cells below all live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import pickle
import json
import matplotlib.pyplot as plt
import time
import gc

# Defining model structures

In [None]:
def create_rnn_model(input_dim, units, activation = 'tanh', output_dim=2, l1=0, l2=0):
    """Build a single-layer SimpleRNN sequence-to-sequence regression model.

    The model accepts sequences of any length (time axis is ``None``) with
    ``input_dim`` features per step and emits ``output_dim`` linear outputs
    for every step, since the recurrent layer returns full sequences.

    Args:
        input_dim: number of features per time step.
        units: number of hidden units in the SimpleRNN layer.
        activation: activation of the recurrent layer.
        output_dim: number of values predicted per time step.
        l1: L1 penalty factor applied to the input kernels of both layers.
        l2: L2 penalty factor applied to the input kernels of both layers.

    Returns:
        An uncompiled ``tf.keras.Model`` named ``'rnn_<units>'``. It also
        carries an ``output_length`` attribute, an identity function.
    """
    # Each layer gets its own regularizer instance; only the (input) kernels
    # are penalised, no recurrent or bias regularizer is passed.
    recurrent_layer = tf.keras.layers.SimpleRNN(
        units,
        activation=activation,
        return_sequences=True,
        name='rnn',
        kernel_regularizer=tf.keras.regularizers.l1_l2(l1=l1, l2=l2),
    )
    readout_layer = tf.keras.layers.Dense(
        output_dim,
        activation=tf.keras.activations.linear,
        name='out',
        kernel_regularizer=tf.keras.regularizers.l1_l2(l1=l1, l2=l2),
    )

    sequence_in = tf.keras.Input(name='inputs', shape=(None, input_dim))
    sequence_out = readout_layer(recurrent_layer(sequence_in))

    model = tf.keras.Model(inputs=sequence_in, outputs=sequence_out, name='rnn_'+str(units))
    # Output sequence length equals the input sequence length.
    model.output_length = lambda length: length
    return model

In [None]:
# Hidden-layer sizes to evaluate; each value is passed as `units` to create_rnn_model.
all_model_units = [32,64,128]

# Import data

In [None]:
# Load one CSV per jaguar into `datasets` (order follows the ID list below).
# The loop names `id`, `name` and `data` are kept as-is because a later
# cleanup cell deletes them explicitly.
data_dir = "/content/drive/MyDrive/Trabalho de Formatura/00 - Animal Movement/animal-movement-model/data/rnn_jaguar_data/"
jaguar_ids = [12,13,18,22,23,41,52,81,88,91,92,116,117]

datasets = []
for id in jaguar_ids:
    name = f"jag{id}_rnn.csv"
    # Read in chunks of 10000 rows and stitch them back into a single frame.
    reader = pd.read_csv(data_dir + name, sep=",", decimal=".", chunksize=10000)
    data = pd.concat(reader, ignore_index=True)
    del reader
    datasets.append(data)

# Defining training and testing sets

In [None]:
# Jaguars held out entirely: used only to validate on animals never seen in training
out_of_sample_jags = [datasets[position] for position in (4, 6, 9, 10)]

In [None]:
# Remove the held-out jaguars (original positions 4, 6, 9, 10) from `datasets`.
# Popping from the highest index down keeps the remaining positions valid,
# so no index shifting is needed.
for held_out_position in (10, 9, 6, 4):
    datasets.pop(held_out_position)
print()




In [None]:
# Total number of rows across the held-out (out-of-sample) jaguars,
# computed from the data instead of hand-copied lengths.
sum(len(jag) for jag in out_of_sample_jags)

320

In [None]:
# Number of rows in each of the four held-out jaguar datasets, one per line
for position in range(4):
    print(len(out_of_sample_jags[position]))

139
147
13
21


In [None]:
# Chronological split for the jaguars used in training: the first 80% of each
# jaguar's rows go to the training set and the last 20% to the test set.
# The loop variable is named `dataset` because a later cleanup cell deletes it.
all_train_sets, all_test_sets = [], []
for dataset in datasets:
    n_rows = len(dataset)
    n_test_rows = int(np.round(0.2 * n_rows, 0))
    n_train_rows = int(np.round(0.8 * n_rows, 0))
    all_test_sets.append(dataset.tail(n_test_rows))
    all_train_sets.append(dataset.head(n_train_rows))

In [None]:
# Total number of rows across all per-jaguar test sets
sum_ = sum(len(test_set) for test_set in all_test_sets)
sum_

1879

In [None]:
# Drop the names left over from loading and splitting; only the train/test
# lists and out_of_sample_jags are used from here on.
del datasets, dataset, data, name, id

In [None]:
# Force a garbage-collection pass after the deletions above
gc.collect() 

252

# Prepare data for training and testing

In [None]:
# Columns that are not model inputs: two identifier columns plus the two targets
NON_FEATURE_COLUMNS = ['v_jaguar_ID', 'v_step_Num', 'v_suggested_x', 'v_suggested_y']
TARGET_COLUMNS = ['v_suggested_x', 'v_suggested_y']
# Number of feature columns expected to remain after dropping NON_FEATURE_COLUMNS
N_FEATURES = 6234


def _frame_to_tensors(frame):
    """Convert one jaguar DataFrame into an (inputs, targets) tensor pair.

    Args:
        frame: DataFrame containing the target columns, the identifier
            columns and N_FEATURES feature columns.

    Returns:
        (x, y) where x has shape (1, len(frame), N_FEATURES) and y has shape
        (1, len(frame), 2), i.e. the whole frame is one sequence in a batch
        of size one.

    Raises:
        KeyError: if any of NON_FEATURE_COLUMNS is missing from `frame`.
        ValueError: if the remaining columns cannot be reshaped as above.
    """
    n_steps = len(frame)
    targets = frame.filter(TARGET_COLUMNS)
    # Non-mutating drop: the source frame is left intact, so this cell can be
    # re-run without a KeyError and head()/tail() slices are never modified.
    features = frame.drop(NON_FEATURE_COLUMNS, axis=1)

    y = tf.convert_to_tensor(targets.to_numpy().reshape(1, n_steps, 2))
    x = tf.convert_to_tensor(features.to_numpy().reshape(1, n_steps, N_FEATURES))
    return x, y


x_train, y_train = [], []
x_test, y_test = [], []
x_test_out_jags, y_test_out_jags = [], []

# `x` and `y` are deliberately assigned at cell level: a later cleanup cell
# deletes both names.
for frame in all_train_sets:
    x, y = _frame_to_tensors(frame)
    x_train.append(x)
    y_train.append(y)

for frame in all_test_sets:
    x, y = _frame_to_tensors(frame)
    x_test.append(x)
    y_test.append(y)

for frame in out_of_sample_jags:
    x, y = _frame_to_tensors(frame)
    x_test_out_jags.append(x)
    y_test_out_jags.append(y)
    


In [None]:
# Drop the last loop leftovers from the tensor-preparation cell
del x,y

In [None]:
# Force a garbage-collection pass before training starts
gc.collect()

100

# Defining training and testing procedures and logging

In [None]:
# Hyper-parameter grid to evaluate.
# Every entry of `configs` is [alpha, l1, l2]: the learning rate followed by
# the L1 and L2 regularization factors. Each learning rate is combined with
# the same six (l1, l2) pairs, in the order listed.
_learning_rates = (0.00001, 0.0001, 0.001)
_penalty_pairs = (
    (0.005, 0.005),
    (0.005, 0.0001),
    (0.005, 0.000001),
    (0.0001, 0.005),
    (0.0001, 0.0001),
    (0.000001, 0.005),
)
configs = [
    [alpha, l1_factor, l2_factor]
    for alpha in _learning_rates
    for l1_factor, l2_factor in _penalty_pairs
]

# Results of every run are collected here
log = {}

In [None]:
# Test N configurations: for every [alpha, l1, l2] entry of `configs`, train
# one SimpleRNN per size in `all_model_units`, evaluate it, and record the
# results in `log` (pickled to Drive after every model).

# Output locations on the mounted Drive
LOG_DIR = "/content/drive/MyDrive/Trabalho de Formatura/00 - Animal Movement/animal-movement-model/log_rnn/"
LOG_PATH = LOG_DIR + "log_rnn1" + ".pkl"

INPUT_DIM = 6234   # features per time step, as produced by the data-preparation cell
MAX_EPOCHS = 500   # upper bound on passes over the training sequences
EVAL_EVERY = 5     # the test loss is evaluated once every EVAL_EVERY epochs
PATIENCE = 25      # stop after this many consecutive non-improving evaluations


def _mean_loss(model, xs, ys):
    """Return the average of `model.evaluate` over the sequences in xs/ys.

    Every sequence has the same weight regardless of its length. Returns 0
    when `xs` is empty.
    """
    total = 0
    for x_seq, y_seq in zip(xs, ys):
        total += model.evaluate(x_seq, y_seq, verbose=0) / len(xs)
    return total


def _mean_distance(model, xs, ys, prefixes=None):
    """Return the mean Euclidean distance between predictions and targets.

    Args:
        model: trained model mapping (1, steps, features) to (1, steps, 2).
        xs: list of input tensors, each of shape (1, steps, features).
        ys: list of target tensors, each of shape (1, steps, 2).
        prefixes: optional list of input tensors. When given, prefixes[j] is
            fed to the model in front of xs[j] along the time axis and only
            the predictions for the trailing xs[j] steps are scored.

    Returns:
        Sum of per-step distances divided by the total number of steps over
        all sequences, or NaN when there are no steps at all.
    """
    total_distance = 0.0
    n_points = 0
    for j in range(len(xs)):
        inputs = xs[j] if prefixes is None else tf.concat([prefixes[j], xs[j]], axis=1)
        y_pred = model(inputs, training=False).numpy()
        y_pred = np.reshape(y_pred, [y_pred.shape[1], 2])
        n_steps = ys[j].shape[1]
        # Keep only the predictions aligned with the targets (the last n_steps).
        y_pred = y_pred[y_pred.shape[0] - n_steps:]
        y_true = np.reshape(ys[j].numpy(), [n_steps, 2])
        dists = np.sqrt(np.sum(np.square(y_true - y_pred), axis=1))
        # Accumulate the distances themselves; summing the squared differences
        # here would turn the metric into a mean squared distance.
        total_distance += np.sum(dists)
        n_points += len(dists)
    return float(total_distance / n_points) if n_points else float('nan')


def _save_loss_plot(epochs, losses, path, log_scale=False):
    """Plot test loss against epoch, save the figure to `path`, and close it."""
    fig, ax = plt.subplots()
    ax.plot(epochs, losses)
    ax.set_title('model loss')
    ax.set_ylabel('test loss')
    ax.set_xlabel('epoch')
    if log_scale:
        ax.set_yscale('log')
    fig.savefig(path)
    # Close explicitly so figures do not pile up across the many runs.
    plt.close(fig)


def _save_log(log_dict):
    """Pickle `log_dict` to LOG_PATH, closing the file handle afterwards."""
    with open(LOG_PATH, 'wb') as log_file:
        pickle.dump(log_dict, log_file)


for n in range(0,len(configs)):

    # Seeds depend on the configuration index so every configuration is reproducible
    np.random.seed(n*n*2)
    tf.random.set_seed(n*n)

    print("\n============================================\n\n"+str(n)+"-th parameter configuration:\n")

    # Hyperparameter configuration: [alpha, l1, l2]
    learning_rate = configs[n][0]
    l1 = configs[n][1]
    l2 = configs[n][2]

    print("Learning Rate:",learning_rate)
    print("L1:",l1)
    print("L2:",l2)
    print()

    # Log n-th configuration
    config_log = {'learning_rate':learning_rate,
                  'l1':l1,
                  'l2':l2}
    log['parameter_config'+str(n)] = config_log

    # Test every model
    for i in range(len(all_model_units)):

        model = create_rnn_model(input_dim = INPUT_DIM, units = all_model_units[i], l1=l1, l2=l2)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                      loss="mean_squared_error")

        print("\n    Model:",model.name)
        # Log model being tested
        model_log = {'model_name':model.name}
        config_log['model_'+model.name] = model_log

        best_loss = np.inf
        patience_counter = 0
        history = []
        t = time.time()
        final_epoch = MAX_EPOCHS
        # Each jaguar's full training sequence is one batch of size 1. Training
        # runs for at most MAX_EPOCHS epochs; the test loss is checked every
        # EVAL_EVERY epochs and training stops after PATIENCE consecutive
        # checks without improvement.
        for epoch in range(MAX_EPOCHS):
            print(f"Epoch: {epoch+1 : 10}",end='\n')
            for j in range(len(x_train)):
                model.fit(x_train[j], y_train[j], epochs = 1, shuffle = False, verbose = 0)

            if epoch % EVAL_EVERY == 0:
                epoch_test_loss = _mean_loss(model, x_test, y_test)
                history.append(epoch_test_loss)
                if epoch_test_loss < best_loss:
                    patience_counter = 0
                    best_loss = epoch_test_loss
                else:
                    patience_counter += 1
            if patience_counter == PATIENCE:
                final_epoch = epoch+1
                break
        t = time.time()-t

        # Evaluate model on training, test and out-of-sample sets
        # Loss:
        train_loss = _mean_loss(model, x_train, y_train)
        test_loss = _mean_loss(model, x_test, y_test)
        out_jag_test_loss = _mean_loss(model, x_test_out_jags, y_test_out_jags)

        # Distances:
        train_avg_distances = _mean_distance(model, x_train, y_train)
        test_avg_distances = _mean_distance(model, x_test, y_test)
        out_jag_test_avg_distances = _mean_distance(model, x_test_out_jags, y_test_out_jags)
        # Considering we had all train information, and want to predict the rest (test data)
        test_after_train_avg_distances = _mean_distance(model, x_test, y_test, prefixes=x_train)

        # Log training/test info for this configuration and model
        model_log['training_loss'] = train_loss
        model_log['testing_loss'] =  test_loss
        model_log['out_jag_testing_loss'] = out_jag_test_loss
        model_log['train_avg_distances'] = train_avg_distances
        model_log['test_avg_distances'] = test_avg_distances
        model_log['out_jag_test_avg_distances'] = out_jag_test_avg_distances
        model_log['test_after_train_avg_distances'] = test_after_train_avg_distances
        model_log['epochs'] = final_epoch
        model_log['time'] = t

        print()
        print(f'training_loss = {train_loss}')
        print(f'testing_loss =  {test_loss}')
        print(f'out_jag_testing_loss = {out_jag_test_loss}')
        print(f'train_avg_distances = {train_avg_distances}')
        print(f'test_avg_distances = {test_avg_distances}')
        print(f'out_jag_test_avg_distances = {out_jag_test_avg_distances}')
        print(f'test_after_train_avg_distances = {test_after_train_avg_distances}')
        print()

        # Summarize the test-loss history (linear and log scale). One value
        # was recorded every EVAL_EVERY epochs, starting at epoch 0.
        evaluated_epochs = np.arange(0,final_epoch,EVAL_EVERY)
        _save_loss_plot(evaluated_epochs, history,
                        LOG_DIR + 'test_loss_config_'+str(n)+'_'+model.name)
        _save_loss_plot(evaluated_epochs, history,
                        LOG_DIR + 'test_loss_logscale_config_'+str(n)+'_'+model.name,
                        log_scale=True)

        # Temporarily save log files (overwritten after every model so a
        # crashed session loses at most one run)
        _save_log(log)
        gc.collect()

# Definitively save log files
_save_log(log)
gc.collect()



16-th parameter configuration:

Learning Rate: 0.001
L1: 0.0001
L2: 0.0001


    Model: rnn_32
Epoch:          1
Epoch:          2
Epoch:          3
Epoch:          4
Epoch:          5
Epoch:          6
Epoch:          7
Epoch:          8
Epoch:          9
Epoch:         10
Epoch:         11
Epoch:         12
Epoch:         13
Epoch:         14
Epoch:         15
Epoch:         16
Epoch:         17
Epoch:         18
Epoch:         19
Epoch:         20
Epoch:         21
Epoch:         22
Epoch:         23
Epoch:         24
Epoch:         25
Epoch:         26
Epoch:         27
Epoch:         28
Epoch:         29
Epoch:         30
Epoch:         31
Epoch:         32
Epoch:         33
Epoch:         34
Epoch:         35
Epoch:         36
Epoch:         37
Epoch:         38
Epoch:         39
Epoch:         40
Epoch:         41
Epoch:         42
Epoch:         43
Epoch:         44
Epoch:         45
Epoch:         46
Epoch:         47
Epoch:         48
Epoch:         49
Epoch:         50
Epo

0

<Figure size 432x288 with 0 Axes>