In [1]:
import IPython
import IPython.display
import keras
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error

import auxiliaries as aux

## Setup

In [2]:
# Develop window for data separation
species = 'Deer'
file_name = 'GSM02927'

years_to_predict = 1
time_intervals = 3 if species == 'Moose' else 4

regressive_window = aux.RegressiveWindow(species, file_name)
reversed_regressive_window = aux.ReversedRegressiveWindow(species, file_name)

results = {}

print('------Original Regressive Window------')
print(f'Original Data Size: {len(regressive_window.orig_df)}')
print(f'Training Data Size: {len(regressive_window.train_df)}')
print(f'Testing Data Size: {len(regressive_window.test_df)}\n')

print(f'Training Mean:\n{regressive_window.train_df.mean()}')
print(f'Training Std. Dev.:\n{regressive_window.train_df.std()}\n')

print('------Reversed Regressive Window------')
print(f'Original Data Size: {len(reversed_regressive_window.orig_df)}')
print(f'Training Data Size: {len(reversed_regressive_window.train_df)}')
print(f'Testing Data Size: {len(reversed_regressive_window.test_df)}\n')

print(f'Training Mean:\n{reversed_regressive_window.train_df.mean()}')
print(f'Training Std. Dev.:\n{reversed_regressive_window.train_df.std()}\n')

------Original Regressive Window------
Original Data Size: 2923
Training Data Size: 2046
Testing Data Size: 877

Training Mean:
external-temperature      16.545455
longitude                 11.031551
latitude                  46.008411
altitude                1408.284307
month                      6.251222
day                       15.697458
dtype: float64
Training Std. Dev.:
external-temperature      5.343885
longitude                 0.021036
latitude                  0.002235
altitude                313.338906
month                     3.396595
day                       8.695606
dtype: float64

------Reversed Regressive Window------
Original Data Size: 2923
Training Data Size: 2046
Testing Data Size: 877

Training Mean:
external-temperature      15.382209
longitude                 11.036021
latitude                  46.008706
altitude                1467.595691
month                      6.727273
day                       15.601173
dtype: float64
Training Std. Dev.:
external-tempera

### Regressive Model

In [3]:
regressive_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, input_shape=(1,3), activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(2)
])

regressive_window.model_compilation_and_fitting(regressive_model)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.7404 - mean_absolute_error: 0.6958 - mean_squared_error: 0.7404
Epoch 2/20
[1m36/64[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m0s[0m 5ms/step - loss: 0.5314 - mean_absolute_error: 0.5492 - mean_squared_error: 0.5314

  current = self.get_monitor_value(logs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.5429 - mean_absolute_error: 0.5517 - mean_squared_error: 0.5429
Epoch 3/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.5273 - mean_absolute_error: 0.5424 - mean_squared_error: 0.5273
Epoch 4/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4900 - mean_absolute_error: 0.5057 - mean_squared_error: 0.4900
Epoch 5/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4602 - mean_absolute_error: 0.4899 - mean_squared_error: 0.4602
Epoch 6/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 0.4531 - mean_absolute_error: 0.4864 - mean_squared_error: 0.4531
Epoch 7/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4218 - mean_absolute_error: 0.4658 - mean_squared_error: 0.4218
Epoch 8/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x17eb58389e0>

In [4]:
results['Original Regressive Model'] = regressive_model.evaluate(regressive_window.test_input, regressive_window.test_label, verbose=1, return_dict=True)

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 1.0444 - mean_absolute_error: 0.7193 - mean_squared_error: 1.0444  


In [5]:
results['Original Regressive Model']

{'loss': 0.7951700091362,
 'mean_absolute_error': 0.6521283388137817,
 'mean_squared_error': 0.7951700091362}

In [6]:
# Input ordering
original_output = regressive_window.test_label
original_output = original_output.reshape((original_output.shape[0], original_output.shape[-1]))#*regressive_window.train_df[['longitude', 'latitude']].std().values + regressive_window.train_df[['longitude', 'latitude']].mean().values
predicted_output = regressive_model(regressive_window.test_input).numpy()
predicted_output = predicted_output.reshape((predicted_output.shape[0], predicted_output.shape[-1]))#*regressive_window.train_df[['longitude', 'latitude']].std().values + regressive_window.train_df[['longitude', 'latitude']].mean().values

# Statistics gathering
mae_values = list(mean_absolute_error(original_output, predicted_output, multioutput='raw_values'))
mae_values = {coord_name: coord_value for coord_name, coord_value in zip(['mae_longitude', 'mae_latitude'], mae_values)}
mse_values = list(mean_squared_error(original_output, predicted_output, multioutput='raw_values'))
mse_values = {coord_name: coord_value for coord_name, coord_value in zip(['mse_longitude', 'mse_latitude'], mse_values)}

# Apply statistics to results
results['Original Regressive Model'].update(mae_values)
results['Original Regressive Model'].update(mse_values)

In [7]:
regressive_window.csv_extension(f'{species}/{file_name}_regressive', species, regressive_model)

In [8]:
regressive_model.save(f'ModelFiles/SavedModels/{species}/{file_name}_regressive.keras')

In [9]:
# Generate CSV file analyzing testing set results
def graph_regressive(model, window, url_dest):
    test_df = window.test_df
    test_df['timestamp'] = window.timeline[int(len(window.orig_df)*0.7):]

    initial_df = test_df[['timestamp', 'month', 'day', 'external-temperature', 'longitude', 'latitude']]
    initial_df['id'] = ['original' for _ in range(len(initial_df))]

    data = {
        'timestamp': [],
        'month': [],
        'day': [],
        'external-temperature': [],
        'longitude': [],
        'latitude': [],
        'id': []
    }

    for i in range(len(initial_df)):
        section = initial_df.iloc[i]

        timestamp = section['timestamp']
        month = (section['month'] - window.train_df['month'].mean())/window.train_df['month'].std()
        day = (section['day'] - window.train_df['day'].mean())/window.train_df['day'].std()
        external_temp = (section['external-temperature'] - window.train_df['external-temperature'].mean())/window.train_df['external-temperature'].std()

        if isinstance(model, tf.keras.Sequential):
            output_fields = model(np.array([external_temp, month, day]).reshape([1, 1, 3]))*window.train_df[['longitude', 'latitude']].std() + window.train_df[['longitude', 'latitude']].mean()
            output_fields = output_fields.numpy()[0][0]
        elif isinstance(model, sklearn.neighbors.KNeighborsRegressor):
            output_fields = model.predict([[external_temp, month, day]])[0]*window.train_df[['longitude', 'latitude']].std() + window.train_df[['longitude', 'latitude']].mean()
            output_fields = output_fields.values

        longitude = output_fields[0]
        latitude = output_fields[1]
        id = 'predicted'

        data['timestamp'].append(timestamp)
        data['month'].append(section['month'])
        data['day'].append(section['day'])
        data['external-temperature'].append(section['external-temperature'])
        data['longitude'].append(longitude)
        data['latitude'].append(latitude)
        data['id'].append(id)

    add_on_df = pd.DataFrame(data)
    
    full_df = pd.concat([initial_df, add_on_df], ignore_index=True)
    full_df.to_csv(url_dest, index=False)

    return full_df

graph_regressive(regressive_model, regressive_window, f'CSVFiles/TestPerformanceCSV/{species}/{file_name}_regressive_EdgeAnalysis.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['timestamp'] = window.timeline[int(len(window.orig_df)*0.7):]


Unnamed: 0,timestamp,month,day,external-temperature,longitude,latitude,id
0,2007-10-19 16:02:55,10,19,-0.6,11.047242,46.010533,original
1,2007-10-19 20:01:47,10,19,-3.6,11.048442,46.008974,original
2,2007-10-20 00:01:11,10,20,11.4,11.047115,46.009779,original
3,2007-10-20 04:00:55,10,20,-7.3,11.048017,46.008876,original
4,2007-10-20 08:00:55,10,20,-8.0,11.046749,46.009134,original
...,...,...,...,...,...,...,...
1749,2008-03-14 16:00:54,3,14,4.4,11.040586,46.011711,predicted
1750,2008-03-14 20:01:53,3,14,4.7,11.040606,46.011707,predicted
1751,2008-03-15 00:03:05,3,15,3.8,11.039993,46.011780,predicted
1752,2008-03-15 04:02:21,3,15,1.0,11.039434,46.011803,predicted


### Reveresed Regressive

In [10]:
reversed_regressive_model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(256, input_shape=(1,3), activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(2)
])

reversed_regressive_window.model_compilation_and_fitting(reversed_regressive_model)

Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.6962 - mean_absolute_error: 0.6248 - mean_squared_error: 0.6962
Epoch 2/20
[1m26/64[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m0s[0m 4ms/step - loss: 0.5366 - mean_absolute_error: 0.5206 - mean_squared_error: 0.5366

  current = self.get_monitor_value(logs)


[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.5358 - mean_absolute_error: 0.5176 - mean_squared_error: 0.5358
Epoch 3/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4769 - mean_absolute_error: 0.4833 - mean_squared_error: 0.4769
Epoch 4/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4551 - mean_absolute_error: 0.4751 - mean_squared_error: 0.4551
Epoch 5/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4319 - mean_absolute_error: 0.4603 - mean_squared_error: 0.4319
Epoch 6/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4180 - mean_absolute_error: 0.4445 - mean_squared_error: 0.4180
Epoch 7/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 0.4384 - mean_absolute_error: 0.4543 - mean_squared_error: 0.4384
Epoch 8/20
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

<keras.src.callbacks.history.History at 0x17eb6ac4050>

In [11]:
results['Reversed Regressive Model'] = reversed_regressive_model.evaluate(reversed_regressive_window.test_input, reversed_regressive_window.test_label, verbose=1, return_dict=True)

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 2.3532 - mean_absolute_error: 1.2645 - mean_squared_error: 2.3532


In [12]:
results['Reversed Regressive Model']

{'loss': 2.2566452026367188,
 'mean_absolute_error': 1.1859629154205322,
 'mean_squared_error': 2.2566452026367188}

In [13]:
# Input ordering
reversed_original_output = reversed_regressive_window.test_label
reversed_original_output = reversed_original_output.reshape((reversed_original_output.shape[0], reversed_original_output.shape[-1])) #*reversed_regressive_window.train_df[['longitude', 'latitude']].std().values + reversed_regressive_window.train_df[['longitude', 'latitude']].mean().values
reversed_predicted_output = regressive_model(regressive_window.test_input).numpy()
reversed_predicted_output = reversed_predicted_output.reshape((reversed_predicted_output.shape[0], reversed_predicted_output.shape[-1])) #*reversed_regressive_window.train_df[['longitude', 'latitude']].std().values + reversed_regressive_window.train_df[['longitude', 'latitude']].mean().values

# Statistics gathering
mae_values = list(mean_absolute_error(reversed_original_output, reversed_predicted_output, multioutput='raw_values'))
mae_values = {coord_name: coord_value for coord_name, coord_value in zip(['mae_longitude', 'mae_latitude'], mae_values)}
mse_values = list(mean_squared_error(reversed_original_output, reversed_predicted_output, multioutput='raw_values'))
mse_values = {coord_name: coord_value for coord_name, coord_value in zip(['mse_longitude', 'mse_latitude'], mse_values)}

# Apply statistics to results
results['Reversed Regressive Model'].update(mae_values)
results['Reversed Regressive Model'].update(mse_values)

In [14]:
reversed_regressive_window.csv_extension(f'{species}/{file_name}_reversed_regressive', species, reversed_regressive_model)

In [15]:
reversed_regressive_model.save(f'ModelFiles/SavedModels/{species}/{file_name}_reversed_regressive.keras')

In [16]:
graph_regressive(reversed_regressive_model, reversed_regressive_window, f'CSVFiles/TestPerformanceCSV/{species}/{file_name}_reversed_regressive_EdgeAnalysis.csv')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['timestamp'] = window.timeline[int(len(window.orig_df)*0.7):]


Unnamed: 0,timestamp,month,day,external-temperature,longitude,latitude,id
0,2007-04-07 16:00:50,4,7,5.7,11.048319,46.010373,original
1,2007-04-07 12:01:42,4,7,5.0,11.048023,46.012476,original
2,2007-04-07 08:03:09,4,7,2.6,11.047479,46.011052,original
3,2007-04-07 04:00:53,4,7,2.6,11.046926,46.009605,original
4,2007-04-07 00:00:54,4,7,3.4,11.046919,46.010993,original
...,...,...,...,...,...,...,...
1749,2006-11-13 04:01:23,11,13,-4.2,11.046472,46.011829,predicted
1750,2006-11-13 00:02:24,11,13,-3.5,11.046375,46.011765,predicted
1751,2006-11-12 20:03:04,11,12,0.0,11.046019,46.011520,predicted
1752,2006-11-12 16:03:04,11,12,2.6,11.045450,46.011238,predicted


### Final Results

In [17]:
results_df = pd.DataFrame(results).T
results_df.to_csv(f'ModelFiles/Statistics/{species}/{file_name}_test_data.csv', index_label='Model_Name')
results_df

Unnamed: 0,loss,mean_absolute_error,mean_squared_error,mae_longitude,mae_latitude,mse_longitude,mse_latitude
Original Regressive Model,0.79517,0.652128,0.79517,0.4945,0.809757,0.557876,1.032464
Reversed Regressive Model,2.256645,1.185963,2.256645,1.487936,1.316857,2.856823,2.552893
