In [1]:
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping

import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import joblib

In [2]:
# Allow the use of custom functions in the utils folder if needed
import sys
import os

# Resolve the parent directory once and add it to the Python path only when it
# is not already listed, so re-running this cell never piles up duplicate entries.
parent_dir = os.path.abspath('..')
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [3]:
# Read the CSV into the working dataframe
df = pd.read_csv('../data/Bird_Migration_Custom_Data.csv')

# Remove leading and trailing whitespace from column names
df.columns = df.columns.str.strip()


def _strip_if_str(value):
    """Return `value` without surrounding whitespace if it is a str, else return it unchanged."""
    return value.strip() if isinstance(value, str) else value


# Remove leading and trailing whitespace from every text cell.
# Object columns are cleaned element by element instead of being gated on
# `is_string_dtype` + `.str.strip()`:
#   * depending on the pandas version, `is_string_dtype` reports False for an
#     object column that contains missing values, which would skip the column;
#   * `.str.strip()` on a mixed object column replaces non-string cells with NaN.
# Mapping only touches real `str` values, so NaN and other objects pass through.
for col in df.columns:
    column = df[col]
    if pd.api.types.is_object_dtype(column):
        df[col] = column.map(_strip_if_str)
    elif pd.api.types.is_string_dtype(column):
        # Dedicated string dtypes: the vectorised accessor keeps the dtype intact
        df[col] = column.str.strip()



In [4]:
# Years whose rows are kept for training -- edit this list to change the selection
years_to_pick = [2007, 2008, 2009]

# Boolean mask marking the rows whose 'year' is one of the selected years
in_selected_years = df['year'].isin(years_to_pick)
df = df[in_selected_years]

In [5]:
# Model inputs (four columns) and the single regression target
feature_columns = ['day', 'month', 'temperature', 'wind_speed']
features = df[feature_columns]
targets = df[['latitude']]

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    features,
    targets,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: statistics are learned from the training split only
# and then applied unchanged to both splits
scaler_X = StandardScaler()
scaler_X.fit(X_train)
X_train = scaler_X.transform(X_train)
X_test = scaler_X.transform(X_test)

# Standardize the target the same way (important for deep learning)
scaler_y = StandardScaler()
scaler_y.fit(y_train)
y_train = scaler_y.transform(y_train)
y_test = scaler_y.transform(y_test)

In [6]:
# Seed Python, NumPy and TensorFlow so weight initialisation and batch shuffling
# are repeatable from one run of the notebook to the next.
# NOTE: this sets the global seeds of all three libraries.
tf.keras.utils.set_random_seed(42)

# Build the network: one input per feature column -> 128 -> 64 -> 1.
# The input shape is declared with an explicit Input object; passing `input_dim`
# to the first Dense layer is deprecated and triggers a warning in recent Keras.
# Dropout(0.5) after each hidden layer was tried and made the MSE worse, so it is left out.
n_features = X_train.shape[1]
model = Sequential([
    tf.keras.Input(shape=(n_features,)),
    Dense(128, activation='relu'),  # First hidden layer
    Dense(64, activation='relu'),   # Second hidden layer
    Dense(1),                       # Output layer: a single value (standardized latitude)
])

# Compile the model
optimizer = Adam(learning_rate=0.001)  # You can try different values like 0.0001 or 0.01
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train the model; 20% of the training data is set aside by Keras for validation
history = model.fit(X_train, y_train, epochs=100, batch_size=8, validation_split=0.2, verbose=1)

Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - loss: 0.6749 - val_loss: 0.3449
Epoch 2/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2923 - val_loss: 0.2352
Epoch 3/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1890 - val_loss: 0.1797
Epoch 4/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1382 - val_loss: 0.1680
Epoch 5/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1311 - val_loss: 0.1318
Epoch 6/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1477 - val_loss: 0.1239
Epoch 7/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1146 - val_loss: 0.1064
Epoch 8/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1343 - val_loss: 0.1103
Epoch 9/100
[1m27/27[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [7]:
# Save the model (same path and HDF5 format as before)
model_path = '../models/NN_4features_predict_lat.h5'

# Make sure the target folder exists first, so a missing ../models directory
# cannot make the save fail after training has already completed.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)



In [8]:
# Evaluation for the single-output (latitude) model

# Loss on the held-out test split, measured on the standardized target
loss = model.evaluate(x=X_test, y=y_test, verbose=1)
print(f'Loss on test data: {loss:.4f}')

# Predict on the test features (still in standardized target units)
y_pred = model.predict(X_test)

# Undo the target scaling for both the true values and the predictions
y_test_inv, y_pred_inv = (
    scaler_y.inverse_transform(scaled) for scaled in (y_test, y_pred)
)

# Error metrics on the inverse-transformed values
mae_latitude = mean_absolute_error(y_true=y_test_inv, y_pred=y_pred_inv)
mse_latitude = mean_squared_error(y_true=y_test_inv, y_pred=y_pred_inv)
rmse_latitude = np.sqrt(mse_latitude)
r2_latitude = r2_score(y_true=y_test_inv, y_pred=y_pred_inv)

# Report
print(f'Mean Absolute Error for Latitude: {mae_latitude:.2f}')
print(f'Mean Squared Error for Latitude: {mse_latitude:.2f}')
print(f'Root Mean Squared Error for Latitude: {rmse_latitude:.2f}')
print(f'R-squared for Latitude: {r2_latitude:.2f}')

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0713 
Loss on test data: 0.0623
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
Mean Absolute Error for Latitude: 4.03
Mean Squared Error for Latitude: 34.79
Root Mean Squared Error for Latitude: 5.90
R-squared for Latitude: 0.94
