In [19]:
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.callbacks import EarlyStopping

import tensorflow as tf
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import joblib

In [20]:
# Allow the use of custom functions in the utils folder if needed
import sys
import os

# Add the parent directory to the Python path
sys.path.append(os.path.abspath('..'))

In [21]:
# Function to read and parse the .tct file
def read_param_from_file(filepath):
    with open(filepath, 'r') as file:
        lines = file.readlines()
    
    # Remove any empty lines and comments
    lines = [line.strip() for line in lines if line.strip() and not line.startswith('#')]
    
    # Extract features, targets, and years
    features_line = lines[0].split(',')
    targets_line = lines[1].split(',')
    years_line = lines[2].split(',')
    
    features_from_param = [feature.strip() for feature in features_line]
    targets_from_param = [target.strip() for target in targets_line]
    years_to_pick = [int(year.strip()) for year in years_line]
    
    return features_from_param, targets_from_param, years_to_pick



In [22]:
# Initialize dataframe to read
df = pd.read_csv('../data/Bird_Migration_Custom_Data.csv')

# Remove leading and trailing whitespaces from column names
df.columns = df.columns.str.strip()

# Remove leading and trailing whitespaces from each cell
for col in df.columns:
    if pd.api.types.is_string_dtype(df[col]):
        df[col] = df[col].str.strip()

In [23]:
# Read the features, targets, and years to pick from the parameters.txt file
features_from_param, targets_from_param, years_to_pick_from_param = read_param_from_file("parameters.txt")

# Filter the DataFrame based on the specified years
df = df[df['year'].isin(years_to_pick_from_param)]

# Initialize features and targets
features = df[features_from_param]
targets = df[targets_from_param]

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, targets, test_size=0.2, random_state=42)

# Standardize features
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

# Standardize targets (important for deep learning)
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

In [24]:
# Initialize the model
model = Sequential()

# Add layers
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))  # Input layer
# model.add(Dropout(0.5)) # Dropout makes the MSE worse
model.add(Dense(64, activation='relu'))  # Hidden layer
# model.add(Dropout(0.5)) # Dropout makes the MSE worse
model.add(Dense(1))  # Output layer (2 outputs for longitude and latitude)

# Compile the model
optimizer = Adam(learning_rate=0.001)  # You can try different values like 0.0001 or 0.01
model.compile(optimizer=optimizer, loss='mean_squared_error')

# Train the model
history = model.fit(X_train, y_train, epochs=300, batch_size=8, validation_split=0.2, verbose=1)

Epoch 1/300


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.7805 - val_loss: 0.1813
Epoch 2/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2152 - val_loss: 0.1664
Epoch 3/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1864 - val_loss: 0.1407
Epoch 4/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1784 - val_loss: 0.1436
Epoch 5/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1463 - val_loss: 0.1378
Epoch 6/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1404 - val_loss: 0.1161
Epoch 7/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1106 - val_loss: 0.1133
Epoch 8/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1130 - val_loss: 0.1069
Epoch 9/300
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [25]:
# Save the model
model.save('../models/NN_4features_predict_lat.h5')



In [26]:
# This cell is for having 1 output in the prediction
# Evaluate the model
loss = model.evaluate(X_test, y_test, verbose=1)
print(f'Loss on test data: {loss:.4f}')

# Make predictions
y_pred = model.predict(X_test)

# Inverse transform predictions and true values
y_test_inv = scaler_y.inverse_transform(y_test)
y_pred_inv = scaler_y.inverse_transform(y_pred)

# Calculate metrics
mae_latitude = mean_absolute_error(y_test_inv, y_pred_inv)
mse_latitude = mean_squared_error(y_test_inv, y_pred_inv)
rmse_latitude = np.sqrt(mse_latitude)
r2_latitude = r2_score(y_test_inv, y_pred_inv)

print(f'Mean Absolute Error for Latitude: {mae_latitude:.2f}')
print(f'Mean Squared Error for Latitude: {mse_latitude:.2f}')
print(f'Root Mean Squared Error for Latitude: {rmse_latitude:.2f}')
print(f'R-squared for Latitude: {r2_latitude:.2f}')

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1611 
Loss on test data: 0.1380








[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step
Mean Absolute Error for Latitude: 5.76
Mean Squared Error for Latitude: 92.98
Root Mean Squared Error for Latitude: 9.64
R-squared for Latitude: 0.85


In [27]:
# Example input data (day, month, temperature, wind_speed)
new_data = np.array([[10, 12, 24,  2]]) 
new_data_scaled = scaler_X.transform(new_data)

# Predict using the trained model
prediction_scaled = model.predict(new_data_scaled)

# Reverse the scaling to get the original units
prediction = scaler_y.inverse_transform(prediction_scaled)

# Output the prediction
print("Predicted Latitude:", prediction[0][0])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
Predicted Latitude: -4.376946


