In [None]:
import pandas as pd
import keras as keras
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Feature/target preparation for the latitude/longitude regression network.
# Assumes `traindf` was loaded and feature-engineered earlier in the notebook.

# Targets: one series per coordinate.
y_lat = traindf["latitude"].copy()
y_lon = traindf["longitude"].copy()

# Features: everything except the targets and the columns excluded from the NN.
X = traindf.drop(columns=['latitude', 'longitude', 'etaRaw', 'portId', 'heading'])

# Make the time column numeric so it can be scaled like every other feature.
# NOTE(review): errors='coerce' turns any value that cannot be parsed into NaN;
# confirm "time" is datetime64/numeric at this point, otherwise NaNs will flow
# into the scaler and the network.
X["time"] = pd.to_numeric(X["time"], errors='coerce')

# Split once, passing both targets, so the features and both coordinates are
# guaranteed to share the same row partition (instead of relying on two
# separate calls happening to use the same seed).
(
    X_train_raw, X_test_raw,
    y_train_lat, y_test_lat,
    y_train_lon, y_test_lon,
) = train_test_split(X, y_lat, y_lon, test_size=0.2, random_state=42)

# Step 1: Normalize the data
# Fit the scaler on the training rows only; fitting on the full frame would
# leak the held-out rows' min/max into training and bias the evaluation.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Names kept so any later cell that still refers to them keeps working.
X_scaled = scaler.transform(X)
X_train_lon, X_test_lon = X_train, X_test

# Step 2: Build the neural network
def build_neural_net(input_dim, hidden_units=(128, 64, 32), output_dim=2,
                     learning_rate=0.001):
    """Build and compile a fully connected regression network.

    Args:
        input_dim: Number of input features per sample.
        hidden_units: Width of each ReLU hidden layer, in order. The default
            reproduces the original 128 -> 64 -> 32 stack.
        output_dim: Number of linear output units. The default of 2 yields one
            output for latitude and one for longitude.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``Sequential`` model with MSE loss and an MAE metric.
    """
    model = Sequential()

    # Declare the input shape with an explicit Input object. Passing
    # `input_dim` to the first Dense layer is the legacy form and triggers a
    # warning in Keras 3.
    model.add(tf.keras.Input(shape=(input_dim,)))

    # Hidden layers
    for units in hidden_units:
        model.add(Dense(units, activation='relu'))

    # Output layer: linear activation because this is a regression task
    model.add(Dense(output_dim, activation='linear'))

    # Compile the model with Adam optimizer and MSE loss
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='mse', metrics=['mae'])

    return model

# Initialize the model, sized to the training feature matrix
input_dim = X_train.shape[1]  # Number of features
model = build_neural_net(input_dim)

# Step 3: Train the model
# The network has two outputs, so latitude and longitude are stacked as the
# two columns of a single target array.
y_train_combined = np.column_stack((y_train_lat, y_train_lon))
y_test_combined = np.column_stack((y_test_lat, y_test_lon))

# 20% of the training rows are held out by Keras for validation monitoring.
history = model.fit(X_train, y_train_combined, epochs=50, batch_size=32, validation_split=0.2)

# Step 4: Evaluate the model
y_pred_combined = model.predict(X_test)

# Column 0 is latitude and column 1 is longitude, matching the stacking order
# used for the training targets above.
y_pred_lat = y_pred_combined[:, 0]
y_pred_lon = y_pred_combined[:, 1]


def _report_metrics(label, y_true, y_pred):
    """Print MSE, MAE and R² for one coordinate and return them as a tuple."""
    print(f'Neural Network {label} Prediction Results:')
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    print(f'Mean Squared Error ({label}): {mse:.4f}')
    print(f'Mean Absolute Error ({label}): {mae:.4f}')
    print(f'R² Score ({label}): {r2:.4f}')
    return mse, mae, r2


mse_lat, mae_lat, r2_lat = _report_metrics('Latitude', y_test_lat, y_pred_lat)
mse_lon, mae_lon, r2_lon = _report_metrics('Longitude', y_test_lon, y_pred_lon)

# Step 5: Make predictions for the submission set
# The scaler was fitted on the training feature columns, so the submission
# frame must go through the same preparation first: same columns in the same
# order, and a numeric "time" column.
# NOTE(review): this assumes `testdf` contains every column of `X`; a missing
# feature raises KeyError here rather than being scaled incorrectly.
X_submit = testdf[X.columns].copy()
X_submit["time"] = pd.to_numeric(X_submit["time"], errors='coerce')
X_test_scaled = scaler.transform(X_submit)
test_predictions = model.predict(X_test_scaled)

# Split latitude and longitude predictions
lat_pred = test_predictions[:, 0]
lon_pred = test_predictions[:, 1]

# Step 6: Create submission file
submission(sampledf, lat_pred, lon_pred, True, "nn_prediction")
