# XGBoost Regression for Latitude and Longitude Prediction
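This notebook trains two XGBoost regressors that predict `longitude` and `latitude` from the `hour` and `speed` columns of `Master5G.csv`, evaluates each with mean squared error on a held-out 20% test split, and saves the fitted models and the test-set predictions to disk.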

In [7]:
# Import required libraries
import pandas as pd
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import joblib
import numpy as np
import torch

In [2]:
# Load the dataset.
# low_memory=False makes pandas read the whole file before inferring each
# column's dtype, instead of inferring chunk by chunk. The saved output of
# this cell shows a warning raised at the read_csv line -- presumably pandas'
# mixed-type DtypeWarning (TODO confirm) -- which this option avoids.
df = pd.read_csv('Master5G.csv', low_memory=False)
df.head()

  df = pd.read_csv('Master5G.csv')


Unnamed: 0,time,Day,Year,Month,Date,hour,min,sec,timezone,latitude,...,Retransmissions,CWnd,cwnd_unit,Role-RX,Transfer size-RX,Transfer unit-RX,Bitrate-RX,bitrate_unit-RX,send_data,square_id
0,2022-07-03 19:43:37,Mon,2022.0,7.0,4.0,5.0,43.0,37.0,AEST,-37.737985,...,0.0,0.00708,MBytes,RX-C,0.988281,MBytes,8.29,Mbits/sec,0.0,square_94489280583
1,2022-07-03 19:43:38,Mon,2022.0,7.0,4.0,5.0,43.0,38.0,AEST,-37.738127,...,3.0,0.010645,MBytes,RX-C,0.972656,MBytes,8.16,Mbits/sec,0.0,square_94489280583
2,2022-07-03 19:43:39,Mon,2022.0,7.0,4.0,5.0,43.0,39.0,AEST,-37.738249,...,0.0,0.010645,MBytes,RX-C,0.957031,MBytes,8.03,Mbits/sec,0.0,square_94489280583
3,2022-07-03 19:43:40,Mon,2022.0,7.0,4.0,5.0,43.0,40.0,AEST,-37.738362,...,0.0,0.00708,MBytes,RX-C,0.953125,MBytes,7.99,Mbits/sec,0.014404,square_94489280583
4,2022-07-03 19:43:41,Mon,2022.0,7.0,4.0,5.0,43.0,41.0,AEST,-37.738491,...,6.0,0.007588,MBytes,RX-C,0.927734,MBytes,7.78,Mbits/sec,0.0,square_94489280583


In [3]:
# Model inputs: the two predictor columns, kept together as a DataFrame
X = df.loc[:, ['hour', 'speed']]

# Model outputs: one Series per coordinate, each gets its own regressor
y_long, y_lat = df['longitude'], df['latitude']

In [4]:
# Split features and both targets into train/test sets in a single call.
# train_test_split applies the same shuffled row indices to every array it is
# given, so X, longitude and latitude stay row-aligned by construction rather
# than by repeating the same seed in two separate calls. This also avoids
# assigning to `_`, which IPython uses for the previous cell's output.
(
    X_train, X_test,
    y_long_train, y_long_test,
    y_lat_train, y_lat_test,
) = train_test_split(X, y_long, y_lat, test_size=0.2, random_state=42)

In [5]:
# Fit one default-configured XGBoost regressor per coordinate.
# fit() returns the estimator itself, so construction and training are chained.
xgb_long = XGBRegressor().fit(X_train, y_long_train)
xgb_lat = XGBRegressor().fit(X_train, y_lat_train)

# Show the latitude model as the cell output
xgb_lat

In [6]:
# Run both regressors on the held-out rows
y_long_pred, y_lat_pred = (
    regressor.predict(X_test) for regressor in (xgb_long, xgb_lat)
)

# Mean squared error of each coordinate, in squared units of the target column
mse_long, mse_lat = (
    mean_squared_error(actual, predicted)
    for actual, predicted in (
        (y_long_test, y_long_pred),
        (y_lat_test, y_lat_pred),
    )
)

print(f'Longitude MSE: {mse_long}', f'Latitude MSE: {mse_lat}', sep='\n')

Longitude MSE: 0.0008919363304064929
Latitude MSE: 0.0009780200117029006


In [8]:
# Persist both fitted regressors with XGBoost's own save_model().
# No conversion happens here: each file is written directly by the estimator.
# NOTE(review): '.pt' is not one of the extensions XGBoost documents for
# choosing a model format, and the saved output of this cell shows a warning
# coming from save_model -- confirm which format was written and whether any
# downstream loader depends on these file names before renaming them.
for fitted_model, model_path in (
    (xgb_long, 'xgb_longitude_model.pt'),
    (xgb_lat, 'xgb_latitude_model.pt'),
):
    fitted_model.save_model(model_path)

print('Models saved as xgb_longitude_model.pt and xgb_latitude_model.pt')

Models saved as xgb_longitude_model.pt and xgb_latitude_model.pt


  self.get_booster().save_model(fname)


In [9]:
# Put each held-out coordinate next to its prediction and export the table.
# The true values are converted to plain arrays so that they line up with the
# prediction arrays by position rather than by the shuffled test-set index.
prediction_columns = {
    'longitude_true': y_long_test.to_numpy(),
    'longitude_pred': y_long_pred,
    'latitude_true': y_lat_test.to_numpy(),
    'latitude_pred': y_lat_pred,
}
results_df = pd.DataFrame(prediction_columns)

results_df.to_csv('latlon_predictions_vs_true.csv', index=False)
results_df.head()

Unnamed: 0,longitude_true,longitude_pred,latitude_true,latitude_pred
0,144.846505,144.805817,-37.742313,-37.759483
1,144.813258,144.796143,-37.704514,-37.760555
2,144.78655,144.796997,-37.8208,-37.759407
3,144.811472,144.797638,-37.760298,-37.758514
4,144.765277,144.790314,-37.742556,-37.758984
