In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from datetime import datetime
import warnings

# Suppress warnings about invalid feature names
warnings.filterwarnings("ignore", message="X does not have valid feature names")

# Load the dataset
# Assuming the dataset is in a CSV file and contains columns user_id, timestamp, latitude, longitude, speed, and direction
data = pd.read_csv('user_location_data.csv')

# Preprocess the data
# Convert timestamp to datetime and extract useful features such as day of the week and hour of the day
data['timestamp'] = pd.to_datetime(data['timestamp'])
data['day_of_week'] = data['timestamp'].dt.dayofweek
data['hour_of_day'] = data['timestamp'].dt.hour

# Feature engineering: add velocity vector based on speed and direction
data['velocity_x'] = data['speed'] * np.cos(np.radians(data['direction']))
data['velocity_y'] = data['speed'] * np.sin(np.radians(data['direction']))

# Define feature columns
features = ['latitude', 'longitude', 'speed', 'direction', 'day_of_week', 'hour_of_day', 'velocity_x', 'velocity_y']
X = data[features]
y_lat = data['latitude']
y_lon = data['longitude']

# Split the data into training and testing sets
X_train, X_test, y_lat_train, y_lat_test, y_lon_train, y_lon_test = train_test_split(
    X, y_lat, y_lon, test_size=0.2, random_state=42
)

# Standardize the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert scaled data back to DataFrames for compatibility with DataFrame features
X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=features)
X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=features)

# Train the model
# Using Random Forest Regressor as an example
model_lat = RandomForestRegressor(n_estimators=100, random_state=42)
model_lon = RandomForestRegressor(n_estimators=100, random_state=42)

model_lat.fit(X_train_scaled_df, y_lat_train)
model_lon.fit(X_train_scaled_df, y_lon_train)

# Evaluate the model
y_lat_pred = model_lat.predict(X_test_scaled_df)
y_lon_pred = model_lon.predict(X_test_scaled_df)

lat_mse = mean_squared_error(y_lat_test, y_lat_pred)
lon_mse = mean_squared_error(y_lon_test, y_lon_pred)
lat_mae = mean_absolute_error(y_lat_test, y_lat_pred)
lon_mae = mean_absolute_error(y_lon_test, y_lon_pred)

print(f'Latitude MSE: {lat_mse}, MAE: {lat_mae}')
print(f'Longitude MSE: {lon_mse}, MAE: {lon_mae}')

# Use the model to predict the next location
def predict_next_location(model_lat, model_lon, input_features):
    # Convert input features to a DataFrame
    input_df = pd.DataFrame([input_features], columns=features)
    
    # Scale the input features
    input_features_scaled = scaler.transform(input_df)
    
    # Predict the next latitude and longitude
    predicted_latitude = model_lat.predict(input_features_scaled)[0]
    predicted_longitude = model_lon.predict(input_features_scaled)[0]
    
    return predicted_latitude, predicted_longitude

# Example input features for a specific user at a specific time
# Replace these values with the user's current data
example_input_features = {
    'latitude': 37.7164219384989,
    'longitude': -122.36183093881566,
    'speed': 10,  # speed in m/s
    'direction': 90,  # direction in degrees
    'day_of_week': datetime.now().weekday(),  # current day of the week
    'hour_of_day': datetime.now().hour,  # current hour of the day
    'velocity_x': 10 * np.cos(np.radians(90)),
    'velocity_y': 10 * np.sin(np.radians(90))
}

# Predict the next location
predicted_lat, predicted_lon = predict_next_location(model_lat, model_lon, list(example_input_features.values()))
print(f'Predicted next location: Latitude = {predicted_lat}, Longitude = {predicted_lon}')


Latitude MSE: 1.0980135886168336e-08, MAE: 7.950132648414154e-05
Longitude MSE: 1.6793152342611423e-08, MAE: 9.846328073500388e-05
Predicted next location: Latitude = 37.71642661038898, Longitude = -122.36177981025226


In [19]:
import plotly.express as px

# Create a DataFrame with the specific location
df = pd.DataFrame({
    'Lat': [example_input_features.get('latitude'), predicted_lat],
    'Long': [example_input_features.get('longitude'), predicted_lon],
    'Location': ['Current Location','Next Predicted Location'],
    'Size': [20,20]
})

# Define the color scale
color_scale = [(0, 'orange'), (1, 'red')]

# Create the scatter mapbox figure
fig = px.scatter_mapbox(df, 
                        lat="Lat", 
                        lon="Long", 
                        hover_name="Location", 
                        color_discrete_sequence=["red","green"],
                        size='Size',
                        zoom=14, 
                        height=800,
                        width=800)

# Update the layout to use OpenStreetMap and set margins
fig.update_layout(mapbox_style="open-street-map")
fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})

# Show the figure
fig.show()
