## Load Data

In [74]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import joblib

In [75]:
# Load the dataset
data = pd.read_excel('data/processed_data.xlsx')
data.head()

Unnamed: 0.1,Unnamed: 0,Timestamp,Location,Traffic_Density,Peak_Time,Year,Month,Day,Time,Object_Type_Asteroid Mining Ship,Object_Type_Manned Spacecraft,Object_Type_Satellite,Object_Type_Scientific Probe,Object_Type_Space Debris,Object_Type_Space Station,Object_Type,Location_Encoded
0,0,2024-10-21 21:00:00,Lagrange Point L2,17,15:00:00,2024,10,21,21:00:00,False,False,False,False,False,True,Space Station,1
1,1,2024-10-11 05:00:00,Orbit LEO,21,15:00:00,2024,10,11,05:00:00,False,False,True,False,False,False,Satellite,4
2,2,2024-10-29 13:00:00,Orbit LEO,88,06:00:00,2024,10,29,13:00:00,False,False,False,False,False,True,Space Station,4
3,3,2024-10-24 08:00:00,Mars Transfer Orbit,65,08:00:00,2024,10,24,08:00:00,False,False,False,True,False,False,Scientific Probe,2
4,4,2024-10-23 17:00:00,Lagrange Point L1,9,06:00:00,2024,10,23,17:00:00,False,False,False,False,False,True,Space Station,0


In [76]:
# Features and target
features = ['Location_Encoded','Year', 'Month', 'Day', 
            'Object_Type_Asteroid Mining Ship', 'Object_Type_Manned Spacecraft', 
            'Object_Type_Satellite', 'Object_Type_Scientific Probe', 
            'Object_Type_Space Debris', 'Object_Type_Space Station']
target = 'Traffic_Density'

In [77]:
X=data[features]
y=data[target]

In [78]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Training and Testing

In [79]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import joblib

In [80]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)

# Initialize the RandomForestRegressor
model = SVR()

# Train the model
model.fit(X_train, y_train)

In [81]:
# Predict on the training set
y_pred_train = model.predict(X_train)

# Evaluate the model on the training set using various metrics
mse_train = mean_squared_error(y_train, y_pred_train)
mae_train = mean_absolute_error(y_train, y_pred_train)
r2_train = r2_score(y_train, y_pred_train)

print(f"Training Set Mean Squared Error: {mse_train}")
print(f"Training Set Mean Absolute Error: {mae_train}")
print(f"Training Set R-squared: {r2_train}")

Training Set Mean Squared Error: 780.6619779905138
Training Set Mean Absolute Error: 23.771465830051067
Training Set R-squared: -0.00033297519984865076


In [82]:
# Predict on the test set
y_pred_test = model.predict(X_test)

# Evaluate the model on the test set using various metrics
mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)
r2_test = r2_score(y_test, y_pred_test)

print(f"Test Set Mean Squared Error: {mse_test}")
print(f"Test Set Mean Absolute Error: {mae_test}")
print(f"Test Set R-squared: {r2_test}")

Test Set Mean Squared Error: 800.7477340473764
Test Set Mean Absolute Error: 24.73196848974163
Test Set R-squared: -2.6650753703894026e-06


## Hyperparameter Tuning

In [83]:
# Define the parameter grid for hyperparameter tuning
# param_grid = {
#     'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
#     'C': [0.1, 1, 10, 100],
#     'gamma': ['scale', 'auto'],
#     'degree': [2, 3, 4],
#     'epsilon': [0.1, 0.2, 0.5, 1]
# }

param_grid = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid']
}

# Initialize the RandomForestRegressor
hypertune_model = SVR()

# Initialize GridSearchCV with the RandomForestRegressor and the parameter grid
grid_search = GridSearchCV(estimator=hypertune_model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)

# Fit the GridSearchCV to the training data
grid_search.fit(X_train, y_train)

# Retrieve the best parameters
best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

# Train the model with the best parameters
best_model = SVR(**best_params)
best_model.fit(X_train, y_train)


Fitting 3 folds for each of 4 candidates, totalling 12 fits
Best parameters found:  {'kernel': 'sigmoid'}


## Save Model

In [84]:
# Save based on available modle
model_to_save=None
try:
    model_to_save = best_model
except:
    model_to_save = model

# Save the trained model
joblib.dump(model_to_save, 'model/SVR.joblib')

print("Model RandomForestRegressor saved in 'model/SVR.joblib'")

Model RandomForestRegressor saved in 'model/SVR.joblib'


## Testing Prediction using Custom Input

In [85]:
import joblib
import pandas as pd

model = joblib.load('model/SVR.joblib')

location_encoder = joblib.load('model/label_encoder.joblib')  

input_data = {
    'Location': ['Lagrange Point L2'],  
    'Year': [2024],
    'Month': [10],
    'Day': [21],
    'Object_Type_Asteroid Mining Ship': [False],
    'Object_Type_Manned Spacecraft': [False],
    'Object_Type_Satellite': [False],
    'Object_Type_Scientific Probe': [False],
    'Object_Type_Space Debris': [False],
    'Object_Type_Space Station': [True]
}

input_df = pd.DataFrame(input_data)

try:
    input_df['Location_Encoded'] = location_encoder.transform(input_df['Location'])
except ValueError as e:
    print(f"Error encoding location: {e}. Ensure the input data matches the training data locations.")

input_df = input_df.drop('Location', axis=1)

columns_order = ['Location_Encoded','Year', 'Month', 'Day', 'Object_Type_Asteroid Mining Ship', 'Object_Type_Manned Spacecraft', 
            'Object_Type_Satellite', 'Object_Type_Scientific Probe', 'Object_Type_Space Debris', 
            'Object_Type_Space Station']

input_df = input_df[columns_order]

prediction = model.predict(input_df)

print("Predicted Traffic Density:", prediction[0])

Predicted Traffic Density: 48.89978352032321
