## Data Preprocessing

In [21]:
from pathlib import Path

import joblib
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [22]:
# Read the Excel workbook at data/space_traffic.xlsx into a DataFrame
data = pd.read_excel(io='data/space_traffic.xlsx')

In [23]:
# Count the null entries in each column of the loaded frame
missing_values = data.isna().sum()

# Show the per-column null totals
print(missing_values)

Timestamp          0
Location           0
Object_Type        0
Traffic_Density    0
Peak_Time          0
dtype: int64


In [24]:
# Normalise column names by trimming leading/trailing whitespace
# (str.strip with no arguments removes whitespace only, not other characters)
data.columns = data.columns.map(str.strip)

In [25]:
# Convert 'Timestamp' to datetime format.
# errors='coerce' turns any value that cannot be parsed into NaT instead of raising.
timestamps = pd.to_datetime(data['Timestamp'], errors='coerce')

# Report values that were present but failed to parse, so coerced NaT rows
# do not slip silently into the derived Year/Month/Day/Time columns.
unparsed_count = int((timestamps.isna() & data['Timestamp'].notna()).sum())
if unparsed_count:
    print(f"Warning: {unparsed_count} 'Timestamp' value(s) could not be parsed and were set to NaT.")

data['Timestamp'] = timestamps

# Extract Year, Month, Day, and Time (as an HH:MM:SS string) from the Timestamp
data['Year'] = data['Timestamp'].dt.year
data['Month'] = data['Timestamp'].dt.month
data['Day'] = data['Timestamp'].dt.day
data['Time'] = data['Timestamp'].dt.strftime('%H:%M:%S')

In [26]:
# One-hot encode 'Object_Type' (every category kept, none dropped), then
# re-attach the original text column as the last column of the frame.
data_encoded = (
    pd.get_dummies(data, columns=['Object_Type'], drop_first=False)
    .assign(Object_Type=data['Object_Type'])
)
data = data_encoded.copy()

In [27]:
# Label Encoding on 'Location'
label_encoder = LabelEncoder()

# Fit the label encoder and transform the 'Location' column into integer codes
data['Location_Encoded'] = label_encoder.fit_transform(data['Location'])

# Build a {location name: integer code} lookup for inspection
label_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))

print(data[['Location', 'Location_Encoded']])
print("\nLabel Mapping:")
print(label_mapping)

# Save the fitted LabelEncoder object.
# The target directory is created first so the dump does not fail when
# 'model/' is missing (e.g. on a fresh checkout).
encoder_path = Path('model/label_encoder.joblib')
encoder_path.parent.mkdir(parents=True, exist_ok=True)
joblib.dump(label_encoder, str(encoder_path))

print("Label Encoder saved successfully.")


                Location  Location_Encoded
0      Lagrange Point L2                 1
1              Orbit LEO                 4
2              Orbit LEO                 4
3    Mars Transfer Orbit                 2
4      Lagrange Point L1                 0
..                   ...               ...
995            Orbit MEO                 5
996    Lagrange Point L1                 0
997            Orbit MEO                 5
998    Lagrange Point L2                 1
999    Lagrange Point L2                 1

[1000 rows x 2 columns]

Label Mapping:
{'Lagrange Point L1': np.int64(0), 'Lagrange Point L2': np.int64(1), 'Mars Transfer Orbit': np.int64(2), 'Orbit GEO': np.int64(3), 'Orbit LEO': np.int64(4), 'Orbit MEO': np.int64(5)}
Label Encoder saved successfully.


In [28]:
# Preview the first five rows of the preprocessed frame
data.head(n=5)

Unnamed: 0,Timestamp,Location,Traffic_Density,Peak_Time,Year,Month,Day,Time,Object_Type_Asteroid Mining Ship,Object_Type_Manned Spacecraft,Object_Type_Satellite,Object_Type_Scientific Probe,Object_Type_Space Debris,Object_Type_Space Station,Object_Type,Location_Encoded
0,2024-10-21 21:00:00,Lagrange Point L2,17,15:00:00,2024,10,21,21:00:00,False,False,False,False,False,True,Space Station,1
1,2024-10-11 05:00:00,Orbit LEO,21,15:00:00,2024,10,11,05:00:00,False,False,True,False,False,False,Satellite,4
2,2024-10-29 13:00:00,Orbit LEO,88,06:00:00,2024,10,29,13:00:00,False,False,False,False,False,True,Space Station,4
3,2024-10-24 08:00:00,Mars Transfer Orbit,65,08:00:00,2024,10,24,08:00:00,False,False,False,True,False,False,Scientific Probe,2
4,2024-10-23 17:00:00,Lagrange Point L1,9,06:00:00,2024,10,23,17:00:00,False,False,False,False,False,True,Space Station,0


In [29]:
# Show the frame's dimensions as a (rows, columns) tuple after preprocessing
data.shape

(1000, 16)

In [30]:
# Write the preprocessed frame to an Excel workbook.
# index=True is the pandas default, spelled out here: the row index is saved
# as the first, unnamed column of the sheet.
# NOTE(review): pass index=False instead if downstream readers should not
# receive that extra column — confirm against the consuming notebook.
data.to_excel('data/processed_data.xlsx', index=True)