In [7]:
import pandas as pd
import numpy as np
from joblib import load
from sklearn.preprocessing import StandardScaler, MinMaxScaler
# Display settings: no limit on the number of columns shown, at most 100 rows
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

In [14]:
# Two independent min-max scalers (named for the inputs and for the target)
x_scaler, y_scaler = MinMaxScaler(), MinMaxScaler()

# Columns selected for windowing, in order: 'event_id' comes first, 'risk' comes last
features = [
    'event_id',
    'time_to_tca',
    'c_time_lastob_end',
    'c_time_lastob_start',
    'max_risk_estimate',
    'c_ctdot_r',
    'c_obs_used',
    'miss_distance',
    'c_obs_available',
    'c_recommended_od_span',
    'event_length',
    'c_actual_od_span',
    'c_cr_area_over_mass',
    't_h_per',
    'relative_velocity_t',
    'relative_speed',
    't_rcs_estimate',
    'c_cd_area_over_mass',
    'c_crdot_t',
    'c_sigma_t',
    'c_sigma_rdot',
    'max_risk_scaling',
    't_span',
    'risk',
]

# Window geometry: number of rows used as input, number of rows used as target
timestep_X, timestep_y = 5, 1

# Raw table read from disk
df = pd.read_csv('test_data.csv')

# Constant written into the padding rows of events that are too short
fill_X = 0

In [15]:
# Remove the 'c_rcs_estimate' column (empty, per the original note) first, then
# discard every row that still contains at least one NaN
df = df.drop(columns="c_rcs_estimate").dropna(how='any')

#Keeping only events whose time_to_tca values straddle 3.0 (min < 3 and max > 3); single-row events are dropped as a consequence
def conditions(event):
    """Tell whether the ``time_to_tca`` values of one event straddle 3.0.

    Parameters
    ----------
    event : pandas.DataFrame
        Rows of a single event; must contain a ``time_to_tca`` column.

    Returns
    -------
    bool-like
        Truthy only when the smallest ``time_to_tca`` is below 3.0 and the
        largest is above 3.0.
    """
    tca = event["time_to_tca"].values
    lowest, highest = tca.min(), tca.max()
    return (lowest < 3.0) & (highest > 3.0)

# Keep only the events (groups of rows sharing an event_id) for which conditions() is truthy
df = df.groupby('event_id').filter(conditions)

# One-hot encoding: both columns are cast to 'category' so that get_dummies expands them
# into indicator columns (the original note mentions 5 categories for c_object_type)
df["mission_id"] = df["mission_id"].astype('category')
df["c_object_type"] = df["c_object_type"].astype('category')
df = pd.get_dummies(df)

#Adding feature 'event_length' for counting how many instances each event has
df["event_length"] = df.groupby('event_id')['event_id'].transform('count')

# Scaling data.
# Order matters: y_scaler is fitted on the unscaled 'risk' column before df is replaced by
# its scaled version on the next line.
# NOTE(review): both scalers are fitted on the frame loaded in this notebook; if the model
# was trained with differently fitted scalers, those should be reused here -- confirm.
_ = y_scaler.fit(df["risk"].values.reshape(-1, 1)) #for later use for scaling prediction
# fit_transform returns a plain array; the frame is rebuilt with the same column names
# (and therefore gets a fresh default index)
df = pd.DataFrame(x_scaler.fit_transform(df), columns=df.columns)

#Transforming dataframe into a 3D-array with overlapping windows
# `data` collects the windows appended by df_to_3darray
data = []
# Rows per window: the input rows followed by the target rows
timestep = timestep_X + timestep_y
def df_to_3darray(event, window=None, fill_value=None, out=None):
    """Cut one event into overlapping windows and append them to a list.

    Parameters
    ----------
    event : pandas.DataFrame
        Rows of a single event, one row per time step.
    window : int, optional
        Number of rows per window. Defaults to the module-level ``timestep``.
    fill_value : scalar, optional
        Value written into the padding rows of a short event. Defaults to the
        module-level ``fill_X``.
    out : list, optional
        List that receives the windows. Defaults to the module-level ``data``.

    Returns
    -------
    numpy.ndarray
        The event as a 2-D array, front-padded to ``window + 1`` rows when the
        event has ``window`` rows or fewer.

    Notes
    -----
    Consecutive windows are shifted by one row. An event with ``n`` rows
    (after padding) contributes ``n - window`` windows.
    NOTE(review): because of that count, the final row of an event never
    appears in any window -- confirm this is intended.
    """
    # Fall back to the notebook globals only when the caller gave no override
    window = timestep if window is None else window
    fill_value = fill_X if fill_value is None else fill_value
    out = data if out is None else out

    rows = event.values
    n_rows, n_cols = rows.shape
    if window >= n_rows:
        # Too short for a single window: pad at the front so that the real rows
        # sit at the end and exactly one window is produced by the loop below
        padded = np.full((window + 1, n_cols), fill_value, dtype=float)
        padded[-n_rows:, :] = rows
        rows = padded
    for start in range(rows.shape[0] - window):
        out.append(rows[start:start + window, :])
    return rows
# Keep only the selected columns, in the order given by `features`
df = df[features]

# df_to_3darray is called purely for its side effect (appending windows to `data`).
# Iterating over the groups hands it every event exactly once, with the event_id column
# still present; groupby().apply() has not been stable across pandas versions on either
# point. Groups are visited in sorted event_id order, as with apply().
for _event_key, _event_rows in df.groupby("event_id"):
    df_to_3darray(_event_rows)
data = np.array(data)

#Dividing X and y
target_feture = list(df.columns).index("risk")
# The input slice below relies on 'event_id' being first and 'risk' being last
assert list(df.columns)[0] == "event_id", "'event_id' must be the first selected column"
assert target_feture == len(features) - 1, "'risk' must be the last selected column"
X, y = data[:,:timestep_X,:], data[:,timestep_X:,target_feture]

#Dropping event_id (column 0) to remove noise, and the target column, from the inputs
X = X[:,:,1:target_feture]

#TODO: Padding with specific values column-wise instead of fixed number.
#TODO: Separating time dependent and independent feature in 2 X arrays

print(X.shape, y.shape)
# Names of the columns that remain in X, in the same order
used_features = features[1:-1]
assert X.shape[2] == len(used_features), "feature names do not line up with the columns of X"
last_row = X[-1]
print(X[-1,1])
print(y[-1])
# Print the shape of X_train for reference
print("Shape of X_train:", X.shape)

# Mapping each value in the last row of X_train to its corresponding feature
print("\nMapping each value in the last row of X_train to its corresponding feature:")

for feature_index, feature_name in enumerate(used_features):
    value = last_row[-1, feature_index]  # last time step of the last window
    print("Feature {}: {}".format(feature_name, value))
print(X.shape)
df


(11454, 5, 22) (11454, 1)
[3.38912496e-01 1.00000000e+00 1.00000000e+00 4.13649031e-01
 2.72412844e-01 1.47895336e-02 5.38941235e-01 1.13895216e-02
 6.40218759e-02 9.28571429e-01 2.98064274e-01 1.31522294e-02
 3.90167586e-01 5.08766457e-02 8.95313055e-01 9.58325730e-02
 4.01097375e-02 1.38422437e-06 1.10433304e-03 9.85131711e-04
 1.14191704e-05 3.25968852e-01]
[0.156741]
Shape of X_train: (11454, 5, 22)

Mapping each value in the last row of X_train to its corresponding feature:
Feature time_to_tca: 0.13661625720688797
Feature c_time_lastob_end: 1.0
Feature c_time_lastob_start: 1.0
Feature max_risk_estimate: 0.41534764413638936
Feature c_ctdot_r: 0.25269473547879917
Feature c_obs_used: 0.014789533560864617
Feature miss_distance: 0.5226758917869119
Feature c_obs_available: 0.011389521640091115
Feature c_recommended_od_span: 0.06402187594947698
Feature event_length: 0.9285714285714286
Feature c_actual_od_span: 0.2980642736327758
Feature c_cr_area_over_mass: 0.013152229374268378
Feature t

Unnamed: 0,event_id,time_to_tca,c_time_lastob_end,c_time_lastob_start,max_risk_estimate,c_ctdot_r,c_obs_used,miss_distance,c_obs_available,c_recommended_od_span,event_length,c_actual_od_span,c_cr_area_over_mass,t_h_per,relative_velocity_t,relative_speed,t_rcs_estimate,c_cd_area_over_mass,c_crdot_t,c_sigma_t,c_sigma_rdot,max_risk_scaling,t_span,risk
0,0.0,0.971102,1.0,1.0,0.323030,0.000231,0.012514,0.482437,0.010251,0.122401,0.214286,0.550648,0.006796,0.731516,0.734467,0.465795,0.015053,0.039669,0.000017,0.000442,0.000375,4.295785e-07,0.050706,0.797521
1,0.0,0.916894,1.0,1.0,0.324067,0.000224,0.012514,0.471498,0.010251,0.122401,0.214286,0.550648,0.006796,0.731513,0.734467,0.465795,0.015053,0.039669,0.000017,0.000439,0.000373,4.227290e-07,0.050706,0.798029
2,0.0,0.824870,1.0,1.0,0.322011,0.000240,0.012514,0.488294,0.010251,0.122401,0.214286,0.550648,0.006796,0.731517,0.734467,0.465795,0.015053,0.039669,0.000017,0.000443,0.000376,4.383169e-07,0.050706,0.796850
3,0.0,0.786556,1.0,1.0,0.320779,0.000250,0.012514,0.498551,0.010251,0.122401,0.214286,0.550648,0.006796,0.731517,0.734467,0.465795,0.015053,0.039669,0.000017,0.000446,0.000378,4.482000e-07,0.050706,0.796215
4,0.0,0.045838,1.0,1.0,0.320938,0.000253,0.012514,0.497246,0.010251,0.122401,0.214286,0.550648,0.006796,0.731520,0.734467,0.465795,0.015053,0.039669,0.000017,0.000444,0.000377,4.476549e-07,0.050706,0.796281
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20914,1.0,0.268997,1.0,1.0,0.412168,0.288109,0.014790,0.550382,0.011390,0.064022,0.928571,0.298064,0.013152,0.390149,0.050877,0.895313,0.095833,0.040110,0.000001,0.001146,0.001022,1.137196e-05,0.325969,0.180288
20915,1.0,0.191150,1.0,1.0,0.416942,0.234620,0.014790,0.517032,0.011390,0.064022,0.928571,0.298064,0.013152,0.390222,0.050877,0.895313,0.095833,0.040110,0.000002,0.001025,0.000914,1.147607e-05,0.325969,0.175198
20916,1.0,0.136616,1.0,1.0,0.415348,0.252695,0.014790,0.522676,0.011390,0.064022,0.928571,0.298064,0.013152,0.390204,0.050877,0.895313,0.095833,0.040110,0.000002,0.001045,0.000932,1.170886e-05,0.325969,0.160295
20917,1.0,0.053585,1.0,1.0,0.416145,0.251840,0.014790,0.521523,0.011390,0.064022,0.928571,0.298064,0.013152,0.390211,0.050877,0.895313,0.095833,0.040110,0.000002,0.001041,0.000928,1.177022e-05,0.325969,0.156741


In [26]:

# Take the last window of X and give it a leading axis of length 1
x_test = np.expand_dims(X[-1], axis=0)
print(x_test.shape)
# Write the array to 'x_test.npy'
np.save('x_test.npy', x_test)


(1, 5, 22)


Predicted risk: {} [[0.36361235]]


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

In [None]:
def foo():
    """Return the module-level ``x_test`` array."""
    sample = x_test
    return sample