## Sample Notebook for making predictions of mlcsv_future data

In [1]:
import numpy as np
import pandas as pd
from getpass import getpass
from sqlalchemy import create_engine,Table, MetaData, text, func
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm import Session
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import session
import sqlalchemy_utils
from sqlalchemy_utils import database_exists, create_database
import psycopg2
from sqlalchemy import Table, Column, Integer, String, MetaData, ForeignKey
from sqlalchemy import inspect
import pyodbc 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
import pandas as pd
import tensorflow as tf
from tensorflow import keras


### Read Test Future data into DataFrame

In [2]:
mlcsv_future_df = pd.read_csv('../csv Data/test_neo_public_mlcsv_future.csv')

In [3]:
mlcsv_future_df.tail(10)

Unnamed: 0,spkid,full_name,pdes,h,orbit_id,epoch,epoch.mjd,epoch.cal,e,a,...,orbit_id_cad,jd,cd,dist,dist_min,dist_max,v_rel,v_inf,t_sigma_f,h_cad
490,2171486,171486 (1996 MO),171486,18.41,JPL 86,2459600.5,59600,2022-01-21.0,0.5128,1.794,...,86,2476392.0,2068-01-11 09:06:00.000000,0.168911,0.168909,0.168913,15.738282,15.73728,< 00:01,18.41
491,2171576,171576 (1999 VP11),171576,18.6,JPL 88,2459600.5,59600,2022-01-21.0,0.5835,1.086,...,88,2486543.0,2095-10-27 07:31:00.000000,0.118106,0.118096,0.118115,24.171065,24.170132,< 00:01,18.6
492,2171839,171839 (2001 JM1),171839,19.02,JPL 86,2459600.5,59600,2022-01-21.0,0.3111,1.461,...,86,2482346.0,2084-04-30 16:02:00.000000,0.065105,0.065104,0.065105,10.05135,10.047278,< 00:01,19.02
493,2172034,172034 (2001 WR1),172034,17.76,JPL 174,2459600.5,59600,2022-01-21.0,0.2025,1.277,...,174,2480677.0,2079-10-05 12:22:00.000000,0.172696,0.172694,0.172698,13.054377,13.053195,< 00:01,17.76
494,2172678,172678 (2003 YM137),172678,18.7,JPL 53,2459600.5,59600,2022-01-21.0,0.6902,2.595,...,53,2486777.0,2096-06-17 02:26:00.000000,0.077463,0.077163,0.077764,15.173872,15.171605,00:56,18.7
495,2172722,172722 (2004 BV102),172722,17.53,JPL 108,2459600.5,59600,2022-01-21.0,0.6995,1.544,...,108,2481653.0,2082-06-07 06:22:00.000000,0.164985,0.164955,0.165014,19.686971,19.686151,00:04,17.53
496,2173561,173561 (2000 YV137),173561,18.32,JPL 133,2459600.5,59600,2022-01-21.0,0.311,1.448,...,134,2478424.0,2073-08-04 10:22:00.000000,0.117412,0.11741,0.117414,17.114913,17.113587,< 00:01,18.32
497,2173664,173664 (2001 JU2),173664,19.68,JPL 90,2459600.5,59600,2022-01-21.0,0.2689,1.517,...,90,2473318.0,2059-08-12 20:08:00.000000,0.110105,0.110105,0.110105,2.926271,2.91799,< 00:01,19.68
498,2174050,174050 (2002 CC19),174050,17.61,JPL 163,2459600.5,59600,2022-01-21.0,0.1134,1.285,...,163,2483720.0,2088-02-03 22:09:00.000000,0.174064,0.174063,0.174064,25.364374,25.36377,< 00:01,17.61
499,2175114,175114 (2004 QQ),175114,16.55,JPL 130,2459600.5,59600,2022-01-21.0,0.6639,2.249,...,132,2483990.0,2088-10-30 20:05:00.000000,0.179532,0.17953,0.179535,21.739008,21.738325,< 00:01,16.55


In [4]:
# Create new dataframe with descriptive meta-data column for Visualization
descr_future_df = mlcsv_future_df.loc[:, ['spkid', 'full_name', 'pdes', 'class', 'first_obs', 'last_obs', 'n_obs_used']]
descr_future_df.tail(10)


Unnamed: 0,spkid,full_name,pdes,class,first_obs,last_obs,n_obs_used
490,2171486,171486 (1996 MO),171486,APO,1996-06-23,2021-11-24,275
491,2171576,171576 (1999 VP11),171576,APO,1999-11-07,2019-01-21,392
492,2171839,171839 (2001 JM1),171839,APO,2001-05-12,2021-11-30,561
493,2172034,172034 (2001 WR1),172034,AMO,1953-02-14,2021-05-21,783
494,2172678,172678 (2003 YM137),172678,APO,2003-12-21,2007-12-16,336
495,2172722,172722 (2004 BV102),172722,APO,2004-01-31,2020-04-02,469
496,2173561,173561 (2000 YV137),173561,APO,2000-12-01,2021-12-03,642
497,2173664,173664 (2001 JU2),173664,AMO,2001-05-15,2021-01-18,263
498,2174050,174050 (2002 CC19),174050,AMO,1986-02-07,2021-09-11,759
499,2175114,175114 (2004 QQ),175114,APO,1997-12-28,2021-12-27,569


#### Define list of features for making future predictions


In [5]:
# Define feature columns to subset dataframe
feature_columns = ["moid","moid_ld","h","v_rel","v_inf"]

In [6]:
output_df = mlcsv_future_df.loc[:, feature_columns]

In [7]:
output_df.dtypes

moid       float64
moid_ld    float64
h          float64
v_rel      float64
v_inf      float64
dtype: object

In [8]:
# Extract feature variables from future data set
X = output_df
X

Unnamed: 0,moid,moid_ld,h,v_rel,v_inf
0,0.1490,58.00,10.43,5.974914,5.972469
1,0.0815,31.70,13.87,11.268948,11.267526
2,0.1060,41.40,17.39,7.564784,7.562277
3,0.0342,13.30,16.35,28.169786,28.167613
4,0.0309,12.00,15.29,16.694264,16.693424
...,...,...,...,...,...
495,0.1030,40.10,17.53,19.686971,19.686151
496,0.0206,8.02,18.32,17.114913,17.113587
497,0.1100,43.00,19.68,2.926271,2.917990
498,0.1600,62.30,17.61,25.364374,25.363770


In [9]:
# Create a StandardScaler instances for feature variabbles
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X)

# Scale the data
X_predict_scaled = X_scaler.transform(X)


In [10]:
# Load the Neural Networks Model "nn"
forecast_model = tf.keras.models.load_model('NEOs_project_NN.h5')


In [16]:
# Evaluate the forecast model using the future data
predictions = forecast_model.predict(X)

In [17]:
predictions

array([[0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],
       [0.],

In [18]:
# making a data frame from predictions of future CAD and NEOs data
prediction_df = pd.DataFrame(predictions, columns = ["predicted_pha"])

In [19]:
prediction_df

Unnamed: 0,predicted_pha
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0
...,...
495,0.0
496,0.0
497,0.0
498,0.0


In [None]:
# Add new prediction column to dataframe
output_df['PHA_prediction'] = prediction_series

In [None]:
# Merge output dataframe with prediction column to the descr_future_df dataframe
new_output_df = pd.merge(descr_future_df, output_df, how='inner', left_index=True, right_index=True)

In [None]:
new_output_df['PHA_prediction'].value_counts()

In [None]:
new_output_df.head()

In [None]:
new_output_df.to_csv('test_future_predictions_data.csv', index=False)