In [1]:
# Attach Google Drive to the Colab runtime; later cells read their CSVs from under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Install
! pip install scikit-optimize

Collecting scikit-optimize
  Downloading scikit_optimize-0.8.1-py2.py3-none-any.whl (101 kB)
[?25l[K     |███▎                            | 10 kB 24.2 MB/s eta 0:00:01[K     |██████▌                         | 20 kB 30.1 MB/s eta 0:00:01[K     |█████████▊                      | 30 kB 18.4 MB/s eta 0:00:01[K     |█████████████                   | 40 kB 12.4 MB/s eta 0:00:01[K     |████████████████▏               | 51 kB 5.3 MB/s eta 0:00:01[K     |███████████████████▍            | 61 kB 5.5 MB/s eta 0:00:01[K     |██████████████████████▊         | 71 kB 5.3 MB/s eta 0:00:01[K     |██████████████████████████      | 81 kB 6.0 MB/s eta 0:00:01[K     |█████████████████████████████▏  | 92 kB 5.8 MB/s eta 0:00:01[K     |████████████████████████████████| 101 kB 4.1 MB/s 
Collecting pyaml>=16.9
  Downloading pyaml-20.4.0-py2.py3-none-any.whl (17 kB)
Installing collected packages: pyaml, scikit-optimize
Successfully installed pyaml-20.4.0 scikit-optimize-0.8.1


In [3]:
# Imports
import numpy as np
import pandas as pd
from tqdm import tqdm  
from sklearn.preprocessing import StandardScaler
from sklearn import tree
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
import xgboost as xgb
from skopt import BayesSearchCV 
import warnings
# Hide the repeated "already evaluated" notice (presumably raised by skopt during
# the Bayesian search — verify); all other warnings stay visible.
warnings.filterwarnings(
    'ignore',
    message='The objective has been evaluated at this point before.',
)

In [4]:
# Load the two TLE catalogues from the mounted Google Drive folder.
# In both frames the first CSV column is discarded
# (presumably a saved row index — verify against the files).
df = pd.read_csv("/content/drive/MyDrive/SSA/data/deception_detection_catalogue_v1.csv")
df = df.drop(columns=df.columns[0])

df_test = pd.read_csv("/content/drive/MyDrive/SSA/data/deception_detection_catalogue.csv")
df_test = df_test.drop(columns=df_test.columns[0])

In [5]:
# Non-feature columns: object metadata plus the DEB_LABELS target used for training later on.
non_feature_columns = ['OBJECT_NAME', 'NORAD_ID', 'YEAR LAUNCH', 'EPOCH', 'DEB_LABELS']

# Stack the first catalogue on top of the second with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0; the resulting frame is the same.
df_total = pd.concat([df, df_test], ignore_index=True)

# Feature-only copies of each frame.
df_ = df.drop(columns=non_feature_columns)
df_total_ = df_total.drop(columns=non_feature_columns)

In [6]:
# Two independent standardisers: one for the first catalogue alone,
# one for the combined (first + second) catalogue.
scalar, scalar_total = StandardScaler(), StandardScaler()

In [7]:
# Learn per-column mean/std from the feature columns only
# (metadata and the label column are excluded).
excluded_columns = ['OBJECT_NAME', 'NORAD_ID', 'YEAR LAUNCH', 'EPOCH', 'DEB_LABELS']

scalar.fit(df.drop(columns=excluded_columns))
# Kept as the last expression so the fitted scaler's repr is displayed.
scalar_total.fit(df_total.drop(columns=excluded_columns))

StandardScaler(copy=True, with_mean=True, with_std=True)

In [8]:
# Standardise the feature columns and wrap the resulting arrays back into labelled DataFrames.
non_feature_columns = ['OBJECT_NAME', 'NORAD_ID', 'YEAR LAUNCH', 'EPOCH', 'DEB_LABELS']
features = df.drop(columns=non_feature_columns)
features_total = df_total.drop(columns=non_feature_columns)

# NOTE(review): both frames are transformed with `scalar` (fit on `df` only).
# `scalar_total` is fit in an earlier cell but is not applied in the visible
# cells. Behaviour is left unchanged here — confirm this is intended.
scaled_features = scalar.transform(features)
scaled_features_total = scalar.transform(features_total)

# Take the column names from the frames that were actually scaled. The previous
# positional slice (columns[4:-1]) only gave the right names while the four
# metadata columns came first and the label came last in the CSV.
df_features = pd.DataFrame(scaled_features, columns=features.columns)
df_features_total = pd.DataFrame(scaled_features_total, columns=features_total.columns)
df_features.head()

Unnamed: 0,INCLINATION(i),RIGHT ASCENSION ASCENDING NODE(w),ECCENTRICITY(e),ARGUMENT PERIGEE(omega),FIRST DERIVATIVE MEAN MOTION(n_dot),SECOND DERIVATIVE MEAN MOTION(n_ddot),MEAN ANOMALY(M),MEAN MOTION(n),DRAG TERM(B*),REVOLUTION(rev)
0,-1.139129,0.597003,0.79783,-1.395398,-0.005684,-0.010214,1.445475,-0.125293,-0.014096,-0.087096
1,-1.139126,0.625838,0.79784,-1.43895,-0.005683,-0.010214,1.475081,-0.125293,-0.013766,-0.090506
2,-1.181341,-1.395677,0.566061,-0.630292,-0.005647,-0.010214,0.770208,0.072053,-0.008568,1.470416
3,-1.181341,-1.363553,0.56607,-0.680241,-0.005647,-0.010214,0.821345,0.072053,-0.008634,1.435381
4,-1.181345,-1.331428,0.566081,-0.730192,-0.005649,-0.010214,0.871123,0.072052,-0.008743,1.402848


In [9]:
# Row offset at which the evaluation rows begin inside df_total / df_features_total
# (presumably len(df), i.e. where the second catalogue starts — TODO confirm).
test_start = 6543689

# Training data: keep 80% of the scaled first catalogue; the held-out 20% is discarded.
X_train, _, y_train, _ = train_test_split(
    df_features,
    df['DEB_LABELS'],
    test_size=0.2,
    random_state=20,
)

# Evaluation data: keep 99% of the rows from test_start onwards; the remaining 1% is discarded.
test_features = df_features_total[test_start:]
test_labels = df_total[test_start:]['DEB_LABELS']
_, X_test, _, y_test = train_test_split(
    test_features,
    test_labels,
    test_size=0.99,
    random_state=20,
)

In [10]:
# Wrap the splits in XGBoost's DMatrix container.
# Only the training matrix carries labels; the test matrix holds features alone.
dtrain = xgb.DMatrix(data=X_train, label=y_train)
dtest = xgb.DMatrix(data=X_test)

In [11]:
# Baseline: 5-fold cross-validation with XGBoost's documented default
# hyperparameter values, used as a reference point before tuning.
def_params = {
    'min_child_weight': 1,
    'max_depth': 6,
    'subsample': 1.0,
    'colsample_bytree': 1.0,
    'reg_lambda': 1,
    'reg_alpha': 0,
    # XGBoost names this parameter 'learning_rate' (alias 'eta'); the
    # hyphenated 'learning-rate' key is not a recognised parameter.
    'learning_rate': 0.3,
    # Not a hyperparameter: turns off XGBoost's logging. 'verbosity' replaces
    # the deprecated 'silent' flag.
    'verbosity': 0,
}
# NOTE(review): no 'objective' is set, and the metric reported by this cell is
# RMSE. Confirm whether a classification objective is wanted for DEB_LABELS.
cv_res = xgb.cv(def_params, dtrain, nfold=5)
cv_res.tail()


Unnamed: 0,train-rmse-mean,train-rmse-std,test-rmse-mean,test-rmse-std
5,0.283006,0.001515,0.283373,0.001526
6,0.275653,0.001264,0.276423,0.001328
7,0.269991,0.001506,0.270851,0.00158
8,0.265921,0.001228,0.266573,0.001232
9,0.262754,0.001005,0.263339,0.000983


In [None]:
# Search space for the Bayesian hyperparameter optimisation.
# Each entry is (low, high) or (low, high, prior).
params = {
    'min_child_weight': (0, 50),
    # Lower bound is 1, not 0: XGBoost interprets max_depth=0 as "no depth
    # limit", which is not accepted by every tree method.
    'max_depth': (1, 10),
    'subsample': (0.5, 1.0),
    'colsample_bytree': (0.5, 1.0),
    'reg_lambda': (1e-5, 100, 'log-uniform'),
    'reg_alpha': (1e-5, 100, 'log-uniform'),
    # Must be spelled 'learning_rate' to reach the estimator's parameter;
    # with the hyphenated 'learning-rate' key the searched value was never
    # applied as the learning rate.
    'learning_rate': (0.01, 0.2, 'log-uniform'),
}

# 10 search iterations, each scored with 5-fold CV using the estimator's default scorer.
bayes = BayesSearchCV(
    xgb.XGBClassifier(),
    params,
    n_iter=10,
    scoring=None,
    cv=5,
    random_state=42,
)
res = bayes.fit(X_train, y_train)
print(res.best_params_)

In [None]:
# Hyperparameters used for the final model.
# NOTE(review): the values are hard-coded — presumably copied from a previous
# Bayesian-search run; confirm they are still the intended ones.
final_params = {
    'colsample_bytree': 0.50,
    'max_depth': 7,
    'min_child_weight': 13,
    'reg_alpha': 0.112,
    'reg_lambda': 0.0008,
    'subsample': 0.65,
    'eta': 0.11,
    # Not a hyperparameter: turns off XGBoost's logging. 'verbosity' replaces
    # the deprecated 'silent' flag.
    'verbosity': 0,
}

# 5-fold CV for up to 1000 boosting rounds, stopping once the validation
# metric has not improved for 100 consecutive rounds.
cv_res = xgb.cv(
    final_params,
    dtrain,
    num_boost_round=1000,
    early_stopping_rounds=100,
    nfold=5,
)


In [None]:
# Plot mean train vs. validation RMSE per boosting round, starting at index label 30.
rmse_columns = ['train-rmse-mean', 'test-rmse-mean']
cv_res.loc[30:, rmse_columns].plot()

In [None]:
# Fit the final booster on the whole training matrix.
# NOTE(review): the round count is hard-coded — presumably read off the
# early-stopped CV run above; confirm.
boosting_rounds = 151
model = xgb.train(params=final_params, dtrain=dtrain, num_boost_round=boosting_rounds)

In [None]:
#model = xgb.XGBClassifier()
##random_search=RandomizedSearchCV(model,param_distributions=params,n_iter=10,scoring='roc_auc',n_jobs=-1,cv=5,verbose=3)

In [None]:
# Here we go
#random_search.fit(X_train,y_train)

In [None]:
#random_search.best_estimator_

In [None]:
#model = xgb.XGBClassifier()
#model.fit(X_train, y_train)

In [None]:
from sklearn import metrics
from sklearn.metrics import classification_report,confusion_matrix

# `model` comes from xgb.train and is therefore a Booster, whose predict()
# takes a DMatrix rather than a DataFrame — wrap X_test before scoring.
raw_scores = model.predict(xgb.DMatrix(X_test))

# final_params sets no classification objective, so the booster returns
# continuous scores; the report and accuracy need discrete class labels.
# assumes DEB_LABELS is binary 0/1, making 0.5 the natural cut-off — TODO confirm
prediction_test = (raw_scores > 0.5).astype(int)

print(classification_report(y_test, prediction_test))
print('Accuracy = ', metrics.accuracy_score(y_test, prediction_test))

In [None]:
# Row offsets into df_total, all expressed relative to where the test data begins.
starting_index_test_data = 6543689
recent_elset_index_over = starting_index_test_data + 20765
start_cosmos2499 = starting_index_test_data + 44588

# Show the row at the start_cosmos2499 offset as a sanity check.
cosmos2499_first_row = df_total.iloc[start_cosmos2499, :]
print(cosmos2499_first_row)

In [None]:
# Offsets, relative to start_cosmos2499, of the window of scaled rows to inspect.
init_range = 4000
fin_range = 5000

window_begin = start_cosmos2499 + init_range
window_end = start_cosmos2499 + fin_range
cosmos2499 = df_features_total[window_begin:window_end]  # NORAD_ID = 39765 [44588:27365]

In [None]:
# `model` is a Booster (built by xgb.train); its predict() takes a DMatrix,
# so the DataFrame slice is wrapped before scoring. The raw scores are printed as-is.
cosmos2499_pred = model.predict(xgb.DMatrix(cosmos2499))
print(cosmos2499_pred)

In [None]:
# Rank the features by how much they contributed to the trained booster.
feature_list = list(df_features.columns)

# A Booster from xgb.train has no `feature_importances_` attribute (that lives
# on the scikit-learn wrappers); get_score() is the Booster-level equivalent.
gain_by_feature = model.get_score(importance_type='gain')

# get_score() leaves out features that were never used in a split, so reindex
# against the full feature list and report those as 0.
feature_imp = (
    pd.Series(gain_by_feature, dtype=float)
    .reindex(feature_list, fill_value=0.0)
    .sort_values(ascending=False)
)
print(feature_imp)