# <center> Orbit Classification </center>

## Importing libraries

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import warnings

from sklearn.metrics import accuracy_score,classification_report
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Suppress every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation and convergence warnings raised
# by the libraries used below — consider narrowing the filter.
warnings.filterwarnings('ignore')

## Importing Data

In [None]:
# Read the labelled orbit table that the classifiers are trained on.
data = pd.read_csv(filepath_or_buffer='learn_orbits_data.csv')

In [None]:
# Preview the first five rows of the training table.
data.head(n=5)

In [None]:
# Print the column names, non-null counts and dtypes of the training table.
data.info()

In [None]:
# Distinct labels present in the target column.
data.loc[:, 'class'].unique()

## Data Description

a (AU) -- Semi-major axis of the orbit in AU <br>
e -- Eccentricity of the orbit <br>
i (deg) -- Inclination of the orbit with respect to the ecliptic plane and the equinox of J2000 (J2000-Ecliptic) in degrees <br>
w (deg) -- Argument of perihelion (J2000-Ecliptic) in degrees <br>
Node (deg) -- Longitude of the ascending node (J2000-Ecliptic) in degrees <br>
M (deg) -- Mean anomaly at epoch in degrees <br>
q (AU) -- Perihelion distance of the orbit in AU <br>
Q (AU) -- Aphelion distance of the orbit in AU <br>
P (yr) -- Orbital period in Julian years <br>
H (mag) -- Absolute V-magnitude <br>
MOID (AU) -- Minimum orbit intersection distance (the minimum distance between the osculating orbits of the NEO and the Earth) <br>
class -- Object classification <br>

## Missing data

In [None]:
# Heatmap of the boolean missing-value mask; any highlighted cell is a gap in the data.
sns.heatmap(data.isna())

## Data Correlation

In [None]:
# Pairwise correlation of every column except the last one, annotated with the coefficients.
plt.figure(figsize=(12, 8))
feature_corr = data.iloc[:, :-1].corr()
sns.heatmap(feature_corr, annot=True)

## EDA

In [None]:
# Column labels of the training table, for reference in the plots below.
data.columns

In [None]:
# Scatter-matrix of all numeric columns, coloured by orbit class.
sns.pairplot(data, hue='class')

In [None]:
# Per-class distribution of four orbital elements, one panel per feature.
# An explicit figure/axes grid replaces the hard-coded figure number and the
# four copy-pasted subplot calls, so each panel is drawn on its own axes.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
for ax, feature in zip(axes.flat, ['a (AU)', 'i (deg)', 'w (deg)', 'Node (deg)']):
    sns.boxplot(x='class', y=feature, palette='CMRmap', data=data, ax=ax)
    ax.set_title(f'{feature} by class')
# Keep titles and axis labels of neighbouring panels from overlapping.
fig.tight_layout()

In [None]:
# Per-class distribution of four more orbital elements, one panel per feature.
# A fresh figure/axes grid is created here instead of re-requesting figure
# number 1, so this cell never draws onto a figure left over from another cell.
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
for ax, feature in zip(axes.flat, ['M (deg)', 'q (AU)', 'Q (AU)', 'P (yr)']):
    sns.boxplot(x='class', y=feature, palette='CMRmap', data=data, ax=ax)
    ax.set_title(f'{feature} by class')
# Keep titles and axis labels of neighbouring panels from overlapping.
fig.tight_layout()

## Data Split

In [None]:
# Features: everything except the label and 'Q (AU)'.
# Q is dropped due to its extremely high correlation with a (AU).
X = data.drop(columns=['class', 'Q (AU)'])
# Target: the orbit class label.
Y = data['class']

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=104,
)

## Model Creation

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic-regression candidate with the library's default settings; it is
# only constructed here and fitted later together with the other models.
lr = LogisticRegression()

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random-forest candidate. The forest is randomised, so a fixed seed (the same
# one used for the train/test split) is required for the reported accuracy to
# be reproducible after a kernel restart.
# NOTE(review): accuracy figures quoted from earlier unseeded runs may differ slightly.
rfc = RandomForestClassifier(random_state=104)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Nearest-neighbour candidate that uses a single neighbour; a tuned-k variant
# is built separately below.
knn = KNeighborsClassifier(n_neighbors=1)

In [None]:
from sklearn.model_selection import cross_val_score

# Choose k for the tuned KNN candidate by 5-fold cross-validation on the
# training split only. Scoring each k against X_test/Y_test would tune the
# model on the same data it is later evaluated on, inflating its test accuracy.
knn_cv_scores = []
for k in range(1, 200):
    fold_scores = cross_val_score(
        KNeighborsClassifier(n_neighbors=k),
        X_train,
        Y_train,
        cv=5,
        scoring='accuracy',
    )
    knn_cv_scores.append((fold_scores.mean(), k))

# max() returns the first maximum, so ties resolve to the smallest k.
best_cv_accuracy, best_k = max(knn_cv_scores, key=lambda pair: pair[0])

# Unfitted estimator with the selected k; it is fitted with the other candidates later.
kno = KNeighborsClassifier(n_neighbors=best_k)

In [None]:
from sklearn.svm import SVC
# Support-vector candidate with the library's default hyper-parameters; a
# grid-searched variant is built separately below.
svc = SVC()

In [None]:
from sklearn.model_selection import GridSearchCV

# Search space for the RBF-kernel SVC: regularisation strength C and kernel width gamma.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000, 2000],
    'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['rbf'],
}
# refit=True retrains the best combination so `grid` can be used like a fitted model.
grid = GridSearchCV(estimator=SVC(), param_grid=param_grid, refit=True, verbose=3)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Fit every candidate on the training split and record [model, test accuracy].
models = [lr, rfc, knn, kno, svc, grid]
accuracy = []
for model in models:
    model.fit(X_train, Y_train)
    test_score = accuracy_score(Y_test, model.predict(X_test))
    accuracy.append([model, test_score])

In [None]:
# Keep the [model, accuracy] entry with the highest test accuracy;
# on ties the earliest entry in `accuracy` wins.
temp = max(accuracy, key=lambda entry: entry[1])

### The best method is thus a Random Forest Classifier as it gives us an accuracy of 99.62%

In [None]:
# Score the selected model on the held-out test split once and reuse the predictions.
best_test_predictions = temp[0].predict(X_test)
print(classification_report(Y_test, best_test_predictions))
best_test_accuracy = round(accuracy_score(Y_test, best_test_predictions) * 100, 2)
print("Accuracy of this model: ", best_test_accuracy, '%')

## Downloading data from MongoDB to a JSON file

In [None]:
import json
import os

from pymongo import MongoClient

# The connection string comes from the environment so no credentials live in the notebook.
uri = os.getenv("MONGO_URI")
if not uri:
    raise ValueError("MONGO_URI environment variable is not set")

# The context manager closes the client once the documents have been read,
# even if the query raises, so re-running the cell does not pile up open clients.
with MongoClient(uri) as client:
    db = client["NEO-Cluster"]
    collection = db["asteroids"]
    # Materialise the cursor while the connection is still open.
    asteroid_list = list(collection.find())

# default=str stringifies any value the json module cannot encode natively
# (presumably the documents' ObjectId `_id` — confirm against the collection).
with open("new_orbits_data.json", mode="w", encoding="utf-8") as file:
    json.dump(asteroid_list, file, default=str, indent=4)

print("Dane zostały zapisane do pliku new_orbits_data.json")


## Parsing the JSON into a CSV with the same format as the training CSV

In [None]:
import csv
import json

input_json_file = 'new_orbits_data.json'
output_csv_file = 'our_orbits_data.csv'

# Header of the output CSV, in the column order written for every row.
fieldnames = ['a (AU)', 'e', 'i (deg)', 'w (deg)', 'Node (deg)', 'M (deg)', 'q (AU)', 'Q (AU)', 'P (yr)', 'H (mag)', 'MOID (AU)', 'class']

# CSV column -> key inside each record's "orbital_data" sub-document,
# for the values that are copied over unchanged.
orbital_field_keys = {
    'a (AU)': 'semi_major_axis',
    'e': 'eccentricity',
    'i (deg)': 'inclination',
    'w (deg)': 'perihelion_argument',
    'Node (deg)': 'ascending_node_longitude',
    'M (deg)': 'mean_anomaly',
    'q (AU)': 'perihelion_distance',
    'Q (AU)': 'aphelion_distance',
    'MOID (AU)': 'minimum_orbit_intersection',
}


def orbit_to_row(orbit):
    """Flatten one exported asteroid record into a dict keyed by CSV column.

    Missing values become empty strings. A missing or null "orbital_data" or
    "orbit_class" entry is treated as an empty mapping instead of raising.
    """
    orbital = orbit.get('orbital_data') or {}
    row = {column: orbital.get(key, '') for column, key in orbital_field_keys.items()}

    # The source period is divided by 365.25 to express it in Julian years.
    period_days = orbital.get('orbital_period', '')
    row['P (yr)'] = float(period_days) / 365.25 if period_days else ''

    # The absolute magnitude sits on the record itself, not under "orbital_data".
    row['H (mag)'] = orbit.get('absolute_magnitude_h', '')
    row['class'] = (orbital.get('orbit_class') or {}).get('orbit_class_type', '')
    return row


# Read with the encoding the export was written in, and keep the records under
# their own name so the training DataFrame `data` is not overwritten.
with open(input_json_file, 'r', encoding='utf-8') as file:
    orbit_records = json.load(file)

with open(output_csv_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(orbit_to_row(orbit) for orbit in orbit_records)

print(f"Dane zostały zapisane do pliku {output_csv_file}.")

## Testing the models on our API data

In [None]:

new_data = pd.read_csv('our_orbits_data.csv')

# Blank CSV cells are read back as NaN. Rows missing a feature or the label
# cannot be scored reliably, so only complete rows are evaluated.
required_columns = list(X_train.columns) + ['class']
new_data_complete = new_data.dropna(subset=required_columns)
if new_data_complete.empty:
    raise ValueError("our_orbits_data.csv contains no complete rows to evaluate")

# Select the features by name, in the same column order the models were fitted on.
X_new = new_data_complete[list(X_train.columns)]
Y_new = new_data_complete['class'].astype(str)

# temp[0] is the estimator that scored highest on the held-out test split.
best_model = temp[0]
# Strip any '*' marker from the predicted labels before comparing them with
# the labels from the CSV.
predictions_new = [str(label).replace('*', '') for label in best_model.predict(X_new)]

print("Classification Report for New Data:")
# Show a short side-by-side sample instead of dumping every label.
print(pd.DataFrame({'actual': Y_new.to_numpy(), 'predicted': predictions_new}).head(10))
print(classification_report(Y_new, predictions_new))
print("Accuracy of the best model on new data: ", round(accuracy_score(Y_new, predictions_new) * 100, 2), '%')
