# Classificação de asteroide 

**Objetivo do modelo**

Classificar asteroides como potencialmente perigosos ou não perigosos.


Identificar os atributos (features) responsáveis por classificar um asteroide como perigoso.

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib
import matplotlib.pyplot as plt

In [None]:
# Load the dataset from the local CSV file and preview its first rows.
df = pd.read_csv("nasa.csv")
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Preview the last rows of the frame.
df.tail()

In [None]:
# Number of missing values per column.
df.isna().sum()

In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics of the frame's columns.
df.describe()

In [None]:
# Pairwise correlation matrix of the raw dataset.
# The frame still contains non-numeric columns at this point (they are only
# dropped later), and recent pandas versions raise when `corr()` meets them.
# Selecting numeric and boolean columns explicitly reproduces what older
# pandas did implicitly and works on every version.
corr = df.select_dtypes(include=["number", "bool"]).corr()
corr

# Data analytics 

In [None]:
# Class balance: number of rows for each value of the "Hazardous" column.
plt.figure(figsize=(20, 10))
ax = sns.countplot(data=df, x="Hazardous")

In [None]:
# Box plot of the "Absolute Magnitude" column.
plt.figure(figsize=(20, 10))
ax = sns.boxplot(data=df, x="Absolute Magnitude")

In [None]:
# Distribution of "Inclination" as a 20-bin histogram with outlined bars.
# NOTE(review): `distplot` is deprecated in recent seaborn releases; consider
# migrating to `histplot`/`displot` once the target seaborn version is known.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Inclination"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Eccentricity" as a 20-bin histogram with outlined bars.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Eccentricity"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Orbital Period" as a 20-bin histogram with outlined bars.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Orbital Period"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Absolute Magnitude" as a 20-bin histogram with outlined bars.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Absolute Magnitude"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Perihelion Distance" as a 20-bin histogram with outlined bars.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Perihelion Distance"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Aphelion Dist" as a 20-bin histogram with outlined bars.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Aphelion Dist"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Perihelion Time" as a 20-bin histogram with outlined bars.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Perihelion Time"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Mean Anomaly" as a 20-bin histogram with outlined bars.
plt.figure(figsize=(20, 10))
ax = sns.distplot(
    df["Mean Anomaly"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# Distribution of "Jupiter Tisserand Invariant" as a 20-bin histogram with
# outlined bars. The call is the cell's last expression, so its return value
# is displayed by the notebook.
plt.figure(figsize=(20, 10))
sns.distplot(
    df["Jupiter Tisserand Invariant"],
    bins=20,
    color="green",
    hist_kws={"edgecolor": "#9dab86", "linewidth": 2},
)

In [None]:
# 3x3 overview of the feature distributions, one panel per column, filled
# row by row.
# "Eccentricity" replaces a second, duplicated "Absolute Magnitude" panel so
# that the grid shows each of the nine features plotted individually above
# exactly once.
# NOTE(review): `distplot` is deprecated in recent seaborn releases; consider
# migrating to `histplot` once the target seaborn version is known.
grid_columns = [
    "Absolute Magnitude",
    "Inclination",
    "Orbital Period",
    "Eccentricity",
    "Perihelion Distance",
    "Aphelion Dist",
    "Perihelion Time",
    "Mean Anomaly",
    "Jupiter Tisserand Invariant",
]

fig1, axes = plt.subplots(nrows=3, ncols=3, figsize=(20, 20))

# `axes.flat` walks the 3x3 array of axes in row-major order.
for panel, column in zip(axes.flat, grid_columns):
    sns.distplot(df[column], ax=panel)
plt.show()

In [None]:
# Annotated heatmap of the correlation matrix held in `corr`.
plt.figure(figsize=(20, 10))
sns.heatmap(data=corr, annot=True)

# Data preprocessing

In [None]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Encode the "Hazardous" column as integer class ids in a new "Label" column.
label = LabelEncoder()
df["Label"] = label.fit_transform(df["Hazardous"])

# Original class values that correspond to the encoded ids 0 and 1.
cat = list(label.inverse_transform([0, 1]))

# Preview the frame, now including the "Label" column.
df.head()

In [None]:
# Number of rows per encoded class.
df["Label"].value_counts()

In [None]:
# Distinct values found in the "Hazardous" column.
cla = list(set(df["Hazardous"]))

# Drop, in place, the columns that are not used as model features, plus the
# raw "Hazardous" target, which is already encoded in "Label".
# NOTE(review): the dropped measurement columns look like unit variants of
# columns that are kept, plus identifiers and dates - confirm against the data.
df.drop(
    columns=[
        "Miss Dist.(Astronomical)",
        "Miss Dist.(lunar)",
        "Miss Dist.(miles)",
        "Relative Velocity km per sec",
        "Est Dia in M(max)",
        "Relative Velocity km per hr",
        "Est Dia in Feet(max)",
        "Est Dia in Feet(min)",
        "Est Dia in Miles(max)",
        "Est Dia in Miles(min)",
        "Est Dia in KM(max)",
        "Est Dia in KM(min)",
        "Neo Reference ID",
        "Orbit ID",
        "Name",
        "Close Approach Date",
        "Equinox",
        "Epoch Date Close Approach",
        "Orbiting Body",
        "Orbit Determination Date",
        "Hazardous",
    ],
    inplace=True,
)

In [None]:
# Correlation heatmap of the preprocessed frame.
# The correlation is recomputed here because the `corr` variable was built
# before the columns were dropped and before "Label" existed; plotting it
# again would only repeat the earlier heatmap.
plt.figure(figsize=(15, 8))

sns.heatmap(df.select_dtypes(include=["number", "bool"]).corr(), annot=True)

# Treino e Teste

In [None]:
# Features are every column except the last; the target is the last column
# ("Label" was appended to the frame by the encoding cell).
X, y = df.iloc[:, :-1], df.iloc[:, -1]

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for testing.
# `random_state` fixes the shuffle so the split, and every metric computed
# from it, is reproducible between runs. `stratify=y` keeps the class
# proportions of the target the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

In [None]:
# (rows, features) of the training features.
X_train.shape

In [None]:
# Shape of the training target.
y_train.shape

In [None]:
# (rows, features) of the test features.
X_test.shape

In [None]:
# Shape of the test target.
y_test.shape

In [None]:
# Standardize the features: the scaler is fitted on the training partition
# only and then applied to both partitions.
# NOTE(review): the decision tree below is trained on the unscaled
# `X_train`, so these scaled arrays are only inspected, never fed to the model.
scaler = StandardScaler()
scaler_fit = scaler.fit(X_train)
scaler_trasf_train, scaler_trasf_test = map(scaler.transform, (X_train, X_test))

In [None]:
# Shape of the scaled training features.
scaler_trasf_train.shape

In [None]:
# Shape of the scaled test features.
scaler_trasf_test.shape

In [None]:
# Display the scaled training features.
scaler_trasf_train

In [None]:
# Display the scaled test features.
scaler_trasf_test

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn import tree

# Fit a decision tree on the (unscaled) training partition.
# `random_state` makes the fitted tree, and therefore every downstream
# metric and plot, reproducible between runs.
de_tree = DecisionTreeClassifier(random_state=42)
de_tree.fit(X_train, y_train)

# Mean accuracy on the TRAINING data. This is not a generalization
# estimate; the held-out metrics are computed from `X_test` further below.
de_score = de_tree.score(X_train, y_train)
de_score

In [None]:
# Predicted labels for the held-out test features.
tree_pred = de_tree.predict(X_test)
tree_pred

In [None]:
# Decision tree plot
import matplotlib.pyplot as plt

# Display names for the two encoded classes, in ascending label order.
cn = ['0', '1']

# A single high-resolution figure is created and the tree is drawn on its
# axes explicitly. No separate `plt.figure(...)` call is made beforehand,
# because that would leave an extra, empty figure in the output.
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(4, 4), dpi=300)
tree.plot_tree(de_tree, class_names=cn, filled=True, ax=axes)
plt.savefig("Decision Tree")
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true test labels versus the tree's predictions.
matrix_confusion = confusion_matrix(y_test, tree_pred)
matrix_confusion

In [None]:
# Annotated heatmap of the confusion matrix; the 'g' format is applied to
# the cell annotations.
plt.figure(figsize=(10, 8))
ax = sns.heatmap(data=matrix_confusion, annot=True, fmt="g")

In [None]:
from sklearn.metrics import precision_score

# Macro-averaged precision on the test partition.
print(f"Precision = {precision_score(y_test, tree_pred, average='macro')}")

In [None]:
from sklearn.metrics import recall_score

# Macro-averaged recall on the test partition.
print(f"Recall = {recall_score(y_test, tree_pred, average='macro')}")

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy on the test partition.
print(f"Accuracy = {accuracy_score(y_test, tree_pred)}")

In [None]:
from sklearn.metrics import f1_score

# Macro-averaged F1 score on the test partition.
print(f"F1 Score = {f1_score(y_test, tree_pred, average='macro')}")

In [None]:
from sklearn.metrics import classification_report

# Text report of the classification metrics on the test partition.
print(classification_report(y_test, tree_pred))

In [None]:
import pickle

# Persist the test-set predictions to disk in binary pickle format.
# NOTE(review): this stores the prediction array, not the fitted model
# `de_tree` - confirm that saving predictions (rather than the model) is
# what is intended.
with open("tree_pred.pkl", mode="wb") as out_file:
    pickle.dump(tree_pred, out_file)