# Random Forest Model

In [1]:
import pandas as pd

## The Dataset

In [2]:
df = pd.read_csv("exoplanet_data.csv")
# Drop the null columns where all values are null
df = df.dropna(axis='columns', how='all')
# Drop the null rows
df = df.dropna()


## Data Pre-Processing

In [3]:
target = df['koi_disposition']
target_names = ['Confirmed', 'False Positive', 'Candidate']
df = df.drop("koi_disposition", axis=1)

In [4]:
# Set features. This will also be used as your x values.
selected_features = df[['koi_fpflag_nt', 'koi_fpflag_ss', 'koi_fpflag_co',
       'koi_fpflag_ec', 'koi_period','koi_time0bk', 'koi_impact',
        'koi_duration', 'koi_depth', 'koi_prad', 'koi_teq', 'koi_insol', 
       'koi_model_snr', 'koi_steff', 'koi_slogg', 'koi_srad', 'ra', 'dec',
       'koi_kepmag']]

In [5]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(selected_features, target, random_state=1)

In [6]:
from sklearn.preprocessing import StandardScaler

X_scaler = StandardScaler().fit(X_train)

In [7]:
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [8]:
from sklearn.preprocessing import LabelEncoder

# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y = label_encoder.transform(y_train)

## Create a Random Forest Model

In [9]:
from sklearn.ensemble import RandomForestClassifier

# Create a random forest classifier
rf = RandomForestClassifier(n_estimators=100)
rf = rf.fit(X_train_scaled, encoded_y)
rf.score(X_train_scaled, encoded_y)

1.0

In [10]:
rf.feature_importances_

array([0.11982155, 0.11420368, 0.11898116, 0.05396042, 0.04174047,
       0.02980197, 0.03940095, 0.02991104, 0.03639365, 0.08913246,
       0.03677679, 0.03214553, 0.12816531, 0.02261841, 0.02070232,
       0.02049329, 0.02325331, 0.02147129, 0.02102641])

## Predictions

In [11]:
# Use the first 5 test data values to make a prediction and compare it to the actual labels
predictions = rf.predict(X_test_scaled[:5])


print(f"Actual Labels: {list(y_test[:5])}, Predicted labels: {predictions}")



Actual Labels: ['CONFIRMED', 'FALSE POSITIVE', 'FALSE POSITIVE', 'CONFIRMED', 'FALSE POSITIVE'], Predicted labels: [1 2 2 1 2]


In [12]:
# save your model by updating "your_name" with your name
# and "your_model" with your model variable
# be sure to turn this in to BCS
# if joblib fails to import, try running the command to install in terminal/git-bash
import joblib
filename = 'exoplanet_random_forest.sav'
joblib.dump(rf, filename)

['exoplanet_random_forest.sav']