In [None]:
# Numpy and Pandas
import numpy as np
import pandas as pd

# Pre-processing and setup functions
from sklearn.preprocessing import MinMaxScaler, StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.utils import to_categorical

# Algorithms
from sklearn.ensemble import RandomForestClassifier

# Report and model validation
from sklearn.metrics import classification_report

# Model persistence
from joblib import dump, load

# Let wide frames render up to 500 columns instead of being truncated.
pd.options.display.max_columns = 500

In [None]:
# Load the feature table; the relative path is resolved against the
# notebook's working directory.
exo2 = pd.read_csv(filepath_or_buffer='transformed_features.csv')

In [None]:
# Eyeball ten randomly chosen rows of the loaded frame.
exo2.sample(n=10)

In [None]:
# Summarise the frame: column dtypes, non-null counts, and memory usage.
exo2.info()

In [None]:
# Features: every column except the target and 'Unnamed: 0'
# (presumably an index column written out with the CSV -- confirm).
exo2_features = exo2.drop(columns=['koi_disposition', 'Unnamed: 0'])

# Target: the label column the classifier is trained to predict.
y = exo2['koi_disposition']

In [None]:
# Spot-check five random rows to confirm the target column is gone.
exo2_features.sample(n=5)

In [None]:
scaler = MinMaxScaler()
scaled_features = scaler.fit_transform(exo2_features)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(scaled_features, y, random_state=42, stratify=y)

In [None]:
# Base estimator for the hyper-parameter search. The seed is pinned (same
# value as the train/test split) so that bootstrap sampling and feature
# sub-sampling are reproducible; without it the best parameters and scores
# change on every Restart & Run All.
forest = RandomForestClassifier(random_state=42)

# Search space: 6 x 2 x 2 = 24 candidate combinations.
forest_grid = {
    'n_estimators': [25, 50, 100, 200, 400, 800],  # number of trees
    'criterion': ['gini', 'entropy'],              # split-quality measure
    'bootstrap': [True, False]                     # resample rows per tree or not
}

In [None]:
# Exhaustive search over forest_grid, scored by accuracy with 10-fold
# cross-validation; n_jobs=-1 uses every available core and verbose=3
# logs per-fit progress.
grid = GridSearchCV(
    estimator=forest,
    param_grid=forest_grid,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
    verbose=3,
)

# Run the search on the training split only; the test split stays untouched.
grid.fit(X_train, y_train)

In [None]:
# Hyper-parameter combination that achieved the best cross-validated score.
print(grid.best_params_)

In [None]:
# Mean cross-validated accuracy of the best parameter combination
# (training split only -- not a test-set score).
print(grid.best_score_)

In [None]:
# Score the held-out rows; GridSearchCV.predict delegates to the best
# estimator found by the search.
predictions = grid.predict(X_test)

# Per-class precision / recall / F1 on the test split.
report = classification_report(y_true=y_test, y_pred=predictions)
print(report)