In [None]:
# Import packages
%matplotlib inline
from pathlib import Path

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn Packages
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Sklearn Evaluation Metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error, precision_score, confusion_matrix, accuracy_score

# Show every column when a DataFrame is displayed (no truncation).
pd.set_option('display.max_columns', None)

import warnings

# Silence deprecation-style warnings so they do not clutter cell output.
for _ignored_category in (DeprecationWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)

In [None]:
# Import dataset
# The CSV is read relative to the notebook's current working directory.
# The commented-out path below is not used by the read.
#path = '/content/Exoplanet_ml'

df = pd.read_csv('cleaned.csv')


In [None]:
# Model inputs are every column except the five listed here;
# the label is the ExoplanetCandidate column.
excluded_columns = ['row', 'kepid', 'koi_disposition', 'koi_score', 'ExoplanetCandidate']
features = df.drop(columns=excluded_columns)
target = df['ExoplanetCandidate']

In [None]:
# Downcast every float64 column of df to float32.
# NOTE(review): `features` was already derived from df in the previous cell, so this
# downcast presumably does not reach the training data - confirm the intended order.
float64_columns = df.select_dtypes(np.float64).columns
df[float64_columns] = df[float64_columns].astype(np.float32)

In [179]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.30,
    random_state=1,
)

In [180]:
# Evaluation function

def evaluation(y_true, y_pred):
    """Print classification metrics and the confusion matrix for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels the predictions are scored against.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_true``.

    Returns
    -------
    None
        Everything is written to stdout: accuracy, recall, F1 score and
        precision, followed by the flattened confusion matrix under a
        ``TN, FP, FN, TP`` header.
    """
    # Every metric is computed from the y_true argument. Previously the four
    # scores read the notebook-global y_test, so the function silently ignored
    # its first parameter and only worked for the test split.
    print('Evaluation Metrics:')
    print('Accuracy: ' + str(metrics.accuracy_score(y_true, y_pred)))
    print('Recall: ' + str(metrics.recall_score(y_true, y_pred)))
    print('F1 Score: ' + str(metrics.f1_score(y_true, y_pred)))
    print('Precision: ' + str(metrics.precision_score(y_true, y_pred)))

    # Print Confusion Matrix, flattened to a single row.
    print('\nConfusion Matrix:')
    print(' TN,  FP, FN, TP')
    print(confusion_matrix(y_true, y_pred).ravel())
    
def print_results(results):
    """Print the ``best_params_`` attribute of a fitted search object.

    Parameters
    ----------
    results : object
        Any object exposing ``best_params_`` (e.g. a fitted GridSearchCV).
    """
    best_parameters = results.best_params_
    message = 'Best Parameters: {}\n'.format(best_parameters)
    print(message)
    

In [181]:
# Logistic Regression Model
# Build the classifier and train it in one expression (fit returns the estimator).
lr = LogisticRegression(
    C=100,
    max_iter=200,
    class_weight='balanced',
).fit(X_train, y_train)

# Predict the held-out rows, then report the metrics.
y_pred = lr.predict(X_test)
evaluation(y_test, y_pred)

Evaluation Metrics:
Accuracy: 0.5849858356940509
Recall: 0.6760299625468165
F1 Score: 0.7113300492610837
Precision: 0.7505197505197505

Confusion Matrix:
 TN,  FP, FN, TP
[ 52 120 173 361]


In [182]:
# K-nearest-neighbours model using Manhattan distance and uniform vote weights.
# Built and trained in one expression (fit returns the estimator).
knn = KNeighborsClassifier(
    leaf_size=8,
    metric='manhattan',
    weights='uniform',
).fit(X_train, y_train)

# Predict the held-out rows, then report the metrics.
y_pred = knn.predict(X_test)
evaluation(y_test, y_pred)

Evaluation Metrics:
Accuracy: 0.7082152974504249
Recall: 0.9063670411985019
F1 Score: 0.8245315161839863
Precision: 0.75625

Confusion Matrix:
 TN,  FP, FN, TP
[ 16 156  50 484]


In [183]:
# Decision tree with default hyper-parameters.
# random_state is pinned (same seed as the train/test split) because the tree
# shuffles candidate features at each split; without a seed the fitted tree and
# the metrics below change from run to run.
tree = DecisionTreeClassifier(random_state=1)

# Fitting Model to the train set
tree.fit(X_train, y_train)

# Predicting on the test set
y_pred = tree.predict(X_test)

# Evaluating model
evaluation(y_test, y_pred)

Evaluation Metrics:
Accuracy: 0.6473087818696884
Recall: 0.7696629213483146
F1 Score: 0.7675070028011205
Precision: 0.7653631284916201

Confusion Matrix:
 TN,  FP, FN, TP
[ 46 126 123 411]


In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tqdm.notebook import tqdm
from time import sleep

In [184]:
# Forest sizes to try; currently a single configuration.
parameter_n_estimators = [500]
for n_estimators in tqdm(parameter_n_estimators):
    # Instantiate model. random_state pins the bootstrap sampling and the
    # per-split feature selection so the metrics are repeatable across runs
    # (same seed as the train/test split).
    forest = RandomForestClassifier(
        n_estimators=n_estimators,
        criterion='gini',
        random_state=1,
    )
    # Fitting Model to the train set
    forest.fit(X_train, y_train)
    # Predicting on the test set
    y_pred = forest.predict(X_test)

    # Evaluating model
    evaluation(y_test, y_pred)
    print('Tree: %s ' % (n_estimators))

  0%|          | 0/1 [00:00<?, ?it/s]

Evaluation Metrics:
Accuracy: 0.6827195467422096
Recall: 0.8445692883895131
F1 Score: 0.8010657193605685
Precision: 0.7618243243243243

Confusion Matrix:
 TN,  FP, FN, TP
[ 31 141  83 451]
Tree: 500 


In [18]:
import joblib
import lightkurve as lk

In [None]:
# Persist the fitted forest to disk.
# joblib.dump does not create missing directories, so make sure the target
# folder exists first; otherwise a fresh checkout fails with FileNotFoundError.
MODEL_PATH = 'trained_models/ml_model3.joblib'
Path(MODEL_PATH).parent.mkdir(parents=True, exist_ok=True)
joblib.dump(forest, MODEL_PATH)

In [168]:
# Search for light curves of the target and download the first hit.
T_name = '10848459'
search_result = lk.search_lightcurve(T_name)

# Fail with a clear message instead of an opaque IndexError when the search
# comes back empty (unknown target, archive unavailable, ...).
if len(search_result) == 0:
    raise ValueError('No light curves found for target ' + T_name)

lc = search_result[0].download()

In [185]:
# Number of flux samples read from the light curve, and how many of them are
# kept for the cleaned vector (the final sample is dropped).
# NOTE(review): 1625 presumably matches the number of feature columns the
# classifier was trained on - confirm against X_train.shape[1].
N_FLUX_SAMPLES = 1626
N_CLEAN_SAMPLES = 1625

flux = lc.flux

# Raw flux values as plain floats; NaNs are still present in this list.
arr = [float(flux[i].value) for i in range(N_FLUX_SAMPLES)]

# Single-column DataFrame view of the raw values (column label 0).
arr2 = pd.DataFrame(arr)

# Cleaned vector: first N_CLEAN_SAMPLES values with NaN replaced by 0.
# NaN is detected numerically rather than by comparing str(value) to 'nan'.
# The unused `sum = 0` assignment was removed because it shadowed the builtin.
arr3 = [0 if np.isnan(value) else value for value in arr[:N_CLEAN_SAMPLES]]



2


In [188]:
# All-zero vector with 1625 entries.
arr4 = [0] * 1625

In [189]:
# Classify a single sample with the fitted forest; the sample passed is arr4.
# NOTE(review): arr4 is all zeros, while arr3 holds the cleaned flux values -
# confirm which vector was meant to be scored.
test1 = forest.predict([arr4])



In [190]:
# Show the label array returned by forest.predict for the single sample.
print(test1)

[0]


In [174]:
# Print the last raw flux value collected in arr.
print(arr[-1])

8198.67578125
