In [47]:
# Import packages
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn Packages
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Sklearn Evaluation Metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error, precision_score, confusion_matrix, accuracy_score

# Show every column when a DataFrame is displayed (no column truncation).
pd.set_option('display.max_columns', None)

# Silence DeprecationWarning / FutureWarning messages so they do not clutter cell output.
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [48]:
# Load the dataset; the relative path is resolved against the notebook's working directory.
# (Alternative location kept for reference: '/content/Exoplanet_ml')

df = pd.read_csv('cleaned.csv')


In [49]:
# Model inputs: every column of `df` except the ones listed here.
# NOTE(review): 'row'/'kepid' look like identifiers and 'koi_disposition'/'koi_score'
# presumably encode the label — confirm against the data dictionary.
features = df.drop(
    columns=[
        'row',
        'kepid',
        'koi_disposition',
        'koi_score',
        'ExoplanetCandidate',
    ]
)

# Label to predict.
target = df['ExoplanetCandidate']

In [50]:
# Down-cast every float64 column of `df` to float32.
# NOTE(review): `features` and `target` were already derived from `df` in the previous
# cell, so this cast does not appear to reach the data the models are trained on —
# confirm whether it was meant to run before that cell.
float64_columns = df.select_dtypes(np.float64).columns
df[float64_columns] = df[float64_columns].astype(np.float32)

In [51]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.25,
    random_state=1,
)

In [52]:
# Evaluation function

def evaluation(y_true, y_pred):
    """Print classification metrics and the flattened confusion matrix.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Labels predicted by a model, aligned with ``y_true``.

    Returns
    -------
    None
        All results are written to standard output.

    Notes
    -----
    Recall, F1 and precision are computed with scikit-learn's default
    settings. The ``TN, FP, FN, TP`` header printed above the confusion matrix
    is only meaningful for binary labels.
    """
    # Score against the y_true argument, not a notebook-global variable, so the
    # function reports correct numbers for whichever split it is given.
    print('Evaluation Metrics:')
    print('Accuracy: ' + str(metrics.accuracy_score(y_true, y_pred)))
    print('Recall: ' + str(metrics.recall_score(y_true, y_pred)))
    print('F1 Score: ' + str(metrics.f1_score(y_true, y_pred)))
    print('Precision: ' + str(metrics.precision_score(y_true, y_pred)))

    # Flattened 2x2 confusion matrix, in the order given by the header line.
    print('\nConfusion Matrix:')
    print(' TN,  FP, FN, TP')
    print(confusion_matrix(y_true, y_pred).ravel())
    
def print_results(results):
    """Print the best parameter set found by a fitted search object.

    ``results`` must expose a ``best_params_`` attribute (e.g. a fitted
    GridSearchCV). The line is followed by one blank line.
    """
    print(f'Best Parameters: {results.best_params_}\n')
    

In [53]:
# Logistic regression baseline: C=100, class-balanced weights, at most 200
# solver iterations. `fit` returns the estimator itself, so it can be chained.
lr = LogisticRegression(
    class_weight='balanced',
    max_iter=200,
    C=100,
).fit(X_train, y_train)

# Score the held-out split and print the metrics.
y_pred = lr.predict(X_test)
evaluation(y_test, y_pred)

Evaluation Metrics:
Accuracy: 0.6253731343283582
Recall: 0.7204724409448819
F1 Score: 0.7446592065106816
Precision: 0.7705263157894737

Confusion Matrix:
 TN,  FP, FN, TP
[ 53 109 142 366]


In [54]:
# k-nearest-neighbours with Manhattan distance and unweighted votes; the number
# of neighbours is left at the library default. `fit` returns the estimator itself.
knn = KNeighborsClassifier(
    metric='manhattan',
    weights='uniform',
    leaf_size=8,
).fit(X_train, y_train)

# Score the held-out split and print the metrics.
y_pred = knn.predict(X_test)
evaluation(y_test, y_pred)

Evaluation Metrics:
Accuracy: 0.7044776119402985
Recall: 0.8858267716535433
F1 Score: 0.819672131147541
Precision: 0.7627118644067796

Confusion Matrix:
 TN,  FP, FN, TP
[ 22 140  58 450]


In [55]:
# Decision tree with default hyper-parameters.
# The seed is pinned because scikit-learn randomly permutes the candidate features
# at each split, so an unseeded tree (and its metrics) can differ between runs.
tree = DecisionTreeClassifier(random_state=1)

# Fit on the training split.
tree.fit(X_train, y_train)

# Predict the held-out split.
y_pred = tree.predict(X_test)

# Print the evaluation metrics.
evaluation(y_test, y_pred)

Evaluation Metrics:
Accuracy: 0.7014925373134329
Recall: 0.8208661417322834
F1 Score: 0.8065764023210832
Precision: 0.7927756653992395

Confusion Matrix:
 TN,  FP, FN, TP
[ 53 109  91 417]


In [56]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tqdm.notebook import tqdm
from time import sleep

In [57]:
# Forest sizes to evaluate; currently a single value, but the loop allows more.
parameter_n_estimators = [500]
for i in tqdm(parameter_n_estimators):
    # Fixed seed: bootstrap sampling and per-split feature selection are random,
    # so without it each run yields a different forest and different metrics.
    forest = RandomForestClassifier(n_estimators=i, criterion='gini', random_state=1)
    # Fit on the training split.
    forest.fit(X_train, y_train)
    # Predict the held-out split.
    y_pred = forest.predict(X_test)

    # Print the evaluation metrics for this forest size.
    evaluation(y_test, y_pred)
    print('Tree: %s ' % (i))
# After the loop `forest` is the last fitted model; later cells use it for prediction.

  0%|          | 0/1 [00:00<?, ?it/s]

Evaluation Metrics:
Accuracy: 0.7134328358208956
Recall: 0.8582677165354331
F1 Score: 0.8195488721804511
Precision: 0.7841726618705036

Confusion Matrix:
 TN,  FP, FN, TP
[ 42 120  72 436]
Tree: 500 


In [58]:
import joblib
import lightkurve as lk
import pickle
from pathlib import Path
import os


In [20]:
# Uncomment to persist the fitted forest; the later joblib.load cell reads this same path.
#joblib.dump(forest, 'trained_models/ml_model3.joblib')

In [59]:
# Target to look up. NOTE(review): presumably a TESS Input Catalog (TIC) identifier — confirm.
T_name = 'TIC 145241359'
# Search for light curves of the target through lightkurve.
search_result = lk.search_lightcurve(T_name)
# Download only the first search hit.
# NOTE(review): this indexing fails if the search returned no results — confirm whether a guard is wanted.
lc = search_result[0].download()

In [61]:
# Number of flux samples that make up one feature row for the classifier.
# NOTE(review): presumably this must equal the number of columns the model was
# trained on — confirm against `features.shape[1]`.
N_FLUX_POINTS = 1625

flux = lc.flux

# Collect the leading flux samples as plain floats. A light curve shorter than
# N_FLUX_POINTS simply contributes fewer samples (zero-padded below).
arr = []
for x in range(min(N_FLUX_POINTS, len(flux))):
    try:
        arr.append(float(flux[x].value))
    except (TypeError, ValueError):
        # A sample that cannot be converted to float is skipped, as before;
        # unrelated errors are no longer silently swallowed.
        continue

# Kept so that `arr2` remains available as a one-column DataFrame of the samples.
arr2 = pd.DataFrame(arr)

# Final feature row: NaN readings become 0, and the row is right-padded with 0
# so it always holds exactly N_FLUX_POINTS entries.
arr3 = [0 if np.isnan(value) else value for value in arr]
arr3.extend([0] * (N_FLUX_POINTS - len(arr3)))



In [44]:
# Load a previously saved model from disk and classify the flux row `arr3`.
# NOTE(review): joblib files are pickle-based — only load files from a trusted source.
# NOTE(review): the only visible writer of this file is the commented-out joblib.dump
# cell, so the loaded model may differ from the `forest` trained in this session — confirm.
model = joblib.load('trained_models/ml_model3.joblib')
# `arr3` is wrapped in a list so that predict receives a single-row 2-D input.
result = model.predict([arr3])



In [45]:
# Show the class predicted by the model loaded from disk.
print(result)

[0]


In [62]:
# Classify the same feature row with the random forest fitted in this session.
test1 = forest.predict([arr3])



In [63]:
# Show the class predicted by the in-session random forest.
print(test1)

[1]
