In [8]:
# Import packages
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Sklearn Packages
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Sklearn Evaluation Metrics
from sklearn import metrics
from sklearn.metrics import mean_squared_error, precision_score, confusion_matrix, accuracy_score

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.set_option('display.max_columns', None)

import warnings
# Silence deprecation and future-change warnings so they do not clutter cell output.
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

In [9]:
# Load the cleaned dataset from a CSV file located relative to the working directory.
# The commented-out path below is unused (appears to be a leftover from another environment).
#path = '/content/Exoplanet_ml'

df = pd.read_csv('cleaned.csv')


In [10]:
# Feature matrix: every column of df except the five dropped here
# (presumably identifiers and label-derived columns — confirm against the dataset schema).
features = df.drop(columns=['row','kepid','koi_disposition','koi_score','ExoplanetCandidate'])
# Classification target: the ExoplanetCandidate column.
target = df.ExoplanetCandidate

In [11]:
# Downcast every float64 column of df to float32 and assign the result back.
# NOTE(review): `features` is created from df (via drop) in the cell above, before
# this statement runs, so it presumably does not pick up the downcast — confirm
# whether this step was meant to run before the features/target split.
float64_columns = df.select_dtypes(np.float64).columns
df[float64_columns] = df[float64_columns].astype(np.float32)

In [12]:
# Split features/target into train and test sets, holding out 30% for testing;
# random_state=1 fixes the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(features, target, random_state=1, test_size=.30)

In [13]:
# Evaluation function

def evaluation(y_true, y_pred):
    """Print classification metrics and the confusion matrix for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels, aligned element-wise with ``y_true``.

    Returns
    -------
    None
        Everything is written to stdout.
    """
    # Score against the y_true argument rather than the notebook-global y_test,
    # so every metric (not just the confusion matrix) reflects the labels passed in.
    print('Evaluation Metrics:')
    print('Accuracy: ' + str(metrics.accuracy_score(y_true, y_pred)))
    print('Recall: ' + str(metrics.recall_score(y_true, y_pred)))
    print('F1 Score: ' + str(metrics.f1_score(y_true, y_pred)))
    print('Precision: ' + str(metrics.precision_score(y_true, y_pred)))

    # Flattened confusion matrix; the header's TN/FP/FN/TP order applies to
    # two-class labels.
    print('\nConfusion Matrix:')
    print(' TN,  FP, FN, TP')
    print(confusion_matrix(y_true, y_pred).ravel())
    
# Function Prints best parameters for GridSearchCV
def print_results(results):
    """Print the best parameter combination held by a fitted search object.

    Parameters
    ----------
    results : object
        Any object exposing a ``best_params_`` attribute.
    """
    best_parameters = results.best_params_
    report_line = 'Best Parameters: {}\n'.format(best_parameters)
    print(report_line)
    

In [14]:
# Logistic regression with balanced class weights, fitted on the training split
# (fit() returns the estimator itself, so construction and fitting are chained).
lr = LogisticRegression(
    class_weight='balanced',
    max_iter=200,
    C=100,
).fit(X_train, y_train)

# Predict the held-out rows and report the metrics.
y_pred = lr.predict(X_test)
evaluation(y_true=y_test, y_pred=y_pred)

Evaluation Metrics:
Accuracy: 0.6119402985074627
Recall: 0.7032786885245902
F1 Score: 0.7333333333333334
Precision: 0.7660714285714286

Confusion Matrix:
 TN,  FP, FN, TP
[ 63 131 181 429]


In [15]:
# k-nearest-neighbours classifier using Manhattan distance and uniform vote
# weights, fitted on the training split (fit() returns the estimator itself).
knn = KNeighborsClassifier(
    weights='uniform',
    metric='manhattan',
    leaf_size=8,
).fit(X_train, y_train)

# Predict the held-out rows and report the metrics.
y_pred = knn.predict(X_test)
evaluation(y_true=y_test, y_pred=y_pred)

Evaluation Metrics:
Accuracy: 0.7014925373134329
Recall: 0.8868852459016393
F1 Score: 0.8184568835098336
Precision: 0.7598314606741573

Confusion Matrix:
 TN,  FP, FN, TP
[ 23 171  69 541]


In [16]:
# Decision tree with default hyper-parameters. random_state is pinned (same seed
# as the train/test split) so that re-running the notebook reproduces the same
# tree and the same metrics.
tree = DecisionTreeClassifier(random_state=1)

# Fitting Model to the train set
tree.fit(X_train, y_train)

# Predicting on the test set
y_pred = tree.predict(X_test)

# Evaluating model
evaluation(y_test, y_pred)

Evaluation Metrics:
Accuracy: 0.6741293532338308
Recall: 0.7950819672131147
F1 Score: 0.7873376623376622
Precision: 0.7797427652733119

Confusion Matrix:
 TN,  FP, FN, TP
[ 57 137 125 485]


In [17]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from tqdm.notebook import tqdm
from time import sleep

In [18]:
# Forest sizes to try; one model is trained and evaluated per entry.
parameter_n_estimators = [500]
for i in tqdm(parameter_n_estimators):
    # Instantiate model. random_state is pinned (same seed as the train/test
    # split) so the bootstrap samples and feature draws, and therefore the
    # reported metrics, are reproducible across runs.
    forest = RandomForestClassifier(n_estimators=i, criterion='gini', random_state=1)
    # Fitting Model to the train set
    forest.fit(X_train, y_train)
    # Predicting on the test set
    y_pred = forest.predict(X_test)

    # Evaluating model
    evaluation(y_test, y_pred)
    print('Tree: %s ' % (i))

  0%|          | 0/1 [00:00<?, ?it/s]

Evaluation Metrics:
Accuracy: 0.7064676616915423
Recall: 0.8459016393442623
F1 Score: 0.8138801261829655
Precision: 0.78419452887538

Confusion Matrix:
 TN,  FP, FN, TP
[ 52 142  94 516]
Tree: 500 


In [35]:
import joblib
import lightkurve as lk
import pickle
from pathlib import Path
import os


In [20]:
#joblib.dump(forest, 'trained_models/ml_model3.joblib')

In [42]:
# Name of the target whose light curve is fetched and classified.
T_name = 'TIC 145241359'
# Search for light curves matching the target name via lightkurve.
search_result = lk.search_lightcurve(T_name)
# Download the first search hit.
# NOTE(review): indexing [0] presumably fails if the search returned no results — confirm.
lc = search_result[0].download()

In [22]:
# Number of leading flux samples that make up one feature row for the classifier.
# NOTE(review): presumably equals the number of feature columns the models were
# trained on — confirm.
N_FLUX_SAMPLES = 1625

flux = lc.flux
# Read exactly the samples that are used (the previous loop read one extra).
# Strict variant: a light curve shorter than N_FLUX_SAMPLES raises IndexError.
arr = [float(flux[i].value) for i in range(N_FLUX_SAMPLES)]
arr2 = pd.DataFrame(arr)
# Replace missing samples (NaN) with 0, using a numeric NaN check instead of
# comparing the value's string form.
arr3 = [0 if np.isnan(value) else value for value in arr]



In [43]:
# Number of leading flux samples that make up one feature row for the classifier.
# NOTE(review): presumably equals the number of feature columns the models were
# trained on — confirm.
N_FLUX_SAMPLES = 1625

flux = lc.flux
# Lenient variant: take at most N_FLUX_SAMPLES leading samples. Bounding the loop
# by the light curve's length handles short light curves explicitly, replacing
# the previous bare `except: pass`, which hid every other error and skipped
# (and thereby shifted) samples.
n_available = min(len(flux), N_FLUX_SAMPLES)
arr = [float(flux[i].value) for i in range(n_available)]
arr2 = pd.DataFrame(arr)
# Replace missing samples (NaN) with 0 using a numeric NaN check ...
arr3 = [0 if np.isnan(value) else value for value in arr]
# ... then zero-pad so the row always has exactly N_FLUX_SAMPLES entries.
arr3 += [0] * (N_FLUX_SAMPLES - len(arr3))

[0.9644221067428589,
 0.9621082544326782,
 0.9593591094017029,
 0.9674255847930908,
 0.9770174622535706,
 0.9723681807518005,
 0.9858632683753967,
 0.9967062473297119,
 0.9884286522865295,
 0.9938584566116333,
 0.9978466033935547,
 0.9989888668060303,
 0.9809838533401489,
 0.9936444759368896,
 0.9945304989814758,
 0.9959673881530762,
 0.9932485222816467,
 1.0052539110183716,
 0.9960630536079407,
 0.9921770095825195,
 0.9908486008644104,
 1.0028973817825317,
 1.0042318105697632,
 1.0016424655914307,
 1.002690315246582,
 1.0026103258132935,
 0.9983188509941101,
 1.0002999305725098,
 1.0074574947357178,
 1.002654790878296,
 1.004188895225525,
 1.0029640197753906,
 1.0010368824005127,
 1.0003081560134888,
 1.0014827251434326,
 0.9991661906242371,
 0.9962772130966187,
 0.9969939589500427,
 0.9987486004829407,
 1.0066603422164917,
 0.9946405291557312,
 1.0019738674163818,
 0.9983307719230652,
 0.9889373779296875,
 0.986815333366394,
 0.9876456260681152,
 0.9858681559562683,
 0.98932278156280

In [44]:
# Load a previously saved model from disk.
# NOTE(review): joblib files are pickle-based, so loading one can execute
# arbitrary code — only load model files from a trusted source.
model = joblib.load('trained_models/ml_model3.joblib')
# Predict on the flux-derived row; wrapping arr3 in a list passes it as a single sample.
result = model.predict([arr3])



In [45]:
# Show the prediction made by the model loaded from disk.
print(result)

[0]


In [26]:
# Predict on the flux-derived row with the in-memory `forest` model trained in this notebook.
# NOTE(review): the recorded execution counts show this cell ran before the
# loaded-model prediction cell, possibly with a different arr3 — re-run the
# notebook top to bottom before comparing the two predictions.
test1 = forest.predict([arr3])



In [27]:
# Show the prediction made by the in-memory random forest.
print(test1)

[1]
