In [None]:
#importing required libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
#loading the cumulative.csv dataset into a DataFrame
csv_path='../input/kepler-exoplanet-search-results/cumulative.csv'
data=pd.read_csv(csv_path)
data

In [None]:
#data information
#DataFrame.info() writes its summary to stdout itself and returns None,
#so wrapping it in print() only appended a stray "None" line
data.info()

In [None]:
#number of missing entries in each column of the dataset
null_counts=data.isna().sum()
print(null_counts)

In [None]:
#fraction of null values per column (a value between 0 and 1, not a percentage)
null_fraction=data.isna().mean()
print(null_fraction,"\n\n")

#flagging the columns in which at least 10 percent of the values are null
#(the threshold that is actually applied is 0.1)
print(null_fraction>=0.1)

In [None]:
#preprocessing function

def preprocess(data, train_size=0.8, random_state=1):
    """Turn the raw table into scaled train/test splits for binary classification.

    Steps, in order:
      1. Drop the unused identifier/score columns and the two ``koi_teq_err*``
         columns.
      2. Remove every row whose ``koi_disposition`` is ``'FALSE POSITIVE'``.
      3. Separate the target (``koi_disposition``) from the features and split
         both into train and test sets.
      4. Fill missing values using statistics computed on the training split
         only (column mean for numeric columns, mode for
         ``koi_tce_delivname``), so nothing about the test rows leaks into
         the training data.
      5. One-hot encode ``koi_tce_delivname`` (prefix ``delivname``) and align
         the test columns to the training columns.
      6. Standardise the features with a ``StandardScaler`` fitted on the
         training split.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table. It is copied first; the caller's frame is not modified.
    train_size : float, default 0.8
        Forwarded to ``train_test_split``.
    random_state : int, default 1
        Forwarded to ``train_test_split``.

    Returns
    -------
    X_train, X_test : pandas.DataFrame
        Scaled feature frames that keep the original row index and share the
        same columns.
    y_train, y_test : pandas.Series
        The matching ``koi_disposition`` labels.

    Raises
    ------
    KeyError
        If one of the columns named below is absent from ``data``.
    IndexError
        If ``koi_tce_delivname`` has no non-null value in the training split
        (its mode is then empty).
    """
    data=data.copy()

    #dropping unused columns
    data=data.drop(['rowid', 'kepid', 'kepoi_name', 'kepler_name', 'koi_pdisposition','koi_score'], axis=1)

    #Limit target values by removing FALSE POSITIVE rows (temporary binary classification).
    #A boolean mask is used instead of dropping by index label, so the filter
    #stays correct even when the index contains duplicate labels.
    data=data[data['koi_disposition']!='FALSE POSITIVE']

    #dropping the columns the notebook identified as having all values missing
    data=data.drop(['koi_teq_err1', 'koi_teq_err2'], axis=1)

    #splitting data into features and target
    y=data['koi_disposition']
    X=data.drop(['koi_disposition'], axis=1)

    #train test split comes BEFORE imputation so that fill values are learnt
    #from the training rows only
    X_train, X_test, y_train, y_test=train_test_split(X, y, train_size=train_size, random_state=random_state, shuffle=True)

    #filling missing values with training-split statistics;
    #columns whose training mean is NaN (no observed value) are left untouched
    deliv_col='koi_tce_delivname'
    fill_values=X_train.mean(numeric_only=True).dropna().to_dict()
    fill_values[deliv_col]=X_train[deliv_col].mode()[0]
    X_train=X_train.fillna(fill_values)
    X_test=X_test.fillna(fill_values)

    #creating separate columns for koi_tce_delivname; the test frame is
    #reindexed to the training columns so both have an identical layout
    #(a category unseen in training becomes an all-zero row in the test set)
    X_train=pd.get_dummies(X_train, columns=[deliv_col], prefix='delivname')
    X_test=pd.get_dummies(X_test, columns=[deliv_col], prefix='delivname')
    X_test=X_test.reindex(columns=X_train.columns, fill_value=0)

    #scaling the data with a scaler fitted on the training split only
    scaler=StandardScaler()
    scaler.fit(X_train)
    X_train=pd.DataFrame(scaler.transform(X_train), index=X_train.index, columns=X_train.columns)
    X_test=pd.DataFrame(scaler.transform(X_test), index=X_test.index, columns=X_test.columns)

    return X_train, X_test, y_train, y_test
    

In [None]:
#building the scaled train/test splits from the loaded dataset
splits=preprocess(data)
X_train, X_test, y_train, y_test=splits

#showing the training features followed by the training labels
print(X_train, "\n", y_train)

In [None]:
#creating the neural network model(MLP classifier) with one hidden layer of 150 units
#note: (150) is just the integer 150, a one-element tuple needs the trailing comma
#random_state fixes the weight initialisation and shuffling so reruns give the same model
model=MLPClassifier(hidden_layer_sizes=(150,), random_state=1)
print("Model created")

In [None]:
#fitting the MLP classifier on the scaled training split returned by preprocess
model.fit(X_train, y_train)
print("Model training complete")

In [None]:
#predictions for the held-out test features
y_pred=model.predict(X_test)

#actual target test set
#Series.ravel() is deprecated in recent pandas; to_numpy() yields the same 1-D array
print("Test set:\n",y_test.to_numpy())

#predicted set (model.predict already returns a 1-D array, so no ravel is needed)
print("\nPredicted set:\n", y_pred)

In [None]:
#confusion matrix of actual vs. predicted labels on the test split
#(scikit-learn puts actual classes on the rows and predicted classes on the columns)
cm=confusion_matrix(y_test, y_pred)
print(cm)

In [None]:
#per-class precision, recall and f1-score on the test split
report=classification_report(y_test, y_pred)
print(report)