In [10]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import LabelEncoder
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, classification_report

In [11]:

def Question_1():
    df = pd.read_csv('imputed_data.csv')

    # features and target variable
    X = df.drop(columns=['koi_disposition'])  
    y = df['koi_disposition']  

    # label encoding on the target variable
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    #parameter grid for RandomizedSearchCV
    param_grid = {
        'hidden_layer_sizes': [(50,), (100,), (100, 50), (100, 100)],
        'activation': ['relu', 'tanh'],
        'solver': ['adam', 'sgd'],
        'alpha': [0.0001, 0.001, 0.01],
        'learning_rate': ['constant', 'adaptive']
    }

    mlp_classifier = MLPClassifier(max_iter=500)

    random_search = RandomizedSearchCV(mlp_classifier, param_distributions=param_grid, n_iter=10, cv=3, random_state=42)

    random_search.fit(X_train, y_train)

    best_estimator = random_search.best_estimator_
    best_params = random_search.best_params_

    train_accuracy = best_estimator.score(X_train, y_train)
    test_accuracy = best_estimator.score(X_test, y_test)
    
    return  best_params, train_accuracy, test_accuracy




In [12]:
def Question_2():

    # Load data
    df = pd.read_csv('imputed_data.csv')

    # Split data into features (X) and target variable (y)
    X = df.drop(columns=['koi_disposition'])
    y = df['koi_disposition']

    # Label encoding on the target variable
    label_encoder = LabelEncoder()
    y = label_encoder.fit_transform(y)

    # Split data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Decision Tree Classifier
    dt_classifier = DecisionTreeClassifier()
    dt_classifier.fit(X_train, y_train)
    dt_predictions = dt_classifier.predict(X_test)

    # Naive Bayes Classifier
    nb_classifier = GaussianNB()
    nb_classifier.fit(X_train, y_train)
    nb_predictions = nb_classifier.predict(X_test)

    # Evaluation of thee classifiers
    dt_accuracy = accuracy_score(y_test, dt_predictions)
    nb_accuracy = accuracy_score(y_test, nb_predictions)
    
    print("\nClassification Report for Decision Tree Classifier:")
    print(classification_report(y_test, dt_predictions))

    print("\nClassification Report for Naive Bayes Classifier:")
    print(classification_report(y_test, nb_predictions))

    return dt_accuracy, nb_accuracy
    



In [13]:
def main():
    # Question - 1
    best_params, train_accuracy, test_accuracy = Question_1()
    print("Best Hyperparameters:", best_params)
    print("Train Accuracy:", train_accuracy)
    print("Test Accuracy:", test_accuracy)
    
    
    #Question - 2
    dt_accuracy, nb_accuracy = Question_2()
    print("Decision Tree Classifier Accuracy:", dt_accuracy)
    print("Naive Bayes Classifier Accuracy:", nb_accuracy)
    
main()


Best Hyperparameters: {'solver': 'adam', 'learning_rate': 'constant', 'hidden_layer_sizes': (50,), 'alpha': 0.0001, 'activation': 'relu'}
Train Accuracy: 0.6115540452228467
Test Accuracy: 0.5912179822268688
Decision Tree Classifier Accuracy: 0.9100888656560376
Naive Bayes Classifier Accuracy: 0.5154208050182959

Classification Report for Decision Tree Classifier:
              precision    recall  f1-score   support

           0       0.80      0.79      0.79       408
           1       0.85      0.85      0.85       566
           2       1.00      1.00      1.00       939

    accuracy                           0.91      1913
   macro avg       0.88      0.88      0.88      1913
weighted avg       0.91      0.91      0.91      1913


Classification Report for Naive Bayes Classifier:
              precision    recall  f1-score   support

           0       0.39      0.25      0.31       408
           1       0.43      0.96      0.59       566
           2       0.91      0.36      