In [11]:
#Importing libraries
from sklearn.ensemble import GradientBoostingClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score, RandomizedSearchCV
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import preprocessing
from scipy.stats import randint, uniform

import warnings

# Blanket suppression: every warning raised from here on is ignored,
# including deprecation notices emitted by the imported libraries.
warnings.filterwarnings(action="ignore")

In [12]:
#Empty dataframe that collects the data produced by each iteration:
#one column per hyperparameter plus the resulting mean test score
results_df = pd.DataFrame(
    columns=[
        'n_estimators',
        'learning_rate',
        'max_depth',
        'max_features',
        'mean_test_score',
    ]
)

In [13]:
#Loading the dataset
# A missing file is re-raised instead of calling exit(): exit() is an
# interactive-shell helper and is not a clean way to abort a notebook run,
# whereas an exception halts "Run All" and keeps the original traceback.
file_path = "./dataset.csv"
try:
    dataset = pd.read_csv(file_path)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f"File not found: {file_path!r}. Please check the path and try again."
    ) from err

In [14]:
#Verify that the loaded dataset is a dataframe
# Raise rather than exit(): an exception stops the notebook run at this cell
# and reports the problem without touching the kernel session.
if not isinstance(dataset, pd.DataFrame):
    raise TypeError(
        "Dataset is not a dataframe. Please check the file and try again."
    )
    

In [15]:
#Encode the target column 'koi_disposition' as integer class labels
encoder = LabelEncoder()
y = encoder.fit_transform(dataset['koi_disposition'])

#Save the 'koi_disposition' column in a variable so it can be re-attached below
koi_disposition_column = dataset['koi_disposition']

#Collect the names of all non-numeric columns
non_numeric_columns = dataset.select_dtypes(exclude=['number']).columns

# NOTE(review): a previous `dataset.dropna(subset=['koi_score'])` result was
# assigned to dataset_numeric and then overwritten before being read, so it
# never removed any rows; that dead assignment has been dropped. If rows with
# a missing 'koi_score' should really be excluded, filter `dataset` itself
# before `y` is encoded so that features and labels keep the same length.

#Drop the non-numeric columns, then add 'koi_disposition' back so the
#resulting frame carries the numeric columns together with the label column
dataset_numeric = dataset.drop(columns=non_numeric_columns)
dataset_numeric['koi_disposition'] = koi_disposition_column


In [16]:
#Split features and labels into training and testing sets
#(20% held out for testing; fixed random_state for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    dataset_numeric.drop(columns=['koi_disposition']),
    y,
    test_size=0.2,
    random_state=42,
)

In [18]:
#Handle missing values with SimpleImputer: the imputer is fitted on the
#training set only and then applied to both the training and testing sets
imputer = SimpleImputer(strategy='mean')
imputer.fit(X_train)
X_train_imputed = imputer.transform(X_train)
X_test_imputed = imputer.transform(X_test)

In [19]:
#Scale the imputed data: the scaler is fitted on the training set and the
#same fitted scaler is then applied to the testing set
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_imputed)
X_test_scaled = scaler.transform(X_test_imputed)

In [20]:
#Initial ranges / candidate values for each hyperparameter
param_ranges = dict(
    n_estimators=(90, 130),
    learning_rate=(0.01, 0.1),
    max_depth=(5, 15),
    max_features=['sqrt', 'log2', None],
)

#Number of iterations
n_iter = 200