In [4]:
import csv
import os

# Path to the folder containing CSV files
folder_path = r"C:\Users\KIIT\Desktop\Csv"


# Get a list of all CSV files in the folder
file_list = [file for file in os.listdir(folder_path) if file.endswith(".csv")]

# Initialize a list to store the combined data
combined_data = []

# Iterate over each CSV file and append its data to the combined list
for file in file_list:
    file_path = os.path.join(folder_path, file)
    with open(file_path, "r") as csvfile:
        reader = csv.reader(csvfile)
        header = next(reader)  # Read the header
        for row in reader:
            combined_data.append(row)

# Path and filename for the combined CSV file
combined_file_path = r"C:\Users\KIIT\Desktop\Combined_all.csv"

# Write the combined data to a new CSV file
with open(combined_file_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)  # Write the header
    writer.writerows(combined_data)  # Write the data rows

print("CSV files combined successfully!")


CSV files combined successfully!


In [1]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import pandas as pd

In [2]:
# Path to the dataset
dataset_path = r"C:\Users\KIIT\Desktop\Combined_all.csv"

# Load the dataset into a pandas DataFrame
df = pd.read_csv(dataset_path)

# Separate the features (X) and the target variable (y)
X = df.drop('Facies', axis=1)  # Replace 'target_variable' with the actual column name
y = df['Facies']


In [22]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 200,
    'learning_rate': 0.05,
    'max_depth': 3,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'subsample': 1.0
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the accuracy
print("Accuracy:", accuracy)
print("F1 Score:", f1)


Accuracy: 0.9335142469470827
F1 Score: 0.9013620216659122


### Custom Simulated Annealing algorithm to perform optimization

In [28]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import numpy as np

# Function to calculate the objective value (e.g., accuracy, F1 score)
def calculate_objective_value(y_true, y_pred):
    return accuracy_score(y_true, y_pred)

# Function to generate a random neighbor solution
def generate_neighbor_solution(params, temperature):
    neighbor_params = params.copy()
    for param_name in params:
        value = params[param_name]
        if param_name == 'max_depth':
            range_min = max(1, value - int(temperature))
            range_max = value + int(temperature)
            neighbor_params[param_name] = np.random.randint(range_min, range_max + 1)
        elif param_name == 'min_samples_leaf':
            if isinstance(value, int):
                range_min = max(1, value - int(temperature))
                range_max = value + int(temperature)
                neighbor_params[param_name] = np.random.randint(range_min, range_max + 1)
            else:
                range_min = max(0.01, value - temperature)
                range_max = min(0.5, value + temperature)
                neighbor_params[param_name] = round(np.random.uniform(range_min, range_max), 2)
        elif isinstance(value, int):
            range_min = max(0, value - int(temperature))
            range_max = value + int(temperature)
            neighbor_params[param_name] = np.random.randint(range_min, range_max + 1)
        else:
            range_min = max(0.0, value - temperature)
            range_max = min(1.0, value + temperature)
            neighbor_params[param_name] = np.random.uniform(range_min, range_max)
    return neighbor_params

# Function to perform Simulated Annealing optimization
def perform_simulated_annealing(X_train, X_test, y_train, y_test, initial_params, max_iterations, initial_temperature, cooling_rate):
    current_params = initial_params.copy()
    current_classifier = GradientBoostingClassifier(**current_params)
    current_classifier.fit(X_train, y_train)
    current_pred = current_classifier.predict(X_test)
    current_value = calculate_objective_value(y_test, current_pred)

    best_params = current_params.copy()
    best_value = current_value

    for iteration in range(max_iterations):
        temperature = initial_temperature / (1 + cooling_rate * iteration)

        neighbor_params = generate_neighbor_solution(current_params, temperature)
        neighbor_classifier = GradientBoostingClassifier(**neighbor_params)
        neighbor_classifier.fit(X_train, y_train)
        neighbor_pred = neighbor_classifier.predict(X_test)
        neighbor_value = calculate_objective_value(y_test, neighbor_pred)

        delta_value = neighbor_value - current_value

        if delta_value > 0 or np.exp(delta_value / temperature) > np.random.uniform(0, 1):
            current_params = neighbor_params
            current_classifier = neighbor_classifier
            current_pred = neighbor_pred
            current_value = neighbor_value

        if neighbor_value > best_value:
            best_params = neighbor_params
            best_value = neighbor_value

    return best_params, best_value

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the initial parameters for the gradient boosting classifier
initial_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 3,
    'min_samples_split': 2,
    'min_samples_leaf': 1,
    'subsample': 1.0
}

# Set the parameters for the Simulated Annealing optimization
max_iterations = 100
initial_temperature = 100.0
cooling_rate = 0.01

# Perform Simulated Annealing optimization
best_params, best_value = perform_simulated_annealing(X_train, X_test, y_train, y_test, initial_params, max_iterations, initial_temperature, cooling_rate)

# Print the optimized parameters
print("Optimized Parameters:")
for param_name, param_value in best_params.items():
    print(f"{param_name}: {param_value}")

# Create a gradient boosting classifier object with the best parameters
gb_classifier = GradientBoostingClassifier(**best_params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy and F1 score of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the best parameters and the performance metrics
print("Best Parameters:", best_params)
print("Best Accuracy:", best_value)
print("Accuracy:", accuracy)
print("F1 Score:", f1)



Optimized Parameters:
n_estimators: 546
learning_rate: 0.1962544390489066
max_depth: 788
min_samples_split: 208
min_samples_leaf: 245
subsample: 0.9303415851338337
Best Parameters: {'n_estimators': 546, 'learning_rate': 0.1962544390489066, 'max_depth': 788, 'min_samples_split': 208, 'min_samples_leaf': 245, 'subsample': 0.9303415851338337}
Best Accuracy: 0.9525101763907734
Accuracy: 0.9552238805970149
F1 Score: 0.9282703317546004


### Randomized Search Optimization

In [29]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, f1_score
from scipy.stats import randint, uniform

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameter distributions for randomized search
param_distributions = {
    'n_estimators': randint(100, 1000),
    'learning_rate': uniform(0.01, 0.5),
    'max_depth': randint(1, 10),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 10),
    'subsample': uniform(0.1, 1.0)
}

# Create a gradient boosting classifier object
gb_classifier = GradientBoostingClassifier()

# Perform randomized search for hyperparameter optimization
random_search = RandomizedSearchCV(gb_classifier, param_distributions, n_iter=100, cv=5, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

# Get the best estimator and its corresponding parameters
best_classifier = random_search.best_estimator_
best_params = random_search.best_params_

# Predict on the test set using the best classifier
y_pred = best_classifier.predict(X_test)

# Calculate the accuracy and F1 score of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the best parameters and the performance metrics
print("Best Parameters:", best_params)
print("Accuracy:", accuracy)
print("F1 Score:", f1)



65 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\KIIT\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\KIIT\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 525, in fit
    self._check_params()
  File "C:\Users\KIIT\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 317, in _check_params
    raise ValueError("subsample must be in (0,1] but was %r" % self.subsample)
ValueError: subsample must be in (0,1] but was 1.0832308858067883

--------------------------------------------------

Best Parameters: {'learning_rate': 0.13938999080000847, 'max_depth': 8, 'min_samples_leaf': 4, 'min_samples_split': 3, 'n_estimators': 489, 'subsample': 0.3079416628681888}
Accuracy: 0.9497964721845319
F1 Score: 0.9213902462885514


### Grid Search Optimization

In [30]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import accuracy_score, f1_score
from scipy.stats import randint, uniform

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameter distributions for random search
param_distributions = {
    'n_estimators': randint(100, 1000),
    'learning_rate': uniform(0.01, 0.5),
    'max_depth': randint(3, 10),
    'min_samples_split': randint(2, 20),
    'min_samples_leaf': randint(1, 10),
    'subsample': uniform(0.6, 1.0)
}

# Create a gradient boosting classifier object
gb_classifier = GradientBoostingClassifier()

# Perform random search for hyperparameter optimization
random_search = RandomizedSearchCV(gb_classifier, param_distributions, n_iter=100, cv=5, scoring='accuracy', random_state=42)
random_search.fit(X_train, y_train)

# Get the best estimator and its corresponding parameters
best_classifier = random_search.best_estimator_
best_params = random_search.best_params_

# Predict on the test set using the best classifier
y_pred = best_classifier.predict(X_test)

# Calculate the accuracy and F1 score of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the best parameters and the performance metrics
print("Best Parameters:", best_params)
print("Accuracy:", accuracy)
print("F1 Score:", f1)


310 fits failed out of a total of 500.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\KIIT\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 680, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\KIIT\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 525, in fit
    self._check_params()
  File "C:\Users\KIIT\anaconda3\lib\site-packages\sklearn\ensemble\_gb.py", line 317, in _check_params
    raise ValueError("subsample must be in (0,1] but was %r" % self.subsample)
ValueError: subsample must be in (0,1] but was 1.2508884729488527

-------------------------------------------------

Best Parameters: {'learning_rate': 0.23421207149312367, 'max_depth': 7, 'min_samples_leaf': 9, 'min_samples_split': 5, 'n_estimators': 260, 'subsample': 0.6180753636155208}
Accuracy: 0.9538670284938942
F1 Score: 0.9254729681878515


### bayes search optimization

In [34]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from skopt import BayesSearchCV

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the search space for Bayesian optimization
param_space = {
    'n_estimators': (100, 1000),
    'learning_rate': (0.01, 0.5, 'log-uniform'),
    'max_depth': (3, 10),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 10),
    'subsample': (0.6, 1.0, 'uniform')
}

# Create a gradient boosting classifier object
gb_classifier = GradientBoostingClassifier()

# Perform Bayesian optimization for hyperparameter tuning
bayes_search = BayesSearchCV(gb_classifier, param_space, n_iter=100, cv=5, scoring='accuracy')
bayes_search.fit(X_train, y_train)

# Get the best estimator and its corresponding parameters
best_classifier = bayes_search.best_estimator_
best_params = bayes_search.best_params_

# Predict on the test set using the best classifier
y_pred = best_classifier.predict(X_test)

# Calculate the accuracy and F1 score of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the best parameters and the performance metrics
print("Best Parameters:", best_params)
print("Accuracy:", accuracy)
print("F1 Score:", f1)


Best Parameters: OrderedDict([('learning_rate', 0.021698784649126786), ('max_depth', 10), ('min_samples_leaf', 10), ('min_samples_split', 11), ('n_estimators', 1000), ('subsample', 0.6412325151785777)])
Accuracy: 0.9484396200814111
F1 Score: 0.9179586829313171


### Best parameters 
##### 1. simulated annealing
##### Best Parameters: {'n_estimators': 97, 'learning_rate': 0.18581554614968288, 'max_depth': 83, 'min_samples_split': 120, 'min_samples_leaf': 74, 'subsample': 0.21483960974576843}
##### Best Accuracy: 0.9565807327001357

In [9]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 97,
    'learning_rate':  0.18581554614968288,
    'max_depth': 83,
    'min_samples_split': 120,
    'min_samples_leaf': 74,
    'subsample': 0.21483960974576843
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the accuracy
print("Accuracy:", accuracy)
print("F1 Score:", f1)


Accuracy: 0.9443690637720489
F1 Score: 0.9109112575985641


##### 2. Grid Search
##### Best Parameters: {'learning_rate': 0.23421207149312367, 'max_depth': 7, 'min_samples_leaf': 9, 'min_samples_split': 5, 'n_estimators': 260, 'subsample': 0.6180753636155208}
##### Accuracy: 0.9538670284938942

In [8]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 260,
    'learning_rate':  0.23421207149312367,
    'max_depth': 7,
    'min_samples_split': 5,
    'min_samples_leaf': 9,
    'subsample': 0.6180753636155208
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the accuracy
print("Accuracy:", accuracy)
print("F1 Score:", f1)


Accuracy: 0.9525101763907734
F1 Score: 0.9241244916869189


### Combining 4 csvs

In [46]:
import csv
import os

# Path to the folder containing CSV files
folder_path = r"C:\Users\KIIT\Desktop\Csv"


# Get a list of all CSV files in the folder
file_list = [file for file in os.listdir(folder_path) if file.endswith(".csv")]

# Initialize a list to store the combined data
combined_data = []

# Iterate over each CSV file and append its data to the combined list
for file in file_list:
    file_path = os.path.join(folder_path, file)
    with open(file_path, "r") as csvfile:
        reader = csv.reader(csvfile)
        header = next(reader)  # Read the header
        for row in reader:
            combined_data.append(row)

# Path and filename for the combined CSV file
combined_file_path = r"C:\Users\KIIT\Desktop\Combined_4.csv"

# Write the combined data to a new CSV file
with open(combined_file_path, "w", newline="") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header)  # Write the header
    writer.writerows(combined_data)  # Write the data rows

print("CSV files combined successfully!")


CSV files combined successfully!


##### 1. simulated annealing

In [47]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score

# Load the training data from the first CSV file
train_data = pd.read_csv(r"C:\Users\KIIT\Desktop\Combined_4.csv")

# Split the features and target variable
X_train = train_data.drop('Facies', axis=1)
y_train = train_data['Facies']

# Load the test data from the second CSV file
test_data = pd.read_csv(r"C:\Users\KIIT\Desktop\SB-19.csv")

# Split the features and target variable
X_test = test_data.drop('Facies', axis=1)
y_test = test_data['Facies']

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 97,
    'learning_rate':  0.18581554614968288,
    'max_depth': 83,
    'min_samples_split': 120,
    'min_samples_leaf': 74,
    'subsample': 0.21483960974576843
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)


Accuracy: 0.7868480725623582


##### 2. Grid Search

In [48]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
# Load the training data from the first CSV file
train_data = pd.read_csv(r"C:\Users\KIIT\Desktop\Combined_4.csv")

# Split the features and target variable
X_train = train_data.drop('Facies', axis=1)
y_train = train_data['Facies']

# Load the test data from the second CSV file
test_data = pd.read_csv(r"C:\Users\KIIT\Desktop\SB-19.csv")

# Split the features and target variable
X_test = test_data.drop('Facies', axis=1)
y_test = test_data['Facies']

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 260,
    'learning_rate':  0.23421207149312367,
    'max_depth': 7,
    'min_samples_split': 5,
    'min_samples_leaf': 9,
    'subsample': 0.6180753636155208
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)

Accuracy: 0.7936507936507936


##### 3. Randomized Search
##### Best Parameters: {'learning_rate': 0.13938999080000847, 'max_depth': 8, 'min_samples_leaf': 4, 'min_samples_split': 3, 'n_estimators': 489, 'subsample': 0.3079416628681888}
##### Accuracy: 0.9497964721845319
##### F1 Score: 0.9213902462885514

In [49]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
# Load the training data from the first CSV file
train_data = pd.read_csv(r"C:\Users\KIIT\Desktop\Combined_4.csv")

# Split the features and target variable
X_train = train_data.drop('Facies', axis=1)
y_train = train_data['Facies']

# Load the test data from the second CSV file
test_data = pd.read_csv(r"C:\Users\KIIT\Desktop\SB-19.csv")

# Split the features and target variable
X_test = test_data.drop('Facies', axis=1)
y_test = test_data['Facies']

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 3,
    'learning_rate': 0.13938999080000847,
    'max_depth': 8,
    'min_samples_split': 3,
    'min_samples_leaf': 4,
    'subsample': 0.3079416628681888
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)

Accuracy: 0.7437641723356009


##### Sinulated Annealing part 2

In [50]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
# Load the training data from the first CSV file
train_data = pd.read_csv(r"C:\Users\KIIT\Desktop\Combined_4.csv")

# Split the features and target variable
X_train = train_data.drop('Facies', axis=1)
y_train = train_data['Facies']

# Load the test data from the second CSV file
test_data = pd.read_csv(r"C:\Users\KIIT\Desktop\SB-19.csv")

# Split the features and target variable
X_test = test_data.drop('Facies', axis=1)
y_test = test_data['Facies']

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 546,
    'learning_rate': 0.1962544390489066,
    'max_depth': 788,
    'min_samples_split': 208,
    'min_samples_leaf': 245, 
    'subsample': 0.9303415851338337
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)

Accuracy: 0.7573696145124716


##### Bayes Search

In [51]:
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score
# Load the training data from the first CSV file
train_data = pd.read_csv(r"C:\Users\KIIT\Desktop\Combined_4.csv")

# Split the features and target variable
X_train = train_data.drop('Facies', axis=1)
y_train = train_data['Facies']

# Load the test data from the second CSV file
test_data = pd.read_csv(r"C:\Users\KIIT\Desktop\SB-19.csv")

# Split the features and target variable
X_test = test_data.drop('Facies', axis=1)
y_test = test_data['Facies']

# Define the parameters for the gradient boosting classifier
params = {
    'n_estimators': 1000,
    'learning_rate': 0.021698784649126786,
    'max_depth': 10,
    'min_samples_split': 11,
    'min_samples_leaf': 10, 
    'subsample': 0.6412325151785777
}

# Create a gradient boosting classifier object with the defined parameters
gb_classifier = GradientBoostingClassifier(**params)

# Train the classifier
gb_classifier.fit(X_train, y_train)

# Predict on the test set
y_pred = gb_classifier.predict(X_test)

# Calculate the accuracy of the classifier
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy
print("Accuracy:", accuracy)

Accuracy: 0.7913832199546486


In [3]:
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from skopt import BayesSearchCV
import time
import pandas as pd

# Path to the dataset
dataset_path = r"C:\Users\KIIT\Desktop\Final\Combined_12.csv"

# Load the dataset into a pandas DataFrame
df = pd.read_csv(dataset_path)

# Separate the features (X) and the target variable (y)
X = df.drop('Facies', axis=1)  # Replace 'target_variable' with the actual column name
y = df['Facies']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the search space for Bayesian optimization
param_space = {
    'n_estimators': (100, 1000),
    'learning_rate': (0.01, 0.9999999999, 'log-uniform'),
    'max_depth': (3, 10),
    'min_samples_split': (2, 20),
    'min_samples_leaf': (1, 10),
    'subsample': (0.1, 1.0, 'uniform')
}

# Create a gradient boosting classifier object
gb_classifier = GradientBoostingClassifier()

start = time.time()

# Perform Bayesian optimization for hyperparameter tuning
bayes_search = BayesSearchCV(gb_classifier, param_space, n_iter=100, cv=5, scoring='accuracy')
bayes_search.fit(X_train, y_train)

# Get the best estimator and its corresponding parameters
best_classifier = bayes_search.best_estimator_
best_params = bayes_search.best_params_

end= time.time()
print(f"time:{end-start}")

# Predict on the test set using the best classifier
y_pred = best_classifier.predict(X_test)

# Calculate the accuracy and F1 score of the classifier
accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro')

# Print the best parameters and the performance metrics
print("Best Parameters:", best_params)
print("Accuracy:", accuracy)
print("F1 Score:", f1)

KeyboardInterrupt: 