In [4]:
# Import necessary libraries
import math
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV


In [5]:

# Loading the dataset from drive using pandas
df = pd.read_csv("/content/sample_data/Dataset_SMOTE_MC.csv")
df.head()


Unnamed: 0,baseline value,accelerations,fetal_movement,uterine_contractions,light_decelerations,severe_decelerations,prolongued_decelerations,abnormal_short_term_variability,mean_value_of_short_term_variability,percentage_of_time_with_abnormal_long_term_variability,...,histogram_min,histogram_max,histogram_number_of_peaks,histogram_number_of_zeroes,histogram_mode,histogram_mean,histogram_median,histogram_variance,histogram_tendency,target
0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0,0.5,43.0,...,62.0,126.0,2.0,0.0,120.0,137.0,121.0,73.0,1.0,2.0
1,132.0,0.005,0.0,0.006,0.003,0.0,0.0,17.0,2.1,0.0,...,68.0,198.0,6.0,0.569969,141.0,136.0,140.0,12.0,0.0,1.0
2,133.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.1,0.0,...,68.0,198.0,5.0,0.569969,141.0,135.0,138.0,13.0,0.0,1.0
3,134.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,11.0,0.0,137.0,134.0,137.0,13.0,1.0,1.0
4,132.0,0.005,0.0,0.008,0.0,0.0,0.0,16.0,2.4,0.0,...,53.0,170.0,9.0,0.0,137.0,136.0,138.0,11.0,1.0,1.0


In [6]:

X = df.iloc[:, 0:21]  # Independent columns
y = df.iloc[:, -1]


In [7]:

# Splitting the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Initialize StandardScaler
scaler = StandardScaler()

# Fit the scaler to the train set, it will learn the parameters
scaler.fit(X_train)


In [8]:

# Transform train and test sets
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Initialize AdaBoostClassifier
adaboost = AdaBoostClassifier()


# Fit the AdaBoost model
adaboost.fit(X_train_scaled, y_train)

# Predict on training set
y_pred_train = adaboost.predict(X_train_scaled)
accuracy = accuracy_score(y_train, y_pred_train)
print(f"Training Accuracy:{accuracy*100}")

# Predict on test set
y_pred_test = adaboost.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred_test)
print(f"Testing Accuracy:{accuracy*100}")


Training Accuracy:89.7532729103726
Testing Accuracy:90.23162134944613


In [9]:

# Define the search space for hyperparameter tuning
param_grid = {
    "n_estimators": [50, 100, 150],  # Number of weak learners
    "learning_rate": [0.01, 0.1, 1.0],  # Weight applied to each classifier
}

# Initialize RandomizedSearchCV for parameter tuning
random_search = RandomizedSearchCV(adaboost, param_distributions=param_grid, n_iter=5, scoring='accuracy', n_jobs=-1, cv=5, verbose=3)
random_search.fit(X, y)

# Get the best parameters
print(random_search.best_params_)


Fitting 5 folds for each of 5 candidates, totalling 25 fits
{'n_estimators': 100, 'learning_rate': 0.1}


In [10]:

# Use the best parameters to create a new AdaBoostClassifier
best_adaboost = AdaBoostClassifier(**random_search.best_params_)

# Fit the new AdaBoost model with best parameters
best_adaboost.fit(X_train_scaled, y_train)

# Predict on test set using the tuned model
y_pred_test_tuned = best_adaboost.predict(X_test_scaled)
print(accuracy_score(y_test, y_pred_test_tuned) * 100)

91.33937562940584


In [11]:
from sklearn.model_selection import cross_val_score
score=cross_val_score(best_adaboost,X,y,cv=10)

In [12]:
score

array([0.87726358, 0.85714286, 0.90945674, 0.89537223, 0.95573441,
       0.92943548, 0.91935484, 0.90524194, 0.88709677, 0.69354839])

In [13]:
score.mean()

0.882964723826832

In [14]:
import lightgbm as lgb
from sklearn.metrics import accuracy_score

lgb_model = lgb.LGBMClassifier(
    # Specify your hyperparameters here
)

lgb_model.fit(X_train, y_train)

y_pred_lgb = lgb_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred_lgb)
print("Accuracy: ", accuracy)


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000709 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 4655
[LightGBM] [Info] Number of data points in the train set: 3972, number of used features: 19
[LightGBM] [Info] Start training from score -1.100124
[LightGBM] [Info] Start training from score -1.087347
[LightGBM] [Info] Start training from score -1.108480
Accuracy:  0.9869083585095669


In [15]:

df_BC=pd.read_csv("/content/sample_data/Dataset_SMOTE_BC.csv")
df_BC.head()

Unnamed: 0,baseline value,accelerations,fetal_movement,uterine_contractions,light_decelerations,severe_decelerations,prolongued_decelerations,abnormal_short_term_variability,mean_value_of_short_term_variability,percentage_of_time_with_abnormal_long_term_variability,...,histogram_max,histogram_number_of_peaks,histogram_number_of_zeroes,histogram_mode,histogram_mean,histogram_median,histogram_variance,histogram_tendency,target,binary_target
0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,73.0,0.5,43.0,...,126.0,2.0,0.0,120.0,137.0,121.0,73.0,1.0,2.0,0
1,132.0,0.005,0.0,0.006,0.003,0.0,0.0,17.0,2.1,0.0,...,198.0,6.0,0.569969,141.0,136.0,140.0,12.0,0.0,1.0,1
2,133.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.1,0.0,...,198.0,5.0,0.569969,141.0,135.0,138.0,13.0,0.0,1.0,1
3,134.0,0.003,0.0,0.008,0.003,0.0,0.0,16.0,2.4,0.0,...,170.0,11.0,0.0,137.0,134.0,137.0,13.0,1.0,1.0,1
4,132.0,0.005,0.0,0.008,0.0,0.0,0.0,16.0,2.4,0.0,...,170.0,9.0,0.0,137.0,136.0,138.0,11.0,1.0,1.0,1


In [16]:
# Extract features (X) and target (y)
X_bc = df_BC.iloc[:, :21]  # Independent columns
y_bc = df_BC['binary_target']

# Split the dataset into training and testing sets
X_train_bc, X_test_bc, y_train_bc, y_test_bc = train_test_split(X_bc, y_bc, test_size=0.3, random_state=0)


adaboost_bc = AdaBoostClassifier(
    # You can specify the base estimator here if needed, e.g., base_estimator=DecisionTreeClassifier(),
    n_estimators=50,  # Number of weak learners (default is 50)
    learning_rate=1.0,  # Weight applied to each weak learner (default is 1.0)
    random_state=42  # Seed for reproducibility (you can change this to any desired value)
)

# Train the model
adaboost_bc.fit(X_train, y_train)


In [17]:
# Evaluate on training set
y_pred_train_bc = adaboost_bc.predict(X_train_bc)
print("Training Accuracy:", accuracy_score(y_train_bc, y_pred_train_bc))

# Evaluate on test set
y_pred_test_bc = adaboost_bc.predict(X_test_bc)
print("Test Accuracy:", accuracy_score(y_test_bc, y_pred_test_bc))

# Define the search space
param_grid = {
    # Number of weak learners
    "n_estimators": [50, 100, 150],
    # Weight applied to each weak learner
    "learning_rate": [0.0001, 0.001, 0.01, 0.1, 1],
}

# Randomized search for hyperparameter tuning
random_search_bc = RandomizedSearchCV(adaboost_bc, param_distributions=param_grid, n_iter=5, scoring='accuracy', n_jobs=-1, cv=5, verbose=3)
random_search_bc.fit(X_bc, y_bc)

# Print the best hyperparameters found
print("Best Hyperparameters:", random_search_bc.best_params_)

# Use the best hyperparameters to create a new AdaBoost classifier
best_adaboost_bc = AdaBoostClassifier(
    n_estimators=random_search_bc.best_params_['n_estimators'],
    learning_rate=random_search_bc.best_params_['learning_rate'],
    random_state=42
)

# Train the model with the best hyperparameters
best_adaboost_bc.fit(X_bc, y_bc)

Training Accuracy: 0.3010071942446043
Test Accuracy: 0.30134228187919465
Fitting 5 folds for each of 5 candidates, totalling 25 fits
Best Hyperparameters: {'n_estimators': 100, 'learning_rate': 1}


In [18]:
random_search_bc.best_params_

{'n_estimators': 100, 'learning_rate': 1}

In [19]:
# Define AdaBoost classifier
adaboost_bc = AdaBoostClassifier(
    n_estimators=50,  # Number of weak learners (default is 50)
    learning_rate=1.0,  # Weight applied to each weak learner (default is 1.0)
    random_state=42  # Seed for reproducibility (you can change this to any desired value)
)

# Train AdaBoost classifier
adaboost_bc.fit(X_train_bc, y_train_bc)

# Evaluate on training set
y_pred_train_bc = adaboost_bc.predict(X_train_bc)
print("Training Accuracy:", accuracy_score(y_train_bc, y_pred_train_bc)*100)

# Evaluate on test set
y_pred_test_bc = adaboost_bc.predict(X_test_bc)
print("Test Accuracy:", accuracy_score(y_test_bc, y_pred_test_bc)*100)

Training Accuracy: 96.2589928057554
Test Accuracy: 95.63758389261746
