Practical 10: Implementation of Boosting Algorithms: AdaBoost, Stochastic Gradient

In [1]:
import pandas as pd

# Read the dataset
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv')

# Display the first few rows of the dataset
print(data.head())


    age  anaemia  creatinine_phosphokinase  diabetes  ejection_fraction  \
0  75.0        0                       582         0                 20   
1  55.0        0                      7861         0                 38   
2  65.0        0                       146         0                 20   
3  50.0        1                       111         0                 20   
4  65.0        1                       160         1                 20   

   high_blood_pressure  platelets  serum_creatinine  serum_sodium  sex  \
0                    1  265000.00               1.9           130    1   
1                    0  263358.03               1.1           136    1   
2                    0  162000.00               1.3           129    1   
3                    0  210000.00               1.9           137    1   
4                    0  327000.00               2.7           116    0   

   smoking  time  DEATH_EVENT  
0        0     4            1  
1        0     6            1  
2       

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score


# Splitting the dataset into features (X) and target variable (y)
X = data.drop('DEATH_EVENT', axis=1)  # Features
y = data['DEATH_EVENT']  # Target variable

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

# Creating the AdaBoostClassifier model
adaboost = AdaBoostClassifier(n_estimators=10, learning_rate=1)

# Training the model
model = adaboost.fit(X_train, y_train)

# Making predictions
y_pred = model.predict(X_test)

# Calculating the accuracy of the model
print("The accuracy of the model on the validation set is:", accuracy_score(y_test, y_pred))

The accuracy of the model on the validation set is: 0.8222222222222222


In [4]:
import pandas as sahil_pd
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from numpy import loadtxt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [10]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import time

# Read the dataset
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv')

# Splitting the dataset into features (X) and target variable (y)
X = data.iloc[:, 0:12].values
y = data.iloc[:, 12].values

# Splitting the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

# Define the models
models = [
    ('Logistic Regression', LogisticRegression()),
    ('KNN', KNeighborsClassifier()),
    ('SVM', SVC()),
    ('XGB', XGBClassifier(eta=0.01, gamma=10))
]

# Evaluate each model
results = []
names = []
scoring = 'accuracy'

for name, model in models:
    start_time = time.time()
    model.fit(x_train, y_train)
    elapsed_time = time.time() - start_time
    y_pred = model.predict(x_test)
    accuracy = accuracy_score(y_test, y_pred)
    results.append(accuracy)
    names.append(name)
    print(f"{name}: Accuracy = {accuracy}, Elapsed Time = {elapsed_time} seconds")


Logistic Regression: Accuracy = 0.8666666666666667, Elapsed Time = 0.0226132869720459 seconds
KNN: Accuracy = 0.6333333333333333, Elapsed Time = 0.001268625259399414 seconds
SVM: Accuracy = 0.7666666666666667, Elapsed Time = 0.006058454513549805 seconds
XGB: Accuracy = 0.8666666666666667, Elapsed Time = 0.07390570640563965 seconds


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import time

# Evaluate each model
for name, model in models:
    start_time = time.time()
    model.fit(x_train, y_train)
    elapsed_time = time.time() - start_time
    y_pred = model.predict(x_test)
    predictions = [round(value) for value in y_pred]
    accuracy = accuracy_score(y_test, predictions)
    print(f"{name}: Accuracy = {accuracy:.2f}%, Elapsed Time = {elapsed_time:.2f} seconds")


Logistic Regression: Accuracy = 0.87%, Elapsed Time = 0.04 seconds
KNN: Accuracy = 0.63%, Elapsed Time = 0.00 seconds
SVM: Accuracy = 0.77%, Elapsed Time = 0.01 seconds
XGB: Accuracy = 0.87%, Elapsed Time = 1.31 seconds


In [13]:
import pandas as pd
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Read the dataset
data = pd.read_csv('https://archive.ics.uci.edu/ml/machine-learning-databases/00519/heart_failure_clinical_records_dataset.csv')

# Splitting the dataset into features (X) and target variable (y)
X = data.iloc[:, :12].values
y = data.iloc[:, 12].values

# Splitting the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the classifiers
estimators = [
    ('Logistic Regression', LogisticRegression(solver='lbfgs', multi_class='auto', max_iter=200)),
    ('SVC', SVC(gamma='auto', probability=True)),
    ('Decision Tree Classifier', DecisionTreeClassifier())
]

# Hard voting
vote_hard = VotingClassifier(estimators=estimators, voting='hard')
vote_hard.fit(x_train, y_train)
y_pred_hard = vote_hard.predict(x_test)
score_hard = accuracy_score(y_test, y_pred_hard)
print("Hard Voting Score:", score_hard)

# Soft voting
vote_soft = VotingClassifier(estimators=estimators, voting='soft')
vote_soft.fit(x_train, y_train)
y_pred_soft = vote_soft.predict(x_test)
score_soft = accuracy_score(y_test, y_pred_soft)
print("Soft Voting Score:", score_soft)


Hard Voting Score: 0.7
Soft Voting Score: 0.7
