<a href="https://colab.research.google.com/github/Arbaj-Wadagera/6thSem-ML-Lab/blob/main/1BM22CS051_Lab_9_AdaBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Load the income dataset; the CSV is expected in the notebook's working directory.
df = pd.read_csv('income.csv')

# Label-encode every object-dtype column on a copy so the raw frame stays intact.
# A fresh encoder is fitted per column, so the integer codes are independent
# between columns. NOTE(review): this also encodes the target column if it is
# stored as text - confirm against the CSV.
df_encoded = df.copy()
for column in df_encoded.select_dtypes(include=['object']).columns:
    df_encoded[column] = LabelEncoder().fit_transform(df_encoded[column])

# Separate the feature matrix from the 'income_level' target.
X = df_encoded.drop('income_level', axis=1)
y = df_encoded['income_level']

# Hold out 20% of the rows for testing; the seed keeps the partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Seed the ensemble as well as the split: AdaBoost forwards random_state to each
# base estimator, and the default tree learner permutes features before searching
# for a split, so ties between equally good splits can otherwise be resolved
# differently from one run to the next.
model = AdaBoostClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict on the held-out rows.
y_pred = model.predict(X_test)

# Score the predictions: overall accuracy plus the per-class confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_matrix)


Accuracy: 0.8327362063670796
Confusion Matrix:
 [[7003  411]
 [1223 1132]]


In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import LabelEncoder

# Load the dataset; the CSV is expected in the notebook's working directory.
data = pd.read_csv('iris.csv')

# Fail fast when the file yields no usable rows or columns. read_csv returns a
# DataFrame or raises, never None, so emptiness is the only condition to test.
if data.empty:
    raise ValueError("The dataset is empty or not loaded correctly.")

# Positional split: every column except the last is a feature, the last one is
# the target. NOTE(review): assumes the CSV carries no trailing non-target
# column and no row-id column among the features - confirm against the file.
X = data.iloc[:, :-1]  # Features
y = data.iloc[:, -1]   # Target variable

# Map the class labels to integer codes; the fitted encoder is kept so the
# codes can be translated back to the original labels later.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split the data into training and testing sets (80% train, 20% test), seeded
# so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Function to evaluate AdaBoost with different estimators
def evaluate_adaboost(estimator, n_estimators_list, learning_rates):
    """Fit and report AdaBoost for every (n_estimators, learning_rate) pair.

    The pairs are visited with ``n_estimators_list`` as the outer loop and
    ``learning_rates`` as the inner loop. Each model is trained on the
    module-level ``X_train``/``y_train`` and scored on ``X_test``/``y_test``,
    always with ``random_state=42``.

    Args:
        estimator: Base learner handed to ``AdaBoostClassifier``.
        n_estimators_list: Iterable of boosting-round counts to try.
        learning_rates: Iterable of learning rates to try.

    Returns:
        None. The accuracy line, the confusion matrix and a separator are
        printed for each combination.
    """
    # Lazy pairing keeps the exact visiting order of two nested for-loops.
    combos = (
        (rounds, rate)
        for rounds in n_estimators_list
        for rate in learning_rates
    )

    for n_estimators, learning_rate in combos:
        booster = AdaBoostClassifier(
            estimator=estimator,
            n_estimators=n_estimators,
            learning_rate=learning_rate,
            random_state=42,
        )

        # fit() hands back the fitted model, so training and prediction chain.
        y_pred = booster.fit(X_train, y_train).predict(X_test)

        accuracy = accuracy_score(y_test, y_pred)
        print(f'Estimator: {estimator.__class__.__name__}, n_estimators: {n_estimators}, learning_rate: {learning_rate}, Accuracy: {accuracy:.2f}')

        # Header, matrix and rule each land on their own line.
        print('Confusion Matrix:', confusion_matrix(y_test, y_pred), '-' * 50, sep='\n')

# Hyperparameter grid shared by both base learners.
n_estimators_list = [50, 100, 150]
learning_rates = [0.01, 0.1, 1.0]

# Each entry pairs the banner to print with the base estimator to boost:
# a depth-1 decision tree first, then logistic regression capped at 200 iterations.
base_estimators = (
    ("Evaluating AdaBoost with Decision Tree:", DecisionTreeClassifier(max_depth=1)),
    ("\nEvaluating AdaBoost with Logistic Regression:", LogisticRegression(max_iter=200)),
)

for banner, base_estimator in base_estimators:
    print(banner)
    evaluate_adaboost(base_estimator, n_estimators_list, learning_rates)

Evaluating AdaBoost with Decision Tree:
Estimator: DecisionTreeClassifier, n_estimators: 50, learning_rate: 0.01, Accuracy: 0.63
Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0 11  0]]
--------------------------------------------------
Estimator: DecisionTreeClassifier, n_estimators: 50, learning_rate: 0.1, Accuracy: 1.00
Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
--------------------------------------------------
Estimator: DecisionTreeClassifier, n_estimators: 50, learning_rate: 1.0, Accuracy: 0.93
Confusion Matrix:
[[10  0  0]
 [ 0  8  1]
 [ 0  1 10]]
--------------------------------------------------
Estimator: DecisionTreeClassifier, n_estimators: 100, learning_rate: 0.01, Accuracy: 0.97
Confusion Matrix:
[[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]
--------------------------------------------------
Estimator: DecisionTreeClassifier, n_estimators: 100, learning_rate: 0.1, Accuracy: 1.00
Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
-------------------------------------