In [2]:
import pandas as pd
import numpy as np


# Load the Titanic passenger table from the Kaggle input mount.
data = pd.read_csv('/kaggle/input/dataset-titanic/titanic.csv')

# Target column in its original encoding; the evaluation cell maps it to -1/+1
# before fitting.
y = data['Survived'].values

# Feature matrix: drop the target, replace missing values with 0, then one-hot
# encode every non-numeric column.
# NOTE(review): this also one-hot encodes any free-text / identifier columns the
# CSV may contain, which can produce a very wide matrix -- confirm this is intended.
#
# `to_numpy(dtype=float)` is used instead of `.values` because recent pandas
# versions emit boolean dummy columns; combining those with numeric columns via
# `.values` yields an `object` array, which makes every comparison in the stump
# search fall back to slow Python-level operations.
X = (
    pd.get_dummies(data.drop('Survived', axis=1).fillna(0))
    .to_numpy(dtype=float)
)




In [4]:
class DecisionStump:
    """Single-feature threshold classifier used as a weak learner.

    A fitted stump predicts ``-polarity`` for samples whose selected feature is
    strictly below ``threshold`` and ``+polarity`` for all other samples.
    """

    def __init__(self):
        # Column of X the stump splits on; stays None until fit() finds a split.
        self.feature_index = None
        # Split value; samples strictly below it fall on the "-polarity" side.
        self.threshold = None
        # +1 or -1; multiplying by -1 flips which side is predicted positive.
        self.polarity = 1
        # Vote weight derived from the weighted error of the chosen split.
        self.alpha = 0

    def fit(self, X, y, sample_weight):
        """Exhaustively search for the split with the lowest weighted error.

        Every unique value of every column is tried as a threshold, with both
        polarities. The first candidate reaching the minimum error is kept
        (later candidates must be strictly better to replace it).

        Parameters
        ----------
        X : 2-D numpy array, one row per sample.
        y : array of labels compared against the -1/+1 predictions.
        sample_weight : numpy array of per-sample weights; the error of a
            candidate is the sum of the weights of the samples it misclassifies.

        If no candidate split exists (no columns or no rows), the attributes
        are left untouched.
        """
        n_features = X.shape[1]
        min_error = float('inf')
        best_split = None

        for feature_index in range(n_features):
            column = X[:, feature_index]
            for threshold in np.unique(column):
                # Polarity-independent part of the prediction, computed once
                # per threshold instead of once per polarity.
                base = np.where(column < threshold, -1.0, 1.0)
                for polarity in (1, -1):
                    predictions = base * polarity
                    # np.sum keeps the reduction in C; the builtin sum would
                    # iterate over the array element by element in Python.
                    error = np.sum(sample_weight[y != predictions])

                    if error < min_error:
                        min_error = error
                        best_split = (feature_index, threshold, polarity)

        if best_split is None:
            return

        self.feature_index, self.threshold, self.polarity = best_split
        # Computed once for the winning split; the 1e-10 term avoids division
        # by zero when the split is perfect.
        self.alpha = 0.5 * np.log((1 - min_error) / (min_error + 1e-10))

    def predict(self, X):
        """Return a float array of -1/+1 predictions, one per row of X."""
        below = X[:, self.feature_index] < self.threshold
        return np.where(below, -1.0, 1.0) * self.polarity

In [5]:
class AdaBoost:
    """AdaBoost ensemble of ``DecisionStump`` weak learners.

    Labels are expected to be encoded as -1/+1: the weight update multiplies
    ``y`` by the stump's -1/+1 predictions.
    """

    def __init__(self, n_estimators=50):
        # Number of boosting rounds (one stump is trained per round).
        self.n_estimators = n_estimators
        # Fitted stumps and their vote weights, in training order.
        self.stumps = []
        self.alphas = []

    def fit(self, X, y):
        """Train ``n_estimators`` stumps sequentially on reweighted samples.

        Parameters
        ----------
        X : 2-D numpy array, one row per sample.
        y : array-like of -1/+1 labels, one per row of X.
        """
        # Accept plain sequences too: the weight update below negates y.
        y = np.asarray(y)
        m = X.shape[0]
        sample_weight = np.ones(m) / m

        # Start from an empty ensemble so calling fit() again replaces the
        # previous model instead of stacking new stumps on top of stale ones.
        self.stumps = []
        self.alphas = []

        for _ in range(self.n_estimators):
            stump = DecisionStump()
            stump.fit(X, y, sample_weight)
            predictions = stump.predict(X)
            # Compute the new sample weights: misclassified samples
            # (y * prediction == -1) are scaled up, correct ones scaled down,
            # then the weights are renormalized to sum to 1.
            sample_weight *= np.exp(-y * predictions * stump.alpha)
            sample_weight /= np.sum(sample_weight)

            self.stumps.append(stump)
            self.alphas.append(stump.alpha)

    def predict(self, X):
        """Return the sign of the alpha-weighted vote of all stumps.

        The result is a float array with values in {-1, 0, +1}; 0 appears only
        where the weighted vote is exactly zero (e.g. when no stump is fitted).
        """
        m = X.shape[0]
        final_predictions = np.zeros(m)

        for stump, alpha in zip(self.stumps, self.alphas):
            final_predictions += alpha * stump.predict(X)

        return np.sign(final_predictions)


In [6]:
# Predict and evaluate
model = AdaBoost(n_estimators=50)
# AdaBoost needs labels in {-1, +1}. Mapping on `y > 0` (instead of `y == 0`)
# keeps this cell idempotent: re-running it on already-converted labels leaves
# them unchanged, whereas the `y == 0` test would turn every -1 into +1.
y = np.where(y > 0, 1, -1)
model.fit(X, y)
predictions = model.predict(X)

# Compute the accuracy on the training data (the model is scored on the same
# rows it was fitted on, so this is not a held-out estimate).
accuracy = np.mean(predictions == y)
print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.83
