In [1]:
from sklearn.datasets import fetch_openml
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from scipy.stats import mode
import numpy as np

In [2]:
# Fetch version 1 of the `mnist_784` dataset from OpenML; as_frame=False asks for
# array outputs instead of pandas objects.
mnist = fetch_openml(name='mnist_784', version=1, as_frame=False)
# Feature matrix and the matching label vector.
X = mnist.data
y = mnist.target

In [3]:
# Hold out exactly 10,000 samples for testing. An integer test_size is an absolute
# sample count, so the split no longer hard-codes a 70,000-row dataset or relies on
# how the float 10000/70000 happens to round inside train_test_split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=10000, random_state=42)

# مسئله **1**

# **الف)**

In [None]:
# Baseline: a single decision tree (default hyper-parameters, fixed seed) fitted on
# the whole training split and scored on the held-out test split.
decision_tree = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)
y_pred_single = decision_tree.predict(X_test)
accuracy_decision_tree = accuracy_score(y_true=y_test, y_pred=y_pred_single)

print(f"accuracy of decision tree: {accuracy_decision_tree:.2%}")

accuracy of decision tree: 87.03%


# **ب)**

In [None]:
# Hand-rolled bagging: 20 trees, each fitted on a random half of the training rows
# (sampled without replacement) and a random 70% of the feature columns.

# Seeded generator: with the unseeded global np.random every run drew different
# rows/features, so the accuracies printed by this cell could not be reproduced.
rng = np.random.default_rng(42)
n_train_samples, n_all_features = X_train.shape

predictions = []  # one array of predicted test labels per tree
weights = []      # one training-set accuracy per tree

for i in range(20):
    random_subset = rng.choice(n_train_samples, size=n_train_samples // 2, replace=False)
    random_feature = rng.choice(n_all_features, size=int(n_all_features * 0.7), replace=False)

    X_subset = X_train[random_subset][:, random_feature]
    y_subset = y_train[random_subset]

    tree = DecisionTreeClassifier(random_state=i)
    tree.fit(X_subset, y_subset)

    # Accuracy is measured on ALL training rows (the half the tree was fitted on
    # plus the half it never saw), restricted to this tree's feature columns.
    y_train_pred = tree.predict(X_train[:, random_feature])
    train_accuracy = accuracy_score(y_train, y_train_pred)
    weights.append(train_accuracy)

    # The test set must be projected onto the same feature columns before predicting.
    X_test_subset = X_test[:, random_feature]
    y_test_pred = tree.predict(X_test_subset)
    predictions.append(y_test_pred)

    print(f"accuracy of tree number {i+1} on train data: {train_accuracy:.2%}")

mean_accuracy_trees = np.mean(weights)
print(f"mean accuracy of trees on train data: {mean_accuracy_trees:.2%}")

accuracy of tree number 1 on train data: 92.65%
accuracy of tree number 2 on train data: 92.54%
accuracy of tree number 3 on train data: 92.44%
accuracy of tree number 4 on train data: 92.47%
accuracy of tree number 5 on train data: 92.53%
accuracy of tree number 6 on train data: 92.38%
accuracy of tree number 7 on train data: 92.58%
accuracy of tree number 8 on train data: 92.65%
accuracy of tree number 9 on train data: 92.56%
accuracy of tree number 10 on train data: 92.33%
accuracy of tree number 11 on train data: 92.62%
accuracy of tree number 12 on train data: 92.59%
accuracy of tree number 13 on train data: 92.49%
accuracy of tree number 14 on train data: 92.69%
accuracy of tree number 15 on train data: 92.55%
accuracy of tree number 16 on train data: 92.55%
accuracy of tree number 17 on train data: 92.58%
accuracy of tree number 18 on train data: 92.38%
accuracy of tree number 19 on train data: 92.59%
accuracy of tree number 20 on train data: 92.31%
mean accuracy of trees on tra

# **پ)**

In [None]:
# Stack the per-tree test predictions into one integer matrix (one row per tree).
predictions = np.array(predictions, dtype=int)
# Column-wise mode: for every test sample, the label predicted by the most trees.
vote_result = mode(predictions, axis=0)
majority_vote = np.asarray(vote_result.mode).reshape(-1)
# The true labels are cast to int so they are comparable with the integer votes.
accuracy_majority_vote = accuracy_score(y_test.astype(int), majority_vote)
print(f"دقت رای اکثریت: {accuracy_majority_vote:.2%}")

دقت رای اکثریت: 95.36%


In [None]:
# Weighted voting: each tree's vote counts in proportion to its training accuracy.
weights = np.array(weights, dtype=float)
weights = weights / weights.sum()  # normalise so the weights sum to 1

# Integer view of the per-tree predictions (one row per tree).
vote_matrix = np.asarray(predictions).astype(int)

# Size the score table to cover every label that can be indexed: the labels present
# in y_test AND the largest label any tree predicted. Using only len(set(y_test))
# raises IndexError when a tree predicts a class that is absent from the test split.
n_vote_classes = max(len(set(y_test)), int(vote_matrix.max()) + 1)

weighted_votes = np.zeros((len(y_test), n_vote_classes), dtype=float)
sample_index = np.arange(len(y_test))
for tree_weight, pred in zip(weights, vote_matrix):
    # Every sample appears exactly once per tree, so the (row, label) pairs are
    # unique and a fancy-indexed += is safe; this replaces the per-sample Python loop.
    weighted_votes[sample_index, pred] += tree_weight

# Predicted label = class with the largest accumulated weight.
weighted_vote = np.argmax(weighted_votes, axis=1)

accuracy_weighted_vote = accuracy_score(y_test.astype(int), weighted_vote)
print(f"دقت رأی‌گیری وزنی: {accuracy_weighted_vote:.2%}")

دقت رأی‌گیری وزنی: 95.53%


# **ت)**

In [None]:
# Library counterpart of the hand-rolled ensemble: 20 trees, max_features=0.7 and
# max_samples=0.5, with a fixed seed.
rf = RandomForestClassifier(
    n_estimators=20,
    max_features=0.7,
    max_samples=0.5,
    random_state=42,
).fit(X_train, y_train)
rf_predictions = rf.predict(X_test)
accuracy_rf = accuracy_score(y_true=y_test, y_pred=rf_predictions)
print(f"accuracy of Random Forest: {accuracy_rf:.2%}")

accuracy of Random Forest: 95.25%


# مسئله **2**

In [12]:
# Problem 2 reloads MNIST with integer labels and uses its own 80/20 split.
# NOTE: this rebinds X, y, X_train, X_test, y_train and y_test, replacing the
# Problem 1 variables of the same names.
# as_frame=False keeps the data as NumPy arrays, consistent with the Problem 1 load;
# without it, recent scikit-learn versions return pandas objects here.
mnist = fetch_openml('mnist_784', version=1, as_frame=False)
X, y = mnist.data, mnist.target.astype(int)
n_classes = len(np.unique(y))

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def adaboost_binary(X_train, y_train, X_test, y_test, n_estimators=20):
    """Train a binary AdaBoost ensemble of depth-1 decision trees (stumps).

    Parameters
    ----------
    X_train : array-like of shape (n_samples, n_features)
        Training features.
    y_train : array-like of shape (n_samples,)
        Training labels. They must be encoded as -1 / +1, because the weight
        update multiplies the labels by the predictions.
    X_test, y_test
        Unused; accepted only so existing callers keep working.
    n_estimators : int, default=20
        Maximum number of boosting rounds.

    Returns
    -------
    classifiers : list
        The fitted stumps that were kept, in boosting order.
    alphas : list of float
        The vote weight of each kept stump.
    predict : callable
        ``predict(X)`` returns the real-valued score ``sum(alpha * stump.predict(X))``;
        its sign is the predicted class and its magnitude the confidence.
    """
    y_train = np.asarray(y_train)
    n_samples = X_train.shape[0]
    weights = np.full(n_samples, 1.0 / n_samples)
    classifiers = []
    alphas = []
    # Lower bound on the error used inside the log, so a perfect stump gets a large
    # but finite alpha instead of a division by zero.
    min_err = 1e-10

    for _ in range(n_estimators):
        # Fixed seed so tie-breaking between equally good splits is reproducible.
        tree = DecisionTreeClassifier(max_depth=1, random_state=0)
        tree.fit(X_train, y_train, sample_weight=weights)
        y_pred = tree.predict(X_train)

        err = np.sum(weights * (y_pred != y_train)) / np.sum(weights)
        if err >= 0.5:
            # No better than chance. The sample weights would stay unchanged, so
            # further rounds would only refit on identical input: stop here.
            break

        clipped_err = max(err, min_err)
        alpha = 0.5 * np.log((1 - clipped_err) / clipped_err)
        alphas.append(alpha)
        classifiers.append(tree)

        if err == 0:
            # A perfect stump is kept (above) rather than discarded; nothing is
            # misclassified, so there is nothing left to re-weight.
            break

        # Increase the weight of misclassified samples, decrease the others.
        weights = weights * np.exp(-alpha * y_train * y_pred)
        weights /= np.sum(weights)

    def predict(X):
        """Return the alpha-weighted sum of stump predictions for each row of X."""
        final_prediction = np.zeros(X.shape[0])
        for alpha, tree in zip(alphas, classifiers):
            final_prediction += alpha * tree.predict(X)
        return final_prediction

    return classifiers, alphas, predict

def adaboost_multiclass(X_train, y_train, X_test, y_test, n_estimators=20):
    """One-vs-rest multiclass AdaBoost built on ``adaboost_binary``.

    One binary ensemble is trained per class (that class relabelled +1, every other
    class -1). A test sample is assigned to the class whose ensemble returns the
    highest score.

    Parameters
    ----------
    X_train, y_train
        Training features and class labels.
    X_test, y_test
        Test features and class labels used to compute the returned accuracy.
    n_estimators : int, default=20
        Maximum number of boosting rounds for each per-class ensemble.

    Returns
    -------
    float
        Accuracy of the one-vs-rest prediction on ``X_test`` / ``y_test``.
    """
    y_train = np.asarray(y_train)
    y_test = np.asarray(y_test)
    # Derive the classes from the training labels instead of a notebook-level global.
    classes = np.unique(y_train)

    classifiers = {}
    alphas = {}
    predict_fns = {}

    for c in classes:
        y_train_binary = np.where(y_train == c, 1, -1)
        y_test_binary = np.where(y_test == c, 1, -1)
        classifiers[c], alphas[c], predict_fns[c] = adaboost_binary(
            X_train, y_train_binary, X_test, y_test_binary, n_estimators
        )

    def predict(X):
        """Return, for each row of X, the class with the highest ensemble score."""
        # Reuse the scorers trained above; training again here would repeat the
        # whole boosting procedure for every class on every predict call.
        confidence_scores = np.column_stack([predict_fns[c](X) for c in classes])
        return classes[np.argmax(confidence_scores, axis=1)]

    y_pred = predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    return accuracy

# Train the one-vs-rest AdaBoost on the training split and report its test accuracy.
accuracy = adaboost_multiclass(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
    n_estimators=20,
)
print(f"Multiclass Adaboost Accuracy: {accuracy * 100:.2f}%")


Multiclass Adaboost Accuracy: 82.49%


In [None]:
# Reference result: scikit-learn's AdaBoostClassifier with the same number of
# estimators, fitted and scored on the same split.
adaboost_ready = AdaBoostClassifier(n_estimators=20, random_state=42)
adaboost_ready.fit(X_train, y_train)
y_pred_ready = adaboost_ready.predict(X_test)

accuracy_ready = accuracy_score(y_test, y_pred_ready)
# accuracy_score returns a fraction in [0, 1]. The `%` format spec multiplies by 100
# and appends the percent sign; the previous `.2f` followed by a literal `%` printed
# the raw fraction labelled as a percentage.
print(f"Accuracy of Adaboost from sklearn: {accuracy_ready:.2%}")