In [1]:
# Semi-supervised learning: a self-training wrapper around a probabilistic base classifier
from sklearn.base import BaseEstimator
import numpy as np

class SelfLearningModel(BaseEstimator):
    """Self-training (pseudo-labeling) wrapper around a probabilistic classifier.

    The wrapped estimator is first trained on the labeled samples only. It then
    repeatedly predicts the unlabeled samples, moves the ones whose top class
    probability exceeds ``prob_threshold`` into the training set (using the
    predicted class as their label), and is retrained on the enlarged set.

    Parameters
    ----------
    basemodel : estimator
        Object providing ``fit(X, y)``, ``predict(X)`` and ``predict_proba(X)``.
        It is fitted in place (it is not cloned).
    max_iter : int, default=200
        Upper bound on the number of pseudo-labeling rounds.
    prob_threshold : float, default=0.8
        A sample is pseudo-labeled only when its highest predicted class
        probability is strictly greater than this value.
    """

    def __init__(self, basemodel, max_iter=200, prob_threshold=0.8):
        # BaseEstimator.get_params()/clone()/repr() look up attributes named
        # exactly like the __init__ parameters, so the estimator must be stored
        # as ``basemodel``; ``model`` remains available through the property below.
        self.basemodel = basemodel
        self.max_iter = max_iter
        self.prob_threshold = prob_threshold

    @property
    def model(self):
        """Backward-compatible alias for ``basemodel``."""
        return self.basemodel

    @model.setter
    def model(self, value):
        self.basemodel = value

    def fit(self, X, y):
        """Fit the base model on labeled data, then self-train on unlabeled data.

        Parameters
        ----------
        X : array-like (dense), one row per sample
            Feature matrix containing both labeled and unlabeled samples.
        y : array-like, one entry per sample
            Targets; the value ``-1`` marks a sample as unlabeled.

        Returns
        -------
        self
        """
        # Coerce to arrays so boolean-mask indexing and the elementwise
        # ``!= -1`` comparison also work for plain Python lists.
        X = np.asarray(X)
        y = np.asarray(y)

        labeled_mask = y != -1
        labeled_X, labeled_y = X[labeled_mask], y[labeled_mask]
        unlabeled_X = X[~labeled_mask]
        self.model.fit(labeled_X, labeled_y)

        for _ in range(self.max_iter):
            # Nothing left to pseudo-label; also avoids handing a zero-row
            # array to the base model.
            if unlabeled_X.shape[0] == 0:
                break

            unlabeled_y = self.model.predict(unlabeled_X)
            unlabeled_prob = np.max(self.model.predict_proba(unlabeled_X), axis=1)
            confident_indices = unlabeled_prob > self.prob_threshold
            if not np.any(confident_indices):
                break

            labeled_X = np.vstack([labeled_X, unlabeled_X[confident_indices]])
            labeled_y = np.hstack([labeled_y, unlabeled_y[confident_indices]])
            unlabeled_X = unlabeled_X[~confident_indices]

            # Retrain on the enlarged set; without this the pseudo-labels are
            # never learned from and every later round repeats the same
            # predictions.
            self.model.fit(labeled_X, labeled_y)
        return self

    def predict(self, X):
        """Return the fitted base model's class predictions for ``X``."""
        return self.model.predict(X)

    def predict_proba(self, X):
        """Return the fitted base model's class probabilities for ``X``."""
        return self.model.predict_proba(X)


In [None]:
# Random Forest classifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Fetch the Iris features and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Build a 100-tree Random Forest and fit it on the training split
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train, y_train)

# Predict the held-out samples and report the fraction classified correctly
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


In [None]:
# Decision Tree, Random Forest, and Extremely Randomized Trees (Extra Trees): accuracy comparison on Iris
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.metrics import accuracy_score

# Fetch the Iris features and class labels
iris = load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation (seeded split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the three tree-based models, each with its own fixed seed
dt_classifier = DecisionTreeClassifier(random_state=42)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
et_classifier = ExtraTreesClassifier(n_estimators=100, random_state=42)
fitted_models = (dt_classifier, rf_classifier, et_classifier)

# Fit every model on the same training split
for estimator in fitted_models:
    estimator.fit(X_train, y_train)

# Predict the held-out samples with each model
dt_pred, rf_pred, et_pred = (
    estimator.predict(X_test) for estimator in fitted_models
)

# Fraction of held-out samples each model classified correctly
dt_accuracy, rf_accuracy, et_accuracy = (
    accuracy_score(y_test, predicted) for predicted in (dt_pred, rf_pred, et_pred)
)

# Report the results
print("Decision Tree Accuracy:", dt_accuracy)
print("Random Forest Accuracy:", rf_accuracy)
print("Extra Trees Accuracy:", et_accuracy)
