## **Co-Training**

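Co-Training is a semi-supervised learning algorithm in which two separate classifiers are trained on two distinct views (feature sets) of the same data. In each round, every classifier pseudo-labels the unlabeled samples it is most confident about, and those samples are added to the labeled set used for the next round of training.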


**Imports**

In [3]:
import numpy as np
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split


**Data Loading**

In [None]:
# Create a synthetic, fully labeled binary classification dataset
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Split BEFORE hiding labels so that y_test keeps its true labels; otherwise
# the accuracy would be measured against the -1 "unlabeled" marker.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Simulate the semi-supervised setting: mark every 5th training sample as
# unlabeled (-1). Work on a copy so the ground-truth array `y` is untouched.
y_train = y_train.copy()
y_train[::5] = -1


**Minimal Preprocessing**

In [None]:
# No significant preprocessing required for this synthetic dataset


**Model Building**

In [None]:
# A simple base learner for co-training: A decision tree classifier
from sklearn.tree import DecisionTreeClassifier

# Define the co-training classifier
# Mixins are listed to the left of BaseEstimator, following the scikit-learn
# developer guide, so the classifier mixin's behaviour takes precedence.
class CoTraining(ClassifierMixin, BaseEstimator):
    """Two-view co-training classifier for partially labeled data.

    The feature matrix is split column-wise into two views (first half and
    second half of the columns). One clone of ``base_learner`` is trained per
    view on the labeled samples. In every round each learner nominates the
    unlabeled samples it is most confident about; those samples receive
    pseudo-labels and both learners are refitted on the enlarged labeled set.

    Parameters
    ----------
    base_learner : estimator, default=DecisionTreeClassifier()
        Template classifier. It is cloned once per view and never fitted
        itself. ``predict_proba`` is used as the confidence measure when the
        learner offers it; otherwise hard predictions count as fully
        confident votes.
    n_iterations : int, default=10
        Maximum number of pseudo-labelling rounds.
    samples_per_iteration : int, default=10
        Maximum number of samples each learner may pseudo-label per round.

    Attributes
    ----------
    learner1, learner2 : estimator
        Fitted learners for the first and the second view.
    classes_ : ndarray
        Sorted class labels found among the labeled training samples.

    Notes
    -----
    Samples whose label equals ``-1`` are treated as unlabeled.
    """

    # Marker used in ``y`` for samples without a known label.
    UNLABELED = -1

    # The default estimator instance is shared between CoTraining objects,
    # which is harmless because fit() only ever clones it.
    def __init__(self, base_learner=DecisionTreeClassifier(), n_iterations=10,
                 samples_per_iteration=10):
        self.base_learner = base_learner
        self.n_iterations = n_iterations
        self.samples_per_iteration = samples_per_iteration

    @staticmethod
    def _split_views(X):
        """Return the two column-wise halves of ``X`` as separate views."""
        X = np.asarray(X)
        if X.ndim != 2 or X.shape[1] < 2:
            raise ValueError(
                "X must be 2-D with at least two features to form two views."
            )
        half = X.shape[1] // 2
        return X[:, :half], X[:, half:]

    def _class_scores(self, learner, view):
        """Return per-class scores of ``learner`` with columns in ``classes_`` order."""
        if hasattr(learner, "predict_proba"):
            # Every learner is fitted on a labeled set that contains all of
            # classes_, so its probability columns are assumed to follow the
            # same sorted class order.
            return learner.predict_proba(view)
        # No probabilities available: encode hard predictions as one-hot votes.
        predicted = np.asarray(learner.predict(view))
        return (predicted[:, None] == self.classes_[None, :]).astype(float)

    def _refit(self, view1, view2, y, labeled):
        """Fit both learners on the currently labeled samples of their view."""
        self.learner1.fit(view1[labeled], y[labeled])
        self.learner2.fit(view2[labeled], y[labeled])

    def fit(self, X, y):
        """Train both view-specific learners with iterative pseudo-labelling.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features; needs at least two columns.
        y : array-like of shape (n_samples,)
            Labels, with ``-1`` marking unlabeled samples. Not modified.

        Returns
        -------
        self : CoTraining
            The fitted estimator.

        Raises
        ------
        ValueError
            If ``samples_per_iteration`` is below 1, ``X`` cannot be split
            into two views, or ``y`` contains no labeled sample.
        """
        if self.samples_per_iteration < 1:
            raise ValueError("samples_per_iteration must be at least 1.")

        view1, view2 = self._split_views(X)
        # Pseudo-labels are written into a private copy so the caller's
        # label array is left untouched.
        y = np.array(y, copy=True)

        labeled = y != self.UNLABELED
        if not labeled.any():
            raise ValueError(
                "CoTraining needs at least one labeled sample (label != -1)."
            )
        self.classes_ = np.unique(y[labeled])

        # Independent clones: fitting one view must not overwrite the other.
        self.learner1 = clone(self.base_learner)
        self.learner2 = clone(self.base_learner)
        self._refit(view1, view2, y, labeled)

        for _ in range(self.n_iterations):
            pool = np.flatnonzero(~labeled)
            if pool.size == 0:
                break  # nothing left to pseudo-label

            # sample index -> (confidence, label); when both learners pick
            # the same sample, the more confident nomination wins.
            nominations = {}
            for learner, view in ((self.learner1, view1), (self.learner2, view2)):
                scores = self._class_scores(learner, view[pool])
                confidence = scores.max(axis=1)
                labels = self.classes_[scores.argmax(axis=1)]
                ranked = np.argsort(-confidence, kind="stable")
                for pos in ranked[: self.samples_per_iteration]:
                    sample = int(pool[pos])
                    best = nominations.get(sample)
                    if best is None or confidence[pos] > best[0]:
                        nominations[sample] = (confidence[pos], labels[pos])

            for sample, (_, label) in nominations.items():
                y[sample] = label
                labeled[sample] = True

            self._refit(view1, view2, y, labeled)

        return self

    def predict(self, X):
        """Predict class labels by averaging the two learners' class scores.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples with the same column layout used in ``fit``.

        Returns
        -------
        ndarray of shape (n_samples,)
            Predicted labels drawn from ``classes_``; ties go to the class
            that comes first in ``classes_``.
        """
        view1, view2 = self._split_views(X)
        mean_scores = (
            self._class_scores(self.learner1, view1)
            + self._class_scores(self.learner2, view2)
        ) / 2
        return self.classes_[mean_scores.argmax(axis=1)]

# Build a co-training model limited to five pseudo-labelling rounds,
# then fit it on the (partially labeled) training split
co_training_model = CoTraining(n_iterations=5)
co_training_model.fit(X=X_train, y=y_train)


**Predictions**

In [None]:
# Predict a class label for every sample of the held-out test split
y_pred = co_training_model.predict(X=X_test)


**Performance Metrics**

In [None]:
# Fraction of test samples whose predicted label equals the reference label,
# reported as a percentage
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")


**Visualizations**

In [None]:
# Visualizing the results (for demonstration purposes, we'll plot only two features)
# matplotlib is imported here because no earlier cell brings `plt` into scope.
import matplotlib.pyplot as plt

# Scatter the first two test features, coloured by the predicted class
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred, cmap='plasma')
plt.title("Co-Training - Prediction")
plt.show()
