In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import numpy as np


In [None]:
# Fetch the Iris dataset and separate the feature matrix from the class labels
iris = load_iris()
X = iris.data
y = iris.target


In [None]:
def parallel_knn_decision_tree(X_train, X_test, y_train, y_test, num_partitions=5, n_neighbors=3):
    """Stack per-partition KNN classifiers under a Decision Tree meta-learner.

    The training data is split into ``num_partitions`` contiguous chunks and one
    ``KNeighborsClassifier`` is fitted on each chunk. Every sample is then
    described by the vector of class labels that the KNN models predict for it
    (one column per partition), and a ``DecisionTreeClassifier`` is trained on
    those vectors.

    The tree is fitted on the training rows and ``y_train`` only; the test rows
    and ``y_test`` are used solely for scoring, so the returned accuracy is a
    held-out estimate rather than a score on the data the tree was fitted to.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and labels used only to compute the accuracy.
    num_partitions : int, default=5
        Number of chunks the training data is split into (one KNN per chunk).
    n_neighbors : int, default=3
        ``n_neighbors`` passed to every ``KNeighborsClassifier``.

    Returns
    -------
    dt_model : DecisionTreeClassifier
        Tree fitted on the (n_train, num_partitions) matrix of KNN predictions.
    accuracy : float
        Accuracy of ``dt_model`` on the KNN predictions for ``X_test``.

    Raises
    ------
    ValueError
        If ``num_partitions`` is less than 1, or if the smallest partition
        would contain fewer than ``n_neighbors`` samples.
    """
    if num_partitions < 1:
        raise ValueError("num_partitions must be at least 1")
    # np.array_split makes chunks of size n // k or n // k + 1, so n // k is the smallest.
    smallest_partition = len(X_train) // num_partitions
    if smallest_partition < n_neighbors:
        raise ValueError(
            f"each partition needs at least n_neighbors={n_neighbors} samples, "
            f"but the smallest of {num_partitions} partitions has {smallest_partition}"
        )

    # Partitioning the training data
    X_train_partitions = np.array_split(X_train, num_partitions)
    y_train_partitions = np.array_split(y_train, num_partitions)

    # Fit one KNN model per partition
    knn_models = []
    for X_part, y_part in zip(X_train_partitions, y_train_partitions):
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(X_part, y_part)
        knn_models.append(knn)

    # Meta-features: one column of predicted labels per KNN model.
    # NOTE: a KNN queried with rows from its own partition sees each row as its
    # own nearest neighbour, so the training meta-features are optimistic;
    # out-of-fold predictions would be a stricter alternative.
    train_votes = np.column_stack([knn.predict(X_train) for knn in knn_models])
    test_votes = np.column_stack([knn.predict(X_test) for knn in knn_models])

    # Train the Decision Tree on training votes only, then score on held-out votes
    dt_model = DecisionTreeClassifier()
    dt_model.fit(train_votes, y_train)
    y_pred = dt_model.predict(test_votes)
    accuracy = accuracy_score(y_test, y_pred)
    return dt_model, accuracy

def sequential_knn_decision_tree(X_train, X_test, y_train, y_test, num_partitions=5, n_neighbors=3):
    """Train a Decision Tree on centroids of neighbours pooled across partitions.

    The training data is split into ``num_partitions`` contiguous chunks and one
    ``KNeighborsClassifier`` is fitted on each chunk. For every sample, the
    ``n_neighbors`` nearest rows from *each* partition are pooled and averaged
    into a single centroid with the same number of features as the input. The
    tree is fitted on the centroids of the training rows (with ``y_train``) and
    scored on the centroids of the test rows (with ``y_test``).

    NOTE: this notebook defines this function in more than one cell; the most
    recently executed definition is the one in effect.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and labels used only to compute the accuracy.
    num_partitions : int, default=5
        Number of chunks the training data is split into (one KNN per chunk).
    n_neighbors : int, default=3
        Neighbours retrieved from each partition for every query row.

    Returns
    -------
    dt_model : DecisionTreeClassifier
        Tree fitted on the (n_train, n_features) matrix of pooled centroids.
    accuracy : float
        Accuracy of ``dt_model`` on the pooled centroids of ``X_test``.

    Raises
    ------
    ValueError
        If ``num_partitions`` is less than 1, or if the smallest partition
        would contain fewer than ``n_neighbors`` samples.
    """
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)
    y_train = np.asarray(y_train)
    y_test = np.asarray(y_test)

    if num_partitions < 1:
        raise ValueError("num_partitions must be at least 1")
    # np.array_split makes chunks of size n // k or n // k + 1, so n // k is the smallest.
    smallest_partition = len(X_train) // num_partitions
    if smallest_partition < n_neighbors:
        raise ValueError(
            f"each partition needs at least n_neighbors={n_neighbors} samples, "
            f"but the smallest of {num_partitions} partitions has {smallest_partition}"
        )

    # Partitioning the training data
    X_train_partitions = np.array_split(X_train, num_partitions)
    y_train_partitions = np.array_split(y_train, num_partitions)

    # Fit one KNN model per partition
    knn_models = []
    for X_part, y_part in zip(X_train_partitions, y_train_partitions):
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(X_part, y_part)
        knn_models.append(knn)

    def pooled_centroids(X_query):
        # kneighbors() returns row indices local to the partition the model was
        # fitted on, so they must index that partition, not the full X_train.
        neighbor_points = np.concatenate(
            [
                X_part[knn.kneighbors(X_query, return_distance=False)]
                for knn, X_part in zip(knn_models, X_train_partitions)
            ],
            axis=1,
        )  # shape: (n_query, num_partitions * n_neighbors, n_features)
        # Averaging over the neighbour axis leaves one 2-D row per query sample.
        return neighbor_points.mean(axis=1)

    train_centroids = pooled_centroids(X_train)
    test_centroids = pooled_centroids(X_test)

    # Train the Decision Tree on training centroids, score on held-out centroids
    dt_model = DecisionTreeClassifier()
    dt_model.fit(train_centroids, y_train)
    y_pred = dt_model.predict(test_centroids)
    accuracy = accuracy_score(y_test, y_pred)
    return dt_model, accuracy


In [None]:
def sequential_knn_decision_tree(X_train, X_test, y_train, y_test, num_partitions=5, n_neighbors=3):
    """Train a Decision Tree on centroids of neighbours pooled across partitions.

    The training data is split into ``num_partitions`` contiguous chunks and one
    ``KNeighborsClassifier`` is fitted on each chunk. For every sample, the
    ``n_neighbors`` nearest rows from *each* partition are pooled and averaged
    into a single centroid with the same number of features as the input. The
    tree is fitted on the centroids of the training rows (with ``y_train``) and
    scored on the centroids of the test rows (with ``y_test``).

    NOTE: this notebook defines this function in more than one cell; the most
    recently executed definition is the one in effect.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and labels used only to compute the accuracy.
    num_partitions : int, default=5
        Number of chunks the training data is split into (one KNN per chunk).
    n_neighbors : int, default=3
        Neighbours retrieved from each partition for every query row.

    Returns
    -------
    dt_model : DecisionTreeClassifier
        Tree fitted on the (n_train, n_features) matrix of pooled centroids.
    accuracy : float
        Accuracy of ``dt_model`` on the pooled centroids of ``X_test``.

    Raises
    ------
    ValueError
        If ``num_partitions`` is less than 1, or if the smallest partition
        would contain fewer than ``n_neighbors`` samples.
    """
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)
    y_train = np.asarray(y_train)
    y_test = np.asarray(y_test)

    if num_partitions < 1:
        raise ValueError("num_partitions must be at least 1")
    # np.array_split makes chunks of size n // k or n // k + 1, so n // k is the smallest.
    smallest_partition = len(X_train) // num_partitions
    if smallest_partition < n_neighbors:
        raise ValueError(
            f"each partition needs at least n_neighbors={n_neighbors} samples, "
            f"but the smallest of {num_partitions} partitions has {smallest_partition}"
        )

    # Partitioning the training data
    X_train_partitions = np.array_split(X_train, num_partitions)
    y_train_partitions = np.array_split(y_train, num_partitions)

    # Fit one KNN model per partition
    knn_models = []
    for X_part, y_part in zip(X_train_partitions, y_train_partitions):
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(X_part, y_part)
        knn_models.append(knn)

    def pooled_centroids(X_query):
        # kneighbors() returns row indices local to the partition the model was
        # fitted on, so they must index that partition, not the full X_train.
        neighbor_points = np.concatenate(
            [
                X_part[knn.kneighbors(X_query, return_distance=False)]
                for knn, X_part in zip(knn_models, X_train_partitions)
            ],
            axis=1,
        )  # shape: (n_query, num_partitions * n_neighbors, n_features)
        # Averaging over the neighbour axis leaves one 2-D row per query sample.
        return neighbor_points.mean(axis=1)

    train_centroids = pooled_centroids(X_train)
    test_centroids = pooled_centroids(X_test)

    # Train the Decision Tree on training centroids, score on held-out centroids
    dt_model = DecisionTreeClassifier()
    dt_model.fit(train_centroids, y_train)
    y_pred = dt_model.predict(test_centroids)
    accuracy = accuracy_score(y_test, y_pred)
    return dt_model, accuracy


In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Model 1: Parallel KNN followed by Decision Tree
dt_model_1, accuracy_1 = parallel_knn_decision_tree(
    X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
)
print("Model 1 Accuracy:", accuracy_1)

# Model 2: Sequential KNN and Decision Tree
dt_model_2, accuracy_2 = sequential_knn_decision_tree(
    X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
)
print("Model 2 Accuracy:", accuracy_2)


Model 1 Accuracy: 1.0


ValueError: Found array with dim 3. DecisionTreeClassifier expected <= 2.

In [None]:
def sequential_knn_decision_tree(X_train, X_test, y_train, y_test, num_partitions=5, n_neighbors=3):
    """Train a Decision Tree on centroids of neighbours pooled across partitions.

    The training data is split into ``num_partitions`` contiguous chunks and one
    ``KNeighborsClassifier`` is fitted on each chunk. For every sample, the
    ``n_neighbors`` nearest rows from *each* partition are pooled and averaged
    into a single centroid with the same number of features as the input. The
    tree is fitted on the centroids of the training rows (with ``y_train``) and
    scored on the centroids of the test rows (with ``y_test``).

    NOTE: this notebook defines this function in more than one cell; the most
    recently executed definition is the one in effect.

    Parameters
    ----------
    X_train, y_train : array-like
        Training features and labels.
    X_test, y_test : array-like
        Held-out features and labels used only to compute the accuracy.
    num_partitions : int, default=5
        Number of chunks the training data is split into (one KNN per chunk).
    n_neighbors : int, default=3
        Neighbours retrieved from each partition for every query row.

    Returns
    -------
    dt_model : DecisionTreeClassifier
        Tree fitted on the (n_train, n_features) matrix of pooled centroids.
    accuracy : float
        Accuracy of ``dt_model`` on the pooled centroids of ``X_test``.

    Raises
    ------
    ValueError
        If ``num_partitions`` is less than 1, or if the smallest partition
        would contain fewer than ``n_neighbors`` samples.
    """
    X_train = np.asarray(X_train)
    X_test = np.asarray(X_test)
    y_train = np.asarray(y_train)
    y_test = np.asarray(y_test)

    if num_partitions < 1:
        raise ValueError("num_partitions must be at least 1")
    # np.array_split makes chunks of size n // k or n // k + 1, so n // k is the smallest.
    smallest_partition = len(X_train) // num_partitions
    if smallest_partition < n_neighbors:
        raise ValueError(
            f"each partition needs at least n_neighbors={n_neighbors} samples, "
            f"but the smallest of {num_partitions} partitions has {smallest_partition}"
        )

    # Partitioning the training data
    X_train_partitions = np.array_split(X_train, num_partitions)
    y_train_partitions = np.array_split(y_train, num_partitions)

    # Fit one KNN model per partition
    knn_models = []
    for X_part, y_part in zip(X_train_partitions, y_train_partitions):
        knn = KNeighborsClassifier(n_neighbors=n_neighbors)
        knn.fit(X_part, y_part)
        knn_models.append(knn)

    def pooled_centroids(X_query):
        # kneighbors() returns row indices local to the partition the model was
        # fitted on, so they must index that partition, not the full X_train.
        neighbor_points = np.concatenate(
            [
                X_part[knn.kneighbors(X_query, return_distance=False)]
                for knn, X_part in zip(knn_models, X_train_partitions)
            ],
            axis=1,
        )  # shape: (n_query, num_partitions * n_neighbors, n_features)
        # Averaging over the neighbour axis leaves one 2-D row per query sample,
        # so no reshape is needed before handing the result to the tree.
        return neighbor_points.mean(axis=1)

    train_centroids = pooled_centroids(X_train)
    test_centroids = pooled_centroids(X_test)

    # Train the Decision Tree on training centroids, score on held-out centroids
    dt_model = DecisionTreeClassifier()
    dt_model.fit(train_centroids, y_train)
    y_pred = dt_model.predict(test_centroids)
    accuracy = accuracy_score(y_test, y_pred)
    return dt_model, accuracy


In [None]:
# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Model 1: Parallel KNN followed by Decision Tree
dt_model_1, accuracy_1 = parallel_knn_decision_tree(
    X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
)
print("Model 1 Accuracy:", accuracy_1)

# Model 2: Sequential KNN and Decision Tree
dt_model_2, accuracy_2 = sequential_knn_decision_tree(
    X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test
)
print("Model 2 Accuracy:", accuracy_2)


Model 1 Accuracy: 1.0
Model 2 Accuracy: 1.0


In [None]:
from sklearn.datasets import load_iris
from sklearn.neighbors import NearestNeighbors
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# Fetch the Iris dataset and separate features from labels
iris = load_iris()
X = iris.data
y = iris.target

# 1. Partition the Data
# Two contiguous partitions: the rows before index 75 and the rows from 75 onward
X1 = X[:75]
X2 = X[75:]
y1 = y[:75]
y2 = y[75:]

# 2. Perform KNN on Each Partition
# A single unsupervised neighbour index (5 neighbours per query) shared by both partitions
knn = NearestNeighbors(n_neighbors=5)

# 3. Create Sets of K Nearest Neighbors and Calculate Centroids
def get_centroid(X, sample_index):
    """Return the mean of the nearest neighbours of ``X[sample_index]`` within ``X``.

    Uses the module-level ``knn`` estimator, refitting it on ``X`` on every
    call, then averages the rows of ``X`` that it reports as the neighbours of
    the chosen sample. The result has one value per column of ``X``.
    """
    query = [X[sample_index]]
    neighbor_rows = knn.fit(X).kneighbors(query, return_distance=False)[0]
    return X[neighbor_rows].mean(axis=0)

# One centroid per partition, built around the first sample of each (example choice)
centroid1, centroid2 = (get_centroid(partition, 0) for partition in (X1, X2))

# 4. Combine All Centroids to Form Dataset R
# One row per centroid
R = np.array([centroid1, centroid2])

# 7. Run Decision Tree on Dataset R
# (steps 5-6 do not appear in this cell)
# Each centroid is labelled with the first sample's label of its partition,
# on the assumption that this label represents the partition.
dt_classifier = DecisionTreeClassifier().fit(R, [y1[0], y2[0]])

# Test Decision Tree model (optional)
# Example: predict the label of one new sample with the trained tree
new_sample = np.array([5.1, 3.5, 1.4, 0.2]).reshape(1, -1)
predicted_label = dt_classifier.predict(new_sample)
print("Predicted label:", predicted_label)


Predicted label: [0]
