In [1]:
# !pip install -r requirements.txt

In [72]:
import numpy as np
from sklearn.svm import SVC
import itertools
from scipy.spatial import distance
from sklearn.linear_model import LogisticRegression
import pandas as pd
from sklearn.model_selection import train_test_split

class TernaryECOC:
    def __init__(self, n_classes, base_classifier=SVC(), n_classifiers=5):
        self.base_classifier = base_classifier
        self.classifiers = []
        self.num_classifiers = n_classifiers
        self.n_classes = n_classes
        if n_classifiers != n_classes * (n_classes - 1) / 2:
            raise ValueError("Number of classifiers must be equal to n_classes*(n_classes-1)/2")
        self.n_classifiers=n_classifiers
        self.ecoc_matrix= self.generate_ovo_codes()

    def train(self, X, y):
        for i in range(self.n_classifiers):
            ternary_code = self.ecoc_matrix[:, i]

            class_0 = np.where(ternary_code == 0)[0] # label as 1
            class_1 = np.where(ternary_code == 1)[0] # label as 0

            mask = np.isin(y, np.concatenate((class_0, class_1)))

        

            filtered_y = y[mask]
            filtered_X = X[mask,:]

 
            print("Training classifier {} with labels 0: {} and 1 :{}".format(i, np.where(ternary_code==0), np.where(ternary_code==1)))
            classifier = self.base_classifier.fit(filtered_X, filtered_y)
            self.classifiers.append((ternary_code, classifier))

    def predict(self, X):
        class_combinations = list(itertools.combinations(range(self.n_classes), 2))
        predictions = []
        for i in range(len(self.classifiers)):
            ternary_code, classifier = self.classifiers[i]
            binary_predictions = classifier.predict(X)
            print("Predictions for classifier {} class{}and {} are {}".format(i,class_combinations[i][0] ,class_combinations[i][1],np.unique(binary_predictions)))
            predictions.append(binary_predictions)
        return self.decode_labels(np.column_stack(predictions))

    def generate_ovo_codes(self):
        ecoc_matrix = np.ones((self.n_classes, self.n_classifiers), dtype=int)*-1
        class_combinations = list(itertools.combinations(range(self.n_classes), 2))
        for i, combination in enumerate(class_combinations):
            class_i, class_j = combination
            ecoc_matrix[class_i,i] = 1
            ecoc_matrix[class_j,i] = 0
        return ecoc_matrix

    def decode_labels(self, predictions):
        print ("dicoding predictions: ", predictions)
        print(self.ecoc_matrix)
        min_distances = []
        for pred_row in predictions:
            hamming_distances = distance.cdist([pred_row], self.ecoc_matrix, metric='hamming')
            min_distance_index = np.argmin(hamming_distances)
            min_distances.append(min_distance_index)
        return min_distances


# # Example usage
# X_train = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10], [11, 12]])
# y_train = np.array([0, 1, 2, 1, 2, 0])
# X_test = np.array([[13, 14], [15, 16], [17, 18]])

# ecoc = TernaryECOC(3,LogisticRegression(),  3)
# ecoc.train(X_train, y_train)
# predictions = ecoc.predict(X_test)
# print(predictions)



# df=pd.read_csv('data/glass.csv',header=None)
# X=df.iloc[:,:-1].values
# y=df.iloc[:,-1].values

X,y =get_X_y_data()
# X,y = [[1],[1],[1],[1],[1],[1],[1],[1],[1],[1],[1],[1],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[0],[2],[2],[2]],[1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2]
# X=np.array(X)
# y=np.array(y)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
n_classes = len(np.unique(y))
n_classifiers = n_classes * (n_classes - 1) // 2
ecoc = TernaryECOC(n_classes,LogisticRegression(),  n_classifiers)
ecoc.train(X_train, y_train)
predictions = ecoc.predict(X_test[4:5,:])
print(predictions)

ValueError: n_classes(3) * n_clusters_per_class(1) must be smaller or equal 2**n_informative(1)=2

In [69]:
y_test[4:5]

array([2])

In [22]:
predictions

[0, 2, 0, 0, 0, 0, 0, 2, 2, 0, 2, 2, 0, 0, 0, 2, 0, 2, 0, 2]

In [23]:
y_test

array([1, 2, 1, 1, 0, 1, 1, 2, 2, 0, 0, 1, 0, 0, 0, 2, 1, 2, 1, 1])

In [34]:
X_test[0:1,:]

array([[-2.22218546, -2.66822292,  1.26032575,  0.17933254]])

In [76]:
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification

def get_X_y_data():
# Generate a synthetic classification dataset
    X, y = make_classification(
        n_samples=100,  # Number of samples
    n_features=2,
        n_informative=2, 
        n_redundant=0,
        n_classes=3,
        n_clusters_per_class
        random_state=42  # Random seed for reproducibility
    )

    # Extract the first feature from the dataset
    first_feature = X[:, 0]

    # Create a scatter plot
    plt.scatter(first_feature, range(len(first_feature)), c=y, cmap='viridis')

    # Set labels and title
    plt.xlabel('First Feature')
    plt.ylabel('Index')
    plt.title('Scatter Plot with Class Colors')

    # Show the plot
    plt.show()

    return X,y


SyntaxError: invalid syntax. Perhaps you forgot a comma? (2827836867.py, line 12)

In [53]:
get_X_y_data()

ValueError: n_classes(3) * n_clusters_per_class(1) must be smaller or equal 2**n_informative(1)=2