# In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import euclidean_distances

# Read both CSV files (neither has a header row) into plain NumPy arrays.
train_data = pd.read_csv('X.csv', header=None).to_numpy()
test_data = pd.read_csv('y.csv', header=None).to_numpy()

# Training file: the first two columns are the features (weight, height),
# the third column is the class label (1, 2, 3).
X_train, y_train = train_data[:, :2], train_data[:, 2]

# Test file: every column is used as a feature.
X_test = test_data


# Generalized Context Model (GCM)
class GeneralizedContextModel:
    """Nearest-prototype classifier with exponentially decaying similarity.

    ``fit`` stores the mean feature vector of every class and ``predict``
    assigns each sample to the class whose mean is most similar, where
    similarity is ``exp(-sensitivity * euclidean_distance)``.

    NOTE(review): only one mean vector per class is kept, not the individual
    training exemplars. Confirm that this prototype simplification is the
    intended reading of the "Generalized Context Model".
    """

    def __init__(self, sensitivity=1.0):
        """Create an unfitted model.

        Args:
            sensitivity: Rate at which similarity decays with distance.
        """
        self.sensitivity = sensitivity
        # label -> mean feature vector of that class; populated by fit().
        self.prototypes = {}

    def fit(self, X, y):
        """Compute one prototype (per-class feature mean) for each label.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of shape (n_samples,) holding the class labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        # Rebuild the mapping from scratch so that labels seen by an earlier
        # fit() call cannot survive and take part in later predictions.
        self.prototypes = {
            label: X[y == label].mean(axis=0) for label in np.unique(y)
        }

    def predict(self, X):
        """Return the label of the most similar prototype for each row of X.

        Ties are resolved in favour of the label that was stored first
        (labels are stored in the sorted order produced by ``np.unique``).

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            1-D array of predicted labels; an empty array when X has no rows.

        Raises:
            ValueError: If the model has not been fitted, or X is not 2-D.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim >= 1 and len(X) == 0:
            return np.array([])
        if not self.prototypes:
            raise ValueError("fit() must be called before predict()")
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array (n_samples, n_features)")

        labels = np.array(list(self.prototypes))
        centres = np.stack(list(self.prototypes.values()))

        # distances[i, j] is the Euclidean distance from sample i to class j.
        distances = np.linalg.norm(X[:, None, :] - centres[None, :, :], axis=2)

        # Rank by log-similarity instead of exp(-c * d): the exponential
        # underflows to 0.0 for distant points, which would turn every class
        # into a tie and silently select the first label.
        log_similarity = -self.sensitivity * distances
        return labels[np.argmax(log_similarity, axis=1)]


# Rational Model of Categorization (RMC)
class RationalModel:
    """Classifier that scores each class by Mahalanobis distance to its mean.

    ``fit`` stores a mean vector and a ridge-regularised covariance matrix
    per class. ``predict`` picks the class maximising
    ``-0.5 * (x - mean)^T @ inv(cov) @ (x - mean)``.

    NOTE(review): the score has no covariance-determinant term and no class
    prior, so it is not a normalised Gaussian posterior. Confirm that this
    is intended. ``alpha`` is stored but not read by ``fit`` or ``predict``.
    """

    def __init__(self, alpha=1.0):
        """Create an unfitted model.

        Args:
            alpha: Stored on the instance; not used by this class's methods.
        """
        self.alpha = alpha
        # label -> (mean vector, regularised covariance); populated by fit().
        self.categories = {}

    def fit(self, X, y):
        """Estimate the mean and covariance of every class.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of shape (n_samples,) holding the class labels.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        n_features = X.shape[1]
        # Small ridge on the diagonal keeps the covariance invertible.
        ridge = np.eye(n_features) * 1e-6

        categories = {}
        for label in np.unique(y):
            subset = X[y == label]
            if len(subset) > 1:
                # atleast_2d: np.cov returns a 0-d array for one feature.
                cov = np.atleast_2d(np.cov(subset, rowvar=False))
            else:
                # A sample covariance is undefined for a single point
                # (np.cov would return NaN), so fall back to the ridge only.
                cov = np.zeros((n_features, n_features))
            categories[label] = (subset.mean(axis=0), cov + ridge)

        # Replace rather than update, so labels from an earlier fit() call
        # do not linger.
        self.categories = categories

    def predict(self, X):
        """Return the best-scoring class label for each row of X.

        Ties are resolved in favour of the label that was stored first
        (labels are stored in the sorted order produced by ``np.unique``).

        Args:
            X: Array-like of shape (n_samples, n_features).

        Returns:
            1-D array of predicted labels; an empty array when X has no rows.

        Raises:
            ValueError: If the model has not been fitted, or X is not 2-D.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim >= 1 and len(X) == 0:
            return np.array([])
        if not self.categories:
            raise ValueError("fit() must be called before predict()")
        if X.ndim != 2:
            raise ValueError("X must be a 2-D array (n_samples, n_features)")

        labels = np.array(list(self.categories))
        scores = np.empty((len(labels), X.shape[0]))
        for row, (mean, cov) in enumerate(self.categories.values()):
            # Invert once per class rather than once per (sample, class).
            precision = np.linalg.inv(cov)
            diff = X - mean
            # Keep the exponent itself instead of exp(exponent): the
            # exponential underflows to 0.0 for distant samples, which would
            # tie every class and silently select the first label.
            scores[row] = -0.5 * np.einsum("ij,jk,ik->i", diff, precision, diff)
        return labels[np.argmax(scores, axis=0)]


# Function to test exchangeability
def test_exchangeability(ModelClass, X_train, y_train, X_test, n_permutations=5,
                         *, random_state=None):
    """Check whether a model's predictions depend on training-sample order.

    The training set is shuffled ``n_permutations`` times (features and
    labels together). For each shuffle a fresh ``ModelClass()`` is fitted
    and asked to predict ``X_test``.

    Args:
        ModelClass: Zero-argument callable returning an object with
            ``fit(X, y)`` and ``predict(X)`` methods.
        X_train: Array-like of training features, one row per sample.
        y_train: Array-like of training labels, same length as ``X_train``.
        X_test: Features passed unchanged to ``predict``.
        n_permutations: Number of shuffled fits to run.
        random_state: Optional seed or ``numpy.random.Generator`` that makes
            the shuffles reproducible. When ``None`` the global
            ``np.random`` state is used.

    Returns:
        ``(predictions_list, consistency)``: the predictions from every
        shuffle, and ``True`` when they are all identical (vacuously true
        when there are no shuffles).

    Raises:
        ValueError: If ``X_train`` and ``y_train`` differ in length.
    """
    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    if len(X_train) != len(y_train):
        raise ValueError(
            "X_train and y_train must contain the same number of samples"
        )

    # Keep drawing from the global RNG when no seed is given, so callers that
    # already rely on np.random.seed() get the same shuffles as before.
    if random_state is None:
        permute = np.random.permutation
    else:
        permute = np.random.default_rng(random_state).permutation

    predictions_list = []
    for _ in range(n_permutations):
        # One index permutation applied to both arrays keeps pairs aligned.
        order = permute(len(X_train))
        model = ModelClass()
        model.fit(X_train[order], y_train[order])
        predictions_list.append(model.predict(X_test))

    # Every later run must match the first one.
    consistency = all(
        np.array_equal(predictions_list[0], later)
        for later in predictions_list[1:]
    )
    return predictions_list, consistency


# The name matches pytest's ``test_*`` discovery pattern; opt out so that
# importing this helper into a test module does not make pytest run it as a
# test and fail on its required arguments.
test_exchangeability.__test__ = False


def _show_permutation_results(header, predictions, footer):
    """Print a header, one line per permutation's predictions, and a footer."""
    print(header)
    for permutation_output in predictions:
        print(permutation_output)
    print(footer)


# Exchangeability check for the GCM: shuffle, refit, compare predictions.
gcm_predictions, gcm_consistent = test_exchangeability(
    GeneralizedContextModel, X_train, y_train, X_test
)
_show_permutation_results(
    "GCM Predictions for each permutation:",
    gcm_predictions,
    f"GCM Predictions consistent across permutations? {gcm_consistent}",
)

# Same check for the RMC; the header's leading newline separates the reports.
rmc_predictions, rmc_consistent = test_exchangeability(
    RationalModel, X_train, y_train, X_test
)
_show_permutation_results(
    "\nRMC Predictions for each permutation:",
    rmc_predictions,
    f"RMC Predictions consistent across permutations? {rmc_consistent}",
)


# Output:
# GCM Predictions for each permutation:
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# GCM Predictions consistent across permutations? True
#
# RMC Predictions for each permutation:
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# [3 2 3 2 2 3 2 2 2 1]
# RMC Predictions consistent across permutations? True
