In [31]:
from ucimlrepo import fetch_ucirepo 
import numpy as np
from itertools import combinations
  
# Download dataset id 109 through the ucimlrepo client
wine = fetch_ucirepo(id=109)

# Feature table and target table (pandas DataFrames)
X, y = wine.data.features, wine.data.targets

In [None]:
from sklearn.preprocessing import StandardScaler

# Scale features: fit a StandardScaler on X and keep the transformed copy in X_scaled.
# NOTE(review): the scaler is fitted on the full dataset, before the train/test split,
# and X_scaled is not referenced by any later cell visible here (the split and the
# learners use the raw X) — confirm whether scaling was meant to feed the split.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [None]:
from metric_learn import LMNN, NCA, ITML
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

In [33]:
# Hold out 30% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Flatten both target sets to shape (n_samples,)
y_train, y_test = np.ravel(y_train), np.ravel(y_test)

In [36]:
# Create pairs of samples and corresponding similarity labels
def create_pairs(X, y):
    """Build every unordered pair of samples with a same-class / different-class label.

    Rows are always addressed by position, so NumPy arrays, pandas
    DataFrames/Series (whatever their index) and nested lists all work.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix; row ``i`` is sample ``i``.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Class label of each sample. It is flattened before use.

    Returns
    -------
    pairs : np.ndarray of shape (n_pairs, 2, n_features)
        ``pairs[k] == [X[i], X[j]]`` for every ``i < j``, in the same order as
        ``itertools.combinations(range(n_samples), 2)``.
    labels : np.ndarray of shape (n_pairs,)
        ``1`` where the two samples share a class, ``-1`` otherwise.
        With fewer than two samples both arrays are empty but keep these shapes.
    """
    X = np.asarray(X)
    y = np.ravel(np.asarray(y))

    # Upper-triangle indices (diagonal excluded) enumerate all i < j in
    # row-major order, i.e. exactly the order combinations() would produce.
    first, second = np.triu_indices(len(y), k=1)

    pairs = np.stack((X[first], X[second]), axis=1)
    labels = np.where(y[first] == y[second], 1, -1)
    return pairs, labels

def create_pairs_iloc(X, y):
    """Positional variant of ``create_pairs`` for pandas input.

    Kept as a separate entry point for existing callers; it converts its
    inputs to plain positional arrays and delegates to ``create_pairs``.

    Parameters
    ----------
    X : pandas.DataFrame or array-like of shape (n_samples, n_features)
        Feature matrix; rows are addressed by position, not by index label.
    y : array-like of shape (n_samples,) or (n_samples, 1)
        Class label of each sample, in the same row order as ``X``.

    Returns
    -------
    pairs : np.ndarray of shape (n_pairs, 2, n_features)
        Every unordered pair of rows ``(i, j)`` with ``i < j``.
    labels : np.ndarray of shape (n_pairs,)
        ``1`` where the two samples share a class, ``-1`` otherwise.
    """
    # Convert once up front: row-wise ``.iloc`` builds a new Series for each
    # member of every pair, and ``y[i]`` on a pandas Series looks up index
    # labels rather than positions, which breaks once the rows are shuffled.
    return create_pairs(np.asarray(X), np.ravel(y))


# Generate pairs and labels for the training data.
# X_train is addressed with .iloc inside create_pairs_iloc, so this is the
# variant used for the pandas feature table; the pairs are consumed by the ITML cell.
pairs_train, labels_train = create_pairs_iloc(X_train, y_train)

In [37]:
# Sanity check on the shape of the target table
print(y.shape)

(178, 1)


In [38]:
# LMNN: learn the metric from the labelled training split only
lmnn = LMNN(k=3, learn_rate=1e-6)
lmnn.fit(X_train, y_train)

# Map both splits with the fitted learner (train first, then test)
X_train_lmnn, X_test_lmnn = (lmnn.transform(part) for part in (X_train, X_test))



In [39]:
# NCA: learn the metric from the labelled training split only
nca = NCA(max_iter=100)
nca.fit(X_train, y_train)

# Map both splits with the fitted learner (train first, then test)
X_train_nca, X_test_nca = (nca.transform(part) for part in (X_train, X_test))

In [40]:
# ITML: unlike LMNN/NCA above, it is fitted on sample pairs and their +1/-1 labels
itml = ITML()
itml.fit(pairs_train, labels_train)

# The fitted learner is applied to the individual samples, not to the pairs
X_train_itml, X_test_itml = (itml.transform(part) for part in (X_train, X_test))

In [24]:
# One 3-nearest-neighbour classifier, refitted for each feature representation
knn = KNeighborsClassifier(n_neighbors=3)

# Baseline: original features
y_pred = knn.fit(X_train, y_train).predict(X_test)
acc = accuracy_score(y_test, y_pred)

# LMNN-transformed features
y_pred_lmnn = knn.fit(X_train_lmnn, y_train).predict(X_test_lmnn)
lmnn_acc = accuracy_score(y_test, y_pred_lmnn)

# NCA-transformed features
y_pred_nca = knn.fit(X_train_nca, y_train).predict(X_test_nca)
nca_acc = accuracy_score(y_test, y_pred_nca)

# ITML-transformed features
y_pred_itml = knn.fit(X_train_itml, y_train).predict(X_test_itml)
itml_acc = accuracy_score(y_test, y_pred_itml)

# Test-set accuracy of each representation
print(f"Original Accuracy: {acc:.2f}")
print(f"LMNN Accuracy: {lmnn_acc:.2f}")
print(f"NCA Accuracy: {nca_acc:.2f}")
print(f"ITML Accuracy: {itml_acc:.2f}")


Original Accuracy: 0.96
LMNN Accuracy: 0.98
NCA Accuracy: 0.98
ITML Accuracy: 0.98


## Generate synthetic data that shows the difference metric learning makes

In [41]:
from sklearn.datasets import make_classification


In [42]:
# Two-class, two-feature synthetic problem (both features informative, fixed seed).
# NOTE: this rebinds X and y, replacing the dataset loaded earlier in the notebook.
X, y = make_classification(n_samples=200, n_features=2, n_classes=2, n_informative=2, n_redundant=0, random_state=42)
# Multiply the second feature by 1000 in place so the two features sit on very
# different scales — presumably to make plain Euclidean k-NN depend almost
# entirely on that one feature.
X[:, 1] *= 1000

In [43]:
# Hold out 30% of the samples for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Flatten both target sets to shape (n_samples,)
y_train, y_test = np.ravel(y_train), np.ravel(y_test)

In [45]:
# Generate pairs and labels for the training data.
# X_train is indexed as X[i] inside create_pairs, so this variant is the one
# for array input; the pairs are consumed by the ITML fit.
pairs_train, labels_train = create_pairs(X_train, y_train)

In [46]:
# LMNN: fitted on the labelled training split, then applied to both splits
lmnn = LMNN(k=3, learn_rate=1e-6)
lmnn.fit(X_train, y_train)
X_train_lmnn, X_test_lmnn = (lmnn.transform(part) for part in (X_train, X_test))

# NCA: fitted on the labelled training split, then applied to both splits
nca = NCA(max_iter=100)
nca.fit(X_train, y_train)
X_train_nca, X_test_nca = (nca.transform(part) for part in (X_train, X_test))

# ITML: fitted on sample pairs and their +1/-1 labels
itml = ITML()
itml.fit(pairs_train, labels_train)
# The fitted learner is applied to the individual samples, not to the pairs
X_train_itml, X_test_itml = (itml.transform(part) for part in (X_train, X_test))



In [47]:
# One 3-nearest-neighbour classifier, refitted for each feature representation
knn = KNeighborsClassifier(n_neighbors=3)

# Baseline: original features
y_pred = knn.fit(X_train, y_train).predict(X_test)
acc = accuracy_score(y_test, y_pred)

# LMNN-transformed features
y_pred_lmnn = knn.fit(X_train_lmnn, y_train).predict(X_test_lmnn)
lmnn_acc = accuracy_score(y_test, y_pred_lmnn)

# NCA-transformed features
y_pred_nca = knn.fit(X_train_nca, y_train).predict(X_test_nca)
nca_acc = accuracy_score(y_test, y_pred_nca)

# ITML-transformed features
y_pred_itml = knn.fit(X_train_itml, y_train).predict(X_test_itml)
itml_acc = accuracy_score(y_test, y_pred_itml)

# Test-set accuracy of each representation
print(f"Original Accuracy: {acc:.2f}")
print(f"LMNN Accuracy: {lmnn_acc:.2f}")
print(f"NCA Accuracy: {nca_acc:.2f}")
print(f"ITML Accuracy: {itml_acc:.2f}")

Original Accuracy: 0.43
LMNN Accuracy: 0.85
NCA Accuracy: 0.85
ITML Accuracy: 0.83
