<a href="https://colab.research.google.com/github/Siahkamari/Learning-to-Approximate-a-Bregman-Divergence/blob/master/Python/example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!git clone https://github.com/Siahkamari/Learning-to-Approximate-a-Bregman-Divergence.git
%cd /content/Learning-to-Approximate-a-Bregman-Divergence/Python

Cloning into 'Learning-to-Approximate-a-Bregman-Divergence'...
remote: Enumerating objects: 891, done.
remote: Counting objects: 100% (482/482), done.
remote: Compressing objects: 100% (414/414), done.
remote: Total 891 (delta 117), reused 373 (delta 62), pack-reused 409
Receiving objects: 100% (891/891), 48.72 MiB | 22.88 MiB/s, done.
Resolving deltas: 100% (272/272), done.
/content/Learning-to-Approximate-a-Bregman-Divergence/Python


In [2]:
import time
import urllib
import urllib.request

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch as th
from tqdm.notebook import tqdm
# Place tensors on the first CUDA GPU when one is available, otherwise on the CPU.
device = th.device("cuda:0") if th.cuda.is_available() else th.device("cpu")

## Download/Read the wine data from UCI ML repo

In [3]:
# Download the raw wine data and store it next to the repository's other data sets.
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine/wine.data'
csv_path = 'data/classification/wine.csv'
urllib.request.urlretrieve(url, csv_path)

# The file is read without a header row, so column names are supplied here.
# The first column ('cultivars') is the class label, the rest are features.
col_names = [
    'cultivars', 'Alcohol', 'Malic acid', 'Ash', 'Alcalinity of ash', 'Magnesium',
    'Total phenols', 'Flavanoids', 'Nonflavanoid phenols', 'Proanthocyanins', 'Color intensity',
    'Hue', 'OD280/OD315', 'Proline',
]

df = pd.read_csv(csv_path, names=col_names)

# Fail early on a truncated or malformed download: rows with fewer fields than
# col_names are NaN-filled by read_csv and would otherwise leak NaNs into X.
assert len(df) > 0, "wine data set is empty"
assert not df.isna().values.any(), "wine data set contains missing values"

# Features as float32 and labels as int32, both placed on the selected device.
X = th.tensor(df.drop(columns="cultivars").values, dtype=th.float32, device=device)
y = th.tensor(df["cultivars"].values, dtype=th.int32, device=device)

display(df.head())

Unnamed: 0,cultivars,Alcohol,Malic acid,Ash,Alcalinity of ash,Magnesium,Total phenols,Flavanoids,Nonflavanoid phenols,Proanthocyanins,Color intensity,Hue,OD280/OD315,Proline
0,1,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065
1,1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050
2,1,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185
3,1,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480
4,1,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735


## Split the data into train/test sets

In [4]:
# Draw a reproducible random ordering of the sample indices (generator seeded with 0).
n = X.shape[0]
rng = th.Generator(device = device).manual_seed(0)
I = th.randperm(n, generator=rng, device=device)

# The first 4/5 of the shuffled indices form the training split, the rest the test split.
n_train = int(4/5*n)
train_idx, test_idx = I[:n_train], I[n_train:]
X_train, y_train = X[train_idx], y[train_idx]
X_test, y_test = X[test_idx], y[test_idx]


## Normalize the data

In [5]:
def normalize_XX(X_train, X_test):
  """Center and scale both splits with statistics of the training split.

  Each feature (column) is shifted by its training mean and divided by its
  training mean absolute deviation (note: not the standard deviation, despite
  the name ``sigma``).

  Args:
    X_train: 2-D tensor of training samples (rows) by features (columns).
    X_test: tensor with the same number of feature columns as ``X_train``.

  Returns:
    Tuple ``(X_train, X_test)`` of normalized tensors. The inputs are not
    modified.
  """
  mu = th.mean(X_train, dim=0)
  sigma = th.mean(th.abs(X_train - mu),dim=0)
  # A feature that is constant on the training split has zero spread; divide
  # by 1 instead so the result stays finite (0 for train, plain offset for test).
  sigma = th.where(sigma > 0, sigma, th.ones_like(sigma))
  X_train = (X_train - mu)/sigma
  X_test = (X_test - mu)/sigma
  return X_train, X_test

# Rescale both splits using the mean and spread computed from the training split only.
X_train, X_test = normalize_XX(X_train, X_test)

## Train the model

In [6]:
from piecewise_linear_estimation import PBDL

# Build the learner with its default settings.
model = PBDL()

# Time the fit with a wall-clock timer; t1 and t2 are reused by the report cell.
t1 = time.perf_counter()
model.fit(X_train, y_train)
t2 = time.perf_counter()


Search for lanbda:   0%|          | 0/7 [00:00<?, ?it/s]

lanbda =  1.42e+05 , n_iter = 113 , training score =  1.000 , validation score =  0.948
lanbda =  1.42e+04 , n_iter = 113 , training score =  1.000 , validation score =  0.948
lanbda =  1.42e+03 , n_iter = 113 , training score =  1.000 , validation score =  0.948
lanbda =  1.42e+02 , n_iter = 113 , training score =  1.000 , validation score =  0.948
lanbda =  1.42e+01 , n_iter = 250 , training score =  1.000 , validation score =  0.939
lanbda =  1.42e+00 , n_iter = 181 , training score =  1.000 , validation score =  0.933
lanbda =  1.42e-01 , n_iter = 227 , training score =  1.000 , validation score =  0.932


Search for lanbda:   0%|          | 0/4 [00:00<?, ?it/s]

lanbda =  1.42e+08 , n_iter = 113 , training score =  1.000 , validation score =  0.948
lanbda =  1.42e+07 , n_iter = 113 , training score =  1.000 , validation score =  0.948
lanbda =  1.42e+06 , n_iter = 113 , training score =  1.000 , validation score =  0.948
lanbda =  1.42e+05 , n_iter = 113 , training score =  1.000 , validation score =  0.948


## Print performances of the learned metric

1.   Pairwise similar-pair classification accuracy
2.   KNN classification accuracy
3.   Ranking AUC and mean average precision (MAP)







In [7]:
# Evaluate the fitted model on every supported task, on both splits.
tasks = ['knn','pairwise', 'map', 'auc']
for task in tasks:
    # Test scoring also receives the training split; training scoring is
    # called with the training split alone.
    score_test = model.score(X_test, y_test, X_train, y_train, task=task)
    score_train = model.score(X_train, y_train, task=task)

    train_txt = "{:.3f}".format(score_train)
    test_txt = "{:.3f}".format(score_test)
    print("training", task, "=", train_txt, "\ntest", task, "=", test_txt)

# Wall-clock duration of model.fit, measured in the training cell.
elapsed = t2 - t1
print('elapsed time = ', "{:.2f}".format(elapsed), 'seconds')
print('n of iter = ', model.n_iter)

training knn = 1.000 
test knn = 1.000
training pairwise = 1.000 
test pairwise = 0.988
training map = 1.000 
test map = 1.000
training auc = 1.000 
test auc = 1.000
elapsed time =  24.79 seconds
n of iter =  141
