In [129]:
import pandas as pd
import numpy as np
from scipy import spatial
from sklearn.preprocessing import normalize
from scipy import sparse
from pyunlocbox import functions, solvers
from scipy import spatial
from matplotlib import pyplot as plt

In [132]:
df = pd.read_csv('processed.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Release date,Max resolution,Low resolution,Effective pixels,Zoom wide (W),Zoom tele (T),Normal focus range,Macro focus range,Storage included,Weight (inc. batteries),Dimensions,Price
0,0,1997.0,1024.0,640.0,0.0,38.0,114.0,70.0,40.0,4.0,,,
1,1,1998.0,1280.0,,1.0,38.0,114.0,,0.0,4.0,420.0,158.0,
2,2,2000.0,640.0,,0.0,45.0,45.0,0.0,,,,0.0,
3,3,,1152.0,,0.0,35.0,,0.0,0.0,,0.0,0.0,269.0
4,4,1999.0,1152.0,640.0,,43.0,43.0,50.0,0.0,40.0,300.0,128.0,1299.0


# K- NN Imputation

We are making use of k-NN imputation as discussed in the answer script

In [138]:
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=10, weights='distance', metric='nan_euclidean')
# fit on the dataset
imputer.fit(df)
# transform the dataset
X =imputer.transform(df)

In [140]:
from sklearn.preprocessing import normalize
#X = df.to_numpy()
X_norm = normalize(X, axis=0, norm='max')
#X_norm

#print(X_norm)

## Graph Learning algo

Implementation of the paper : Kalofolias, V. ["How to learn a graph from smooth signals"](https://arxiv.org/abs/1601.02513), AISTATS, 2016

Logic credits : https://github.com/rodrigo-pena/graph-learning which is under MIT License

In [141]:
def mapping(n):
    #Total number of edges
    ne = int(n*(n-1)/2)
    r1 = np.zeros((ne, ))
    r2 = np.zeros((ne, ))
    itr = 0
    for i in np.arange(1, n):
        r1[itr: (itr + (n - i))] = i - 1
        r2[itr: (itr + (n - i))] = np.arange(i, n)
        itr = itr + n - i
    row = np.concatenate((r1, r2))
    col = np.concatenate((np.arange(0, ne), np.arange(0, ne)))
    values = np.ones(len(row))
    K = sparse.coo_matrix((values, (row, col)), shape=(n, ne))
    return lambda w: K.dot(w), lambda d: K.transpose().dot(d)

In [142]:
def learngraph(X,alpha=1,beta=1,step=0.5,maxit=1000, rtol=1e-5):
    N = X.shape[0]
    z = spatial.distance.pdist(X,'euclidean') # calculating pairwise distance
    w0 = np.zeros(z.shape)
    K, Kt = mapping(N)
    norm_K = np.sqrt(2 * (N - 1))
    
    # Assemble functions in the objective
    f1 = functions.func()
    f1._eval = lambda w: 2 * np.dot(w, z)
    f1._prox = lambda w, gamma: np.maximum(0, w - (2 * gamma * z))

    f2 = functions.func()
    f2._eval = lambda w: - alpha * np.sum(np.log(np.maximum(
        np.finfo(np.float64).eps, K(w))))
    f2._prox = lambda d, gamma: np.maximum(
        0, 0.5 * (d + np.sqrt(d**2 + (4 * alpha * gamma))))

    f3 = functions.func()
    f3._eval = lambda w: beta * np.sum(w**2)
    f3._grad = lambda w: 2 * beta * w
    lipg = 2 * beta
    
    # Rescale stepsize
    stepsize = step / (1 + lipg + norm_K)

    # Solve problem
    solver = solvers.mlfbf(L=K, Lt=Kt, step=stepsize)
    problem = solvers.solve([f1, f2, f3], x0=w0, solver=solver, maxit=maxit,
                            rtol=rtol)

    # Transform weight matrix from vector form to matrix form
    W = spatial.distance.squareform(problem['sol'])
    W[W<0] = 0
    return W

In [143]:
beta=5
alpha = 1
W = learngraph(X_norm,beta = beta,alpha = 1)

Solution found after 354 iterations:
    objective function f(sol) = -9.945113e+00
    stopping criterion: RTOL


## Results

In [144]:
print(W.shape)
#print(np.around(W))
print(W)

(1038, 1038)
[[0.         0.00230064 0.         ... 0.         0.         0.        ]
 [0.00230064 0.         0.         ... 0.         0.         0.        ]
 [0.         0.         0.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 0.         0.0488914  0.01494779]
 [0.         0.         0.         ... 0.0488914  0.         0.02654794]
 [0.         0.         0.         ... 0.01494779 0.02654794 0.        ]]


In [145]:
'''
import matplotlib.pyplot as plt 
import networkx as nx 

A = np.zeros(W.shape)
for i in range(W.shape[0]):
   for j in range(W.shape[1]):
       if(W[i][j]!=0):
           A[i][j]=1
        else:
            A[i][j]=0


plt.rcParams["figure.figsize"] = [15, 15]
plt.rcParams["figure.autolayout"] = True
data2D = np.random.random((50, 50))
im = plt.imshow(A, cmap="copper_r")
plt.colorbar(im)
plt.title(' A concise map of Adjacency matrix',fontsize=25);
plt.show()
'''

'\nimport matplotlib.pyplot as plt \nimport networkx as nx \n\nA = np.zeros(W.shape)\nfor i in range(W.shape[0]):\n   for j in range(W.shape[1]):\n       if(W[i][j]!=0):\n           A[i][j]=1\n        else:\n            A[i][j]=0\n\n\nplt.rcParams["figure.figsize"] = [15, 15]\nplt.rcParams["figure.autolayout"] = True\ndata2D = np.random.random((50, 50))\nim = plt.imshow(A, cmap="copper_r")\nplt.colorbar(im)\nplt.title(\' A concise map of Adjacency matrix\',fontsize=25);\nplt.show()\n'

In [147]:

# Displaying graph nodes
G = nx.Graph()
for i in range(W.shape[0]):
    for j in range(W.shape[1]): 
       if A[i][j] == 1: 
          G.add_edge(i,j)
list(G.nodes)
list(G.edges)

[(0, 5),
 (0, 7),
 (0, 9),
 (0, 11),
 (0, 13),
 (0, 16),
 (0, 24),
 (0, 32),
 (0, 47),
 (0, 52),
 (0, 53),
 (0, 54),
 (0, 55),
 (0, 56),
 (0, 58),
 (0, 59),
 (0, 60),
 (0, 61),
 (0, 84),
 (0, 85),
 (0, 126),
 (0, 127),
 (0, 158),
 (0, 160),
 (0, 161),
 (0, 167),
 (0, 174),
 (0, 176),
 (0, 185),
 (0, 187),
 (0, 192),
 (0, 194),
 (0, 195),
 (0, 204),
 (0, 206),
 (0, 208),
 (0, 209),
 (0, 211),
 (0, 218),
 (0, 222),
 (0, 223),
 (0, 286),
 (0, 287),
 (0, 288),
 (0, 290),
 (0, 295),
 (0, 297),
 (0, 298),
 (0, 299),
 (0, 300),
 (0, 302),
 (0, 304),
 (0, 305),
 (0, 312),
 (0, 313),
 (0, 314),
 (0, 315),
 (0, 316),
 (0, 318),
 (0, 364),
 (0, 367),
 (0, 368),
 (0, 369),
 (0, 370),
 (0, 377),
 (0, 378),
 (0, 379),
 (0, 380),
 (0, 383),
 (0, 386),
 (0, 387),
 (0, 389),
 (0, 391),
 (0, 392),
 (0, 396),
 (0, 401),
 (0, 402),
 (0, 403),
 (0, 404),
 (0, 405),
 (0, 406),
 (0, 419),
 (0, 452),
 (0, 470),
 (0, 472),
 (0, 482),
 (0, 500),
 (0, 510),
 (0, 511),
 (0, 550),
 (0, 552),
 (0, 563),
 (0, 565),
