# URLs to download the data sets

In [20]:
# Raw CSV on GitHub (file name: match_data_2010_2011.csv); this is the entry passed to `Data([url])` below.
url= "https://raw.githubusercontent.com/N-Chandru/MTP/main/match_data_2010_2011.csv"
# Raw CSV on GitHub (file name: match_data_1995.csv); not included in the `Data([url])` call visible in this notebook.
url1="https://raw.githubusercontent.com/N-Chandru/MTP/main/match_data_1995.csv"

# Importing required packages

In [21]:
%%capture
!pip install fancyimpute

In [22]:
import pandas as pd
import numpy as np
import math
from fancyimpute import KNN, NuclearNormMinimization, SoftImpute, BiScaler

# Data preprocessing

In [23]:
def Data(Urls):
  """Load match files and keep only the player pairs of real matches.

  Parameters
  ----------
  Urls : iterable
      One entry per CSV file; each entry is handed to `pd.read_csv` as is
      (URL, path or open file-like object). Every file must contain the
      columns `playerA` and `playerB`.

  Returns
  -------
  pandas.DataFrame
      The `playerA`/`playerB` columns of all files stacked in input order,
      without the rows whose `playerB` is the literal string 'N/A Bye'.
      The per-file row labels are kept (the index is not reset). An empty
      `Urls` yields an empty frame with the two columns.
  """
  columns = ["playerA", "playerB"]
  frames = [pd.read_csv(url)[columns] for url in Urls]

  # pd.concat refuses an empty list, so return a well-formed empty frame instead.
  if not frames:
    return pd.DataFrame(columns=columns)

  # Concatenate once, after all files are read, rather than on every iteration.
  data = pd.concat(frames)

  return data[data.playerB != 'N/A Bye']

In [24]:
# Only `url` is loaded here; `url1` is not part of this list.
data = Data([url])
# Bare last expression: the notebook renders the frame as the cell output.
data

Unnamed: 0,playerA,playerB
0,Andy Roddick,Peter Luczak
1,Carsten Ball,Mischa Zverev
2,Richard Gasquet,Jarkko Nieminen
3,Matthew Ebden,Jurgen Melzer
4,Tomas Berdych,Nick Lindahl
...,...,...
5883,Roger Federer,Juan Monaco
5884,Tomas Berdych,Andy Murray
5885,Jo-Wilfried Tsonga,John Isner
5886,Roger Federer,Tomas Berdych


# Pairwise Block Rank Algo.

## Computing the Pairwise Preference Matrix

$\Pr(i \succ j) \equiv \frac{1}{1 + \exp(-(s_i - s_j))}$

In [25]:
def Count(dataframe):
  """Tally how often each distinct row occurs in `dataframe`.

  Every row is converted to a tuple of its cell values (all columns, in
  column order) and used as a dictionary key.

  Returns a plain dict mapping row-tuple -> number of occurrences.
  """
  tally = {}

  for row in dataframe.values.tolist():
    key = tuple(row)
    tally[key] = tally.get(key, 0) + 1

  return tally

def probability(Matrix):
  """Turn a pairwise score matrix into a matrix of base-10 log-odds.

  With Pr(i > j) = 1 / (1 + exp(-(M[i, j] - M[j, i]))) the ratio
  Pr(i > j) / Pr(j > i) equals exp(M[i, j] - M[j, i]), so

      log10(Pr(i > j) / Pr(j > i)) = (M[i, j] - M[j, i]) / ln(10).

  The closed form is evaluated directly. Going through `exp` first overflows
  for large gaps and the resulting inf was then replaced by 0, which made a
  very one-sided pair look like a pair with no information.

  Parameters
  ----------
  Matrix : 2-D square array-like
      Matrix[i, j] is the score of i against j (win counts when called from PPM).

  Returns
  -------
  numpy.ndarray
      Antisymmetric float matrix of log10 odds; entries that are NaN or
      infinite (possible only if the input holds NaN/inf) are set to 0.
  """
  scores = np.asarray(Matrix, dtype=float)
  _rows, _cols = scores.shape  # same 2-D requirement as before

  # inf - inf in the input would raise an "invalid" warning; it is mapped to 0 below.
  with np.errstate(invalid='ignore'):
    skew = (scores - scores.T) / np.log(10.0)

  skew = np.nan_to_num(skew, nan=0, posinf=0, neginf=0)
  return skew

def PPM(dataframe):
  """Build the pairwise preference (log-odds) matrix for all players.

  Parameters
  ----------
  dataframe : pandas.DataFrame
      Must contain the columns `playerA` and `playerB`; one row per match.
      # presumably playerA is the winner of the match — verify against the data source

  Returns
  -------
  numpy.ndarray
      `probability(counts)` where counts[i, j] is the number of rows with
      playerA == player i and playerB == player j (the diagonal stays 0).
      Player i is the i-th element of `sorted(set(playerA) | set(playerB), key=str)`,
      so the row/column order is reproducible between runs instead of
      following set iteration order.
  """
  A = dataframe['playerA'].tolist()
  B = dataframe['playerB'].tolist()
  # Sorting fixes the index of every player; key=str keeps mixed-type labels sortable.
  Players = sorted(set(A) | set(B), key=str)
  Players_dict = {player: index for index, player in enumerate(Players)}
  preferenceMatrix = np.zeros(shape = (len(Players), len(Players)), dtype=float)

  # Count only the two player columns so extra columns cannot change the keys.
  count = Count(dataframe[['playerA', 'playerB']])

  # Visit each observed (playerA, playerB) pair once instead of every possible pair.
  for (p1, p2), wins in count.items():
    row = Players_dict.get(p1)
    col = Players_dict.get(p2)
    if row is None or col is None or row == col:
      continue
    preferenceMatrix[row][col] = wins

  return probability(preferenceMatrix)

def mask(matrix):
  """Overwrite every non-zero entry of `matrix` with 1, in place, and return it.

  The argument itself is modified; pass a copy to keep the original values.
  """
  nonzero = matrix != 0
  matrix[nonzero] = 1
  return matrix

In [26]:
# Log-odds matrix between every pair of players found in `data`.
P = PPM(data)
# 0/1 indicator of the non-zero entries of P; `mask` writes in place, hence the copy.
Mask = mask(P.copy())

## Matrix completion
Reference: [link](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6751420)

Input: $X, W \in \mathbb{R}^{M \times N}$, $μ, λ, ρ$

while *not converged* do:
>while not converged do:

>>Update $U = (ρZ + Y)\,V\,(ρV^{T}V + λI_r)^{-1}$

>>Update $V = (ρZ + Y)^{T}\,U\,(ρU^{T}U + λI_r)^{-1}$

>>Update $Z = W \odot \frac{1}{2 + ρ}\left(2X + ρ(UV^{T} - ρ^{-1}Y)\right) + (1 - W) \odot (UV^{T} - ρ^{-1}Y)$

>end while
  
$Y = Y + ρ(Z − UV^{T})$

$ρ = \min(ρμ, 10^{20})$

end while

Output: Complete Matrix  $ Z = UV^{T}$ 

In [27]:
def Matrix_Completion(Matrix, r, Mask, mu=0.01, ro=1, l=0.001, eps=1e-5, steps=100, seed=None):
  """Low-rank completion of `Matrix`, returned as log10(|Z / Z.T|).

  Factors U (rows x r) and V (cols x r) start from standard-normal draws and
  are refined by alternating regularised least-squares updates together with
  an auxiliary matrix Z that is pulled towards `Matrix` where `Mask` is 1 and
  follows U @ V.T where `Mask` is 0.

  Parameters
  ----------
  Matrix : numpy.ndarray
      Observed matrix. It must be square, because the result divides Z by Z.T.
  r : int
      Number of columns of U and V (the rank of the factorisation).
  Mask : numpy.ndarray
      Same shape as `Matrix`; 1 marks an observed entry, 0 an unobserved one.
  mu : float
      Factor applied to `ro` after every outer pass (`ro` is capped at 1e20).
  ro : float
      Initial penalty parameter.
  l : float
      Ridge term added to the r x r system before it is inverted.
  eps : float
      Tolerance used by both stopping tests.
  steps : int
      Iteration cap: a loop stops once its counter exceeds this value.
  seed : int or None
      When given, U and V are drawn from `np.random.RandomState(seed)` so the
      result is reproducible. `None` keeps the previous behaviour of drawing
      from NumPy's global random state.

  Returns
  -------
  numpy.ndarray
      log10(abs(Z / Z.T)) with Z = U @ V.T. Divide-by-zero and invalid-value
      warnings are suppressed, so the result may contain inf or NaN.

  NOTE(review): `converged2` is set to False only once, before the outer loop.
  After the first outer pass it stays True, so the inner loop body runs during
  the first outer pass only; later passes just update Y and ro. U and V — and
  therefore the returned matrix — are fixed by that first inner loop. This is
  left unchanged because re-running the inner loop with the default mu=0.01
  (which shrinks ro) has not been validated — TODO confirm the intended schedule.
  """
  # Global NumPy RNG unless the caller asks for a reproducible stream.
  rng = np.random if seed is None else np.random.RandomState(seed)
  U = rng.normal(size=(Matrix.shape[0], r))
  V = rng.normal(size=(Matrix.shape[1], r))
  Z = U@V.T
  Y = np.zeros(shape=(Matrix.shape))
  I = np.eye(r)
  step = 0
  converged, converged2 = False, False

  while not converged:
    step2 =0
    while not converged2:
      # Alternating ridge-regularised least-squares updates of the two factors.
      U = (ro*Z +Y) @ V @ np.linalg.inv(ro*V.T @ V + l*I )
      V = (ro*Z +Y).T @ U @ np.linalg.inv(ro*U.T @ U + l*I )
      temp = U @V.T -1/ro * Y
      # Observed entries blend the data with the low-rank estimate; the rest copy the estimate.
      ZZ = Mask*(1/(2+ro) *(2*Matrix + ro*temp)) + (1-Mask)*temp
      converged2 = np.linalg.norm(Z-ZZ)/np.linalg.norm(Z) <eps or step2>steps
      step2+=1
      Z = ZZ.copy()
    # Multiplier and penalty update.
    temp = Z - U @ V.T
    Y = Y + ro*temp
    ro = min(ro*mu, 1e20)
    converged = np.linalg.norm(temp)<eps or step>steps
    step+=1

  Z = U @ V.T
  with np.errstate(invalid='ignore', divide='ignore'):
    Matrix = np.log10(abs(Z/Z.T))
  return Matrix


In [42]:
# Rank 294 matches the value printed by the rank sweep at the end of the notebook ("230 294").
Matrix = Matrix_Completion(P, 294, Mask)

In [None]:
%%capture
# Alternative completion through fancyimpute. The cell has no execution count, i.e. it was not run in the saved session.
# NOTE(review): P is produced via np.nan_to_num, so unobserved pairs are zeros rather than NaN — confirm SoftImpute treats anything as missing here.
X_incomplete_normalized = BiScaler().fit_transform(P)
P_completed = SoftImpute().fit_transform(X_incomplete_normalized)
# A player has no preference against themselves.
np.fill_diagonal(P_completed, 0)
# If this cell is run it replaces `Matrix` with log10(|P_completed / P_completed.T|).
with np.errstate(invalid='ignore', divide='ignore'):
    Matrix = np.log10(abs(P_completed/P_completed.T))

In [None]:
# True only if P_completed is exactly antisymmetric, i.e. P_completed.T == -P_completed element-wise.
(P_completed.transpose() == -P_completed).all()

False

# Tournament Construction


In [31]:
class Tournaments():
  """Thresholded copy of a preference matrix, plus extraction of induced sub-relations.

  `Tournament` holds a copy of the input in which every positive entry is 1
  and every negative entry is 0; all other entries keep their value.
  """

  def __init__(self, Matrix):
    """Copy `Matrix` and threshold the copy; the argument is not modified."""
    beats = Matrix.copy()
    beats[beats > 0] = 1
    beats[beats < 0] = 0
    self.Tournament = beats

  def Sub_tournament(self, Vertices):
    """Return the 0/1 relation restricted to `Vertices`.

    `Vertices` is a sequence of row/column indices into `self.Tournament`.
    Entry [i][j] of the integer result is 1 exactly when Vertices[i] differs
    from Vertices[j] and Tournament[Vertices[i]][Vertices[j]] equals 1.
    """
    ids = np.asarray(Vertices, dtype=int)
    picked = self.Tournament[np.ix_(ids, ids)] == 1
    # Pairs referring to the same vertex never count as an edge.
    same_vertex = ids[:, None] == ids[None, :]
    return (picked & ~same_vertex).astype(int)

In [32]:
# Threshold the completed matrix: positive entries become 1, negative entries become 0.
T = Tournaments(Matrix)
Tour = T.Tournament
# Bare last expression: the shape is shown as the cell output.
Tour.shape

(392, 392)

# Rank2Rank Algo

In [33]:
import collections.abc
def flatten(lis):
  """Yield the leaves of an arbitrarily nested iterable, depth first.

  Every iterable item is descended into, except `str`, which is yielded
  whole. `collections.abc.Iterable` is used because the `collections.Iterable`
  alias no longer exists from Python 3.10 on.
  """
  for item in lis:
    if isinstance(item, collections.abc.Iterable) and not isinstance(item, str):
      yield from flatten(item)
    else:
      yield item

In [34]:
def Triangle(Tournament):
  """Find one directed 3-cycle by walking along edges.

  For every vertex `first`, follows each entry equal to 1 in its row to
  `second`, then each entry equal to 1 in that row to `third`, and returns
  [first, second, third] as soon as Tournament[third][first] is truthy.
  Returns [] when no such triple exists.
  """
  size, _ = Tournament.shape

  for first in range(size):
    for second in np.flatnonzero(Tournament[first] == 1):
      for third in np.flatnonzero(Tournament[second] == 1):
        if Tournament[third][first]:
          return [first, second, third]

  return []

def NTriangle(Tournament):
  """Brute-force search for a directed 3-cycle.

  Scans ordered triples (a, b, c) of pairwise distinct indices in
  lexicographic order and returns the first one for which
  Tournament[a][b], Tournament[b][c] and Tournament[c][a] are all truthy.
  Returns [] when there is none.
  """
  size, _ = Tournament.shape

  for a in range(size):
    for b in range(size):
      if b == a or not Tournament[a][b]:
        continue
      for c in range(size):
        if c == a or c == b:
          continue
        if Tournament[b][c] and Tournament[c][a]:
          return [a, b, c]

  return []

def Topologicalsort(Tournament, Vertices):
  """Order the vertices so that every edge points forward, if that is possible.

  An edge u -> v exists where Tournament[u][v] == 1. Vertices without
  incoming edges are emitted first, in index order; a vertex is released
  once all of its predecessors have been emitted (first in, first out).

  Parameters
  ----------
  Tournament : 2-D numpy array
  Vertices : sequence
      Label of each row/column; the labels are what the result contains.

  Returns
  -------
  (bool, list)
      (True, labels in order) when every vertex could be emitted,
      otherwise (False, []).
  """
  size, _ = Tournament.shape

  # In-degree of v = number of rows u with Tournament[u][v] == 1.
  indegree = (Tournament == 1).sum(axis=0)

  pending = list(np.flatnonzero(indegree == 0))
  ordered = []
  head = 0  # position of the next vertex to process; pending acts as a FIFO queue

  while head < len(pending):
    current = pending[head]
    head += 1
    ordered.append(Vertices[current])

    for successor in np.flatnonzero(Tournament[current] == 1):
      indegree[successor] -= 1
      if indegree[successor] == 0:
        pending.append(successor)

  if len(ordered) != size:
    return (False, [])
  return (True, ordered)

In [35]:
class MFAST():
  """Pick a cyclic rotation of an ordering by counting back edges of a derived graph.

  NOTE(review): `Graph` only creates edges from a lower to a higher position,
  so the depth-first search in `dfs` can never reach a vertex that is still
  on its current path. Every rotation therefore yields zero back edges and
  `Leftshift` always returns the first rotation it tries. The tournament
  itself is never consulted — confirm whether that is intended.
  """

  def __init__(self, Sigma):
    """Store the ordering `Sigma` (a flat sequence) as a 1-D array."""
    self.sigma = np.array(Sigma)
    self.n, = self.sigma.shape

  def Graph(self):
    """Return an n x n 0/1 matrix with a 1 at [u][v] for u < v whenever sigma[u] != sigma[v]."""
    graph = np.zeros(shape=(self.n,self.n), dtype=int)
    for u in range(self.n):
      for v in range(u+1, self.n):
        if self.sigma[u]!=self.sigma[v]:
          graph[u][v]=1

    return graph

  def Back_edge(self, graph, u, discovered, finished, edges):
    """Depth-first visit of `u` that records back edges.

    `discovered` holds the positions on the current search path, `finished`
    the fully explored ones; both lists are updated in place. When a
    successor is already on the path, the pair [sigma[u], sigma[v]] is
    appended to `edges` and the scan of u's remaining successors stops.
    """
    discovered.append(u)
    index= np.where(graph[u]==1)
    for v in index[0]:
      if v in discovered:
        edges.append([self.sigma[u], self.sigma[v]])
        break

      if v not in finished:
        self.Back_edge(graph, v, discovered, finished, edges)
    discovered.remove(u)
    finished.append(u)

  def dfs(self, graph):
    """Run `Back_edge` from every position not yet visited and return the collected back edges."""
    discovered = []
    finished= []
    edges = []

    for u in range(self.n):
      if u not in discovered and u not in finished:
        self.Back_edge(graph, u, discovered, finished, edges)

    return edges

  def Leftshift(self):
    """Return, as a list, the rotation of sigma with the fewest back edges.

    All n rotations produced by repeated `np.roll(sigma, 1)` (last element
    moves to the front) are tried; ties keep the earliest one. After n rolls
    `self.sigma` is back in its initial order. An empty ordering returns [].
    """
    res =math.inf
    # Fallback for n == 0, where the loop below never assigns a candidate.
    Sigmastar = self.sigma
    for i in range(self.n):
      self.sigma = np.roll(self.sigma, 1)
      graph = self.Graph()
      back_edges = self.dfs(graph)
      if len(back_edges)<res:
        res = len(back_edges)
        Sigmastar = self.sigma
    return list(Sigmastar)

In [36]:
def R2R(Tournament, Vertices):
  """Recursively rank `Vertices` by splitting around a directed 3-cycle.

  Parameters
  ----------
  Tournament : 2-D numpy array
      Relation over the vertices; row/column i belongs to Vertices[i].
  Vertices : list
      Labels of the rows/columns. # assumes the labels are distinct — TODO confirm

  Returns
  -------
  list
      * the topological order, when `Topologicalsort` succeeds;
      * `Vertices` unchanged, when the sort fails but `Triangle` finds no cycle;
      * otherwise the rotation chosen by `MFAST.Leftshift` of the concatenated
        rankings of the three groups built around the cycle. Vertices that
        fall into none of the three groups are not part of the result.

  Sub-relations are sliced from the `Tournament` argument, so the function no
  longer depends on a notebook-level `T` being in sync with its input.
  """
  n, _ = Tournament.shape
  flag, Sort = Topologicalsort(Tournament, Vertices)

  if flag:
    return Sort

  cycle = Triangle(Tournament)
  if len(cycle)==0: return Vertices

  c0, c1, c2 = cycle
  # Row positions of the three groups; each cycle vertex lands in exactly one of them.
  groups = [
    [i for i in range(n) if Tournament[i][c1] and Tournament[c2][i]],
    [i for i in range(n) if Tournament[i][c2] and Tournament[c0][i]],
    [i for i in range(n) if Tournament[i][c0] and Tournament[c1][i]],
  ]

  Sigma = []
  for rows in groups:
    idx = np.asarray(rows, dtype=int)
    # Induced 0/1 relation on this group, with an empty diagonal.
    sub = (Tournament[np.ix_(idx, idx)] == 1).astype(int)
    if len(rows):
      np.fill_diagonal(sub, 0)
    Sigma.append(R2R(sub, [Vertices[i] for i in rows]))

  mfast = MFAST(list(flatten(Sigma)))
  Sigmastar = mfast.Leftshift()

  return Sigmastar


# Block Rank2Rank Algo

In [37]:
def BR2R(Tournament, Vertices):
  """Block variant of R2R: split into "beats the cycle", "rest" and "beaten by the cycle".

  Parameters
  ----------
  Tournament : 2-D numpy array
      Relation over the vertices; row/column i belongs to Vertices[i].
  Vertices : list
      Hashable labels of the rows/columns. # assumes the labels are distinct — TODO confirm

  Returns
  -------
  list
      * the topological order, when `Topologicalsort` succeeds;
      * `Vertices` unchanged, when the sort fails but `Triangle` finds no cycle;
      * otherwise the flattened concatenation of BR2R on Splus (vertices whose
        entries towards all three cycle vertices are truthy), R2R on the
        remaining vertices S, and BR2R on Sminus (vertices towards which all
        three cycle vertices have a truthy entry).

  Sub-relations are sliced from the `Tournament` argument, so the function no
  longer depends on a notebook-level `T` being in sync with its input.
  """
  n, _ = Tournament.shape
  flag, Sort = Topologicalsort(Tournament, Vertices)

  if flag:
    return Sort

  cycle = Triangle(Tournament)
  if len(cycle)==0: return Vertices

  c0, c1, c2 = cycle
  plus_rows = [i for i in range(n) if Tournament[i][c0] and Tournament[i][c1] and Tournament[i][c2]]
  minus_rows = [i for i in range(n) if Tournament[c0][i] and Tournament[c1][i] and Tournament[c2][i]]
  Splus = [Vertices[i] for i in plus_rows]
  Sminus = [Vertices[i] for i in minus_rows]
  # Same set difference (and hence the same element order) as before.
  S = list(set(Vertices)-(set(Splus) | set(Sminus)))
  row_of = {vertex: i for i, vertex in enumerate(Vertices)}
  middle_rows = [row_of[vertex] for vertex in S]

  def induced(rows):
    # Induced 0/1 relation on the given row positions, with an empty diagonal.
    idx = np.asarray(rows, dtype=int)
    sub = (Tournament[np.ix_(idx, idx)] == 1).astype(int)
    if len(rows):
      np.fill_diagonal(sub, 0)
    return sub

  SigmaStar = [BR2R(induced(plus_rows), Splus), R2R(induced(middle_rows), S), BR2R(induced(minus_rows), Sminus)]

  return list(flatten(SigmaStar))


In [38]:
# One vertex id per row of the tournament matrix: 0 .. sp-1.
sp = Tour.shape[0]
vert = list(range(sp))

In [43]:
# Rank all vertices of the tournament matrix built earlier.
Rank = BR2R(Tour, vert)
# Prints the ranking followed by its length.
print(Rank, len(Rank))
# Number of distinct vertices in the ranking; equal to len(Rank) when no vertex is repeated.
d = set(Rank)
len(d)

[106, 185, 390, 199, 122, 245, 211, 158, 161, 256, 95, 12, 171, 324, 353, 370, 260, 70, 79, 346, 323, 63, 349, 69, 84, 151, 44, 20, 202, 131, 23, 334, 61, 113, 121, 382, 194, 0, 327, 304, 283, 359, 174, 137, 362, 105, 45, 369, 360, 19, 89, 311, 91, 28, 11, 198, 322, 74, 56, 98, 39, 32, 42, 273, 332, 8, 239, 232, 108, 219, 50, 166, 65, 2, 76, 376, 231, 169, 94, 336, 267, 243, 25, 112, 204, 216, 15, 288, 34, 83, 305, 200, 193, 291, 67, 14, 142, 230, 35, 227, 268, 103, 254, 225, 55, 1, 364, 30, 110, 275, 16, 344, 107, 302, 187, 73, 236, 9, 82, 343, 309, 251, 64, 139, 229, 281, 164, 186, 36, 301, 258, 46, 195, 375, 212, 279, 156, 261, 298, 140, 339, 345, 234, 286, 296, 3, 120, 272, 329, 206, 284, 271, 33, 152, 135, 66, 342, 159, 143, 77, 163, 133, 90, 52, 255, 145] 166


166

In [40]:
from tqdm import tqdm
# Sweep the factorisation rank and keep the rank whose ranking covers the most vertices.
Rank=[]
r=100
# Upper bound follows the size of P instead of a hard-coded player count,
# so the sweep stays valid when a different data set is loaded.
for i in tqdm(range(100, P.shape[0])):
  # Rebinds the notebook-level `Matrix` and `T` on every iteration.
  Matrix = Matrix_Completion(P, i, Mask)
  T = Tournaments(Matrix)
  R = BR2R(T.Tournament, vert)
  # Strictly longer only: on ties the smaller rank is kept.
  if len(R) > len(Rank):
    Rank=R
    r=i


100%|██████████| 292/292 [23:28<00:00,  4.82s/it]


In [41]:
# Length of the longest ranking found by the sweep and the rank `r` that produced it.
print(len(Rank), r)

230 294
