In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the route list and the station table from the Moscow_Subway directory.
routes = pd.read_csv("Moscow_Subway/routes_data.txt")
stations = pd.read_csv("Moscow_Subway/stations_data.txt")

In [3]:
# Preview the first five station records.
stations.head(n=5)

Unnamed: 0,id,line_name,station_name,station_type,delay_morning,delay_rush,delay_night,lat,lon
0,0,Sokolnicheskaya,Bulvar Rokossovskogo,deadend,120,90,360,55.814269,37.735117
1,1,Sokolnicheskaya,Cherkizovskaya,pass,120,90,360,55.802991,37.744825
2,2,Sokolnicheskaya,Preobrazhenskaya ploschad,pass,120,90,360,55.796172,37.715022
3,3,Sokolnicheskaya,Sokolniki,pass,120,90,360,55.7892,37.679706
4,4,Sokolnicheskaya,Krasnoselskaya,pass,120,90,360,55.779853,37.666077


In [4]:
# Preview the first five route records.
routes.head(n=5)

Unnamed: 0,id_from,id_to,route_type,delay
0,0,1,drive,120
1,1,0,drive,120
2,1,2,drive,240
3,2,1,drive,240
4,2,3,drive,180


In [5]:
# Number of stations, i.e. the number of vertices in the graph.
stations_number = len(stations)
stations_number

194

In [6]:
# Adjacency matrix of the directed route graph: A[i, j] = 1 when `routes`
# contains a row with id_from == i and id_to == j. Station ids are used
# directly as row/column indices of A.
A = np.zeros((stations_number, stations_number), dtype=int)
A[routes["id_from"].to_numpy(), routes["id_to"].to_numpy()] = 1

# Station names positioned by station id, so stations_name[i] labels row and
# column i of A. They are taken straight from the stations table rather than
# collected while walking the routes: a station that never occurs as id_from
# would otherwise keep an integer placeholder and break the later
# ",".join(stations_name). If an id is duplicated, its first row wins; an id
# in range(stations_number) that is absent from the table raises KeyError.
name_by_id = stations.drop_duplicates("id").set_index("id")["station_name"]
stations_name = [name_by_id.loc[station_id] for station_id in range(stations_number)]

In [7]:
# Persist the adjacency matrix as comma-separated integers, one row per line.
np.savetxt("Moscow_Subway/SubwayAdj.txt", A, fmt="%d", delimiter=",")

In [9]:
# Save the station names as a single comma-separated line. The encoding is
# pinned so the file content does not depend on the platform's default
# locale encoding.
with open("Moscow_Subway/StationName.txt", "w", encoding="utf-8") as f:
    f.write(",".join(stations_name))

PageRank — вариант из лекций по графам (итерации с вектором телепортации)

In [10]:
# Transition matrix: row i of A divided by its out-degree, so P[i, j] is the
# probability of moving from vertex i to vertex j. A vertex with no outgoing
# edges (dangling) gets a uniform row instead of an all-zero one; otherwise
# its rank mass would be dropped on every iteration and `rank` would stop
# summing to 1. When every vertex has an outgoing edge, P is unchanged.
A_sum = A.sum(axis=1)[:, None]
P = np.where(A_sum > 0, A / np.where(A_sum > 0, A_sum, 1), 1 / A.shape[0])
# Teleportation vector: uniform over all vertices.
v = np.ones(A.shape[0]) / A.shape[0]
# PageRank vector, initialised to the uniform distribution.
rank = np.ones(A.shape[0]) / A.shape[0]

# Damping factor: weight of following an edge versus teleporting.
alpha = 0.99
# Number of fixed-point iterations (no convergence check is performed).
k = 1000
for _ in range(k):
    rank = alpha * P.T @ rank + (1 - alpha) * v

In [11]:
# Names of the ten stations with the highest rank, best first.
np.array(stations_name)[np.argsort(rank)[::-1][:10]]

array(['Kievskaya', 'Arbatskaya', 'Biblioteka imeni Lenina', 'Kievskaya',
       'Kievskaya', 'Kurskaya', 'Taganskaya', 'Chkalovskaya',
       'Marksistskaya', 'Kurskaya'], dtype='<U27')

In [12]:
# Names of the ten stations with the lowest rank, lowest first.
np.array(stations_name)[np.argsort(rank)[:10]]

array(['Marina roscha', 'Mezhdunarodnaya', 'Novokosino',
       'Bulvar Rokossovskogo', 'Ulitsa Sergeya Eyzenshteyna', 'Vyhino',
       'Yugo-Zapadnaya', 'Rechnoy vokzal', 'Alma-Atinskaya',
       'Schelkovskaya'], dtype='<U27')

PageRank — вариант из лекций по HPC (степенной метод для матрицы Google)

In [23]:
# Start from the uniform vector.
rank = np.ones(A.shape[0]) / A.shape[0]

# Transition matrix: rows of A divided by their out-degree. A vertex with no
# outgoing edges (dangling) gets a uniform row instead of an all-zero one, so
# every column of the matrix built below sums to 1. When every vertex has an
# outgoing edge, P is unchanged.
A_sum = A.sum(axis=1)[:, None]
P = np.where(A_sum > 0, A / np.where(A_sum > 0, A_sum, 1), 1 / A.shape[0])

# Damped matrix: transposed transition matrix weighted by alpha plus a uniform
# teleportation term. `alpha` and `k` are the values assigned in the previous
# PageRank cell.
P = P.T * alpha + np.ones_like(P) * (1 - alpha) / A.shape[0]

# Power iteration: multiply by the matrix, then rescale the vector by its
# norm (np.linalg.norm defaults to the Euclidean norm for a 1-D array).
for _ in range(k):
    y = P @ rank
    rank = y / np.linalg.norm(y)

In [24]:
# Names of the ten stations with the highest rank, best first.
np.array(stations_name)[np.argsort(rank)[::-1][:10]]

array(['Kievskaya', 'Arbatskaya', 'Biblioteka imeni Lenina', 'Kievskaya',
       'Kievskaya', 'Kurskaya', 'Taganskaya', 'Chkalovskaya',
       'Marksistskaya', 'Kurskaya'], dtype='<U27')

In [15]:
# Names of the ten stations with the lowest rank, lowest first.
np.array(stations_name)[np.argsort(rank)[:10]]

array(['Marina roscha', 'Mezhdunarodnaya', 'Novokosino',
       'Bulvar Rokossovskogo', 'Ulitsa Sergeya Eyzenshteyna', 'Vyhino',
       'Yugo-Zapadnaya', 'Rechnoy vokzal', 'Alma-Atinskaya',
       'Schelkovskaya'], dtype='<U27')