In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [2]:
%%capture
import os

# check if this is a colab notebook and clone the repo if it is
if 'COLAB_GPU' in os.environ:
    %cd /content/
    !git clone https://github.com/Enver-group/twitch-web-analytics
    %cd twitch-web-analytics
    !gdown --id 11IfXaA66-D7vjA2R46uAilBgOPxYmKmn
else:
    os.chdir('..')

!pip install -r requirements.txt

!pip install -e .

In [3]:
#autoreload modules
%load_ext autoreload
%autoreload 2

from src.user import User

In [4]:
df = pd.read_csv("data/data_ibai.csv")
df.head()

Unnamed: 0,id,name,num_followers,broadcaster_type,description,lang,last_game_played_name,view_count,profile_image_url,created_at,user_follows
0,459331509,auronplay,10874269.0,partner,feliz pero no mucho,es,Minecraft,212164590,https://static-cdn.jtvnw.net/jtv_user_pictures...,2019-09-03T14:02:49Z,"['210708721', '431460701', '77649106', '130065..."
1,39276140,Rubius,10483602.0,partner,se contar hasta patata,es,Propnight,222893232,https://static-cdn.jtvnw.net/jtv_user_pictures...,2013-01-12T18:46:56Z,"['277057209', '603776329', '91136321', '198363..."
2,48878319,TheGrefg,8709137.0,partner,"Hola, me llamo David, me quedé calvo delante d...",es,Five Nights at Freddy's,197871081,https://static-cdn.jtvnw.net/jtv_user_pictures...,2013-09-12T00:45:27Z,"['143776262', '675347177', '33734881', '248222..."
3,83232866,ibai,8536540.0,partner,Si lees esto que sepas que te aprecio,es,VALORANT,286992630,https://static-cdn.jtvnw.net/jtv_user_pictures...,2015-02-20T16:47:56Z,"['35980866', '145908612', '205218019', '911363..."
4,121510236,juansguarnizo,6013656.0,partner,"Juan Sebastián Guarnizo Algarra, más conocido ...",es,Just Chatting,127964214,https://static-cdn.jtvnw.net/jtv_user_pictures...,2016-04-11T03:34:11Z,"['43419527', '517536651', '476005292', '109492..."


In [5]:
# reformating the variable user_follows
print(type(df["user_follows"][0]))
display(df["user_follows"].head())
df["user_follows"] = df["user_follows"].str[1:-1].str.replace("'","").str.split(", ")
df["user_follows"] = df["user_follows"].apply(lambda x: np.nan if x==[''] else x)
df["user_follows"] = df["user_follows"].apply(lambda x: list(map(int, x)) if isinstance(x, list) else x)
print(type(df["user_follows"][0]))
display(df["user_follows"].head())

<class 'str'>


0    ['210708721', '431460701', '77649106', '130065...
1    ['277057209', '603776329', '91136321', '198363...
2    ['143776262', '675347177', '33734881', '248222...
3    ['35980866', '145908612', '205218019', '911363...
4    ['43419527', '517536651', '476005292', '109492...
Name: user_follows, dtype: object

<class 'list'>


0    [210708721, 431460701, 77649106, 130065491, 42...
1    [277057209, 603776329, 91136321, 198363811, 48...
2    [143776262, 675347177, 33734881, 248222879, 18...
3    [35980866, 145908612, 205218019, 91136321, 524...
4    [43419527, 517536651, 476005292, 109492660, 55...
Name: user_follows, dtype: object

In [6]:
# converting the df into a graph
G = nx.DiGraph()
for i,user in df.iterrows():
  G.add_node(user["id"], **user.drop(["id","user_follows"]))
  if isinstance(user["user_follows"], list):
    G.add_edges_from([(user["id"], id_followed) for id_followed in user["user_follows"]]) 

In [7]:
# checking the correctness of the graph
print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges(), "\n")
for i, user in enumerate(G.degree()):
  if i >10:
    break
  print(G.nodes[user[0]]["name"], "is following to", user[1], "users")

Number of nodes: 315633
Number of edges: 1493481 

auronplay is following to 2878 users
josecristo_ is following to 251 users
xXxTheFocuSxXx is following to 289 users
Ampeterby7 is following to 624 users
javiDMr10 is following to 216 users
gtv_genesis is following to 336 users
Tanizen is following to 622 users
Carola is following to 828 users
CooLifeGame is following to 1084 users
Luzu is following to 768 users
ZormanWorld is following to 331 users


In [49]:
pagerank = nx.pagerank(G)
pagerank = dict(sorted(pagerank.items(), key=lambda x: x[1], reverse=True))

In [93]:
ranking = []
for i in pagerank.keys():
    if G.nodes[i] != {}:
        ranking.append(G.nodes[i])

In [102]:
ranking[32847]

{'name': 'DavimenXPro',
 'num_followers': 115.0,
 'broadcaster_type': 'affiliate',
 'description': 'Canal de Youtube: https://www.youtube.com/channel/UC_PbnWX16de3BVZoD-k5MZg',
 'lang': 'es',
 'last_game_played_name': 'Pokémon Brilliant Diamond/Shining Pearl',
 'view_count': 2328,
 'profile_image_url': 'https://static-cdn.jtvnw.net/jtv_user_pictures/79acfc05-8a2b-4817-a7b5-017ea07e26e3-profile_image-300x300.jpg',
 'created_at': '2014-04-18T15:03:20Z'}