In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import networkx as nx

In [2]:
%%capture
import os

# check if this is a colab notebook and clone the repo if it is
if 'COLAB_GPU' in os.environ:
    %cd /content/
    !git clone https://github.com/Enver-group/twitch-web-analytics
    %cd twitch-web-analytics
    !gdown --id 11IfXaA66-D7vjA2R46uAilBgOPxYmKmn
else:
    os.chdir('..')

!pip install -r requirements.txt

!pip install -e .

In [3]:
#autoreload modules
%load_ext autoreload
%autoreload 2

from src.user import User

In [5]:
df = pd.read_feather("data/streamers.feather")
df.dropna(subset=['num_followers'],inplace=True)
df_streamers_exploded = df.explode("user_follows")
df_streamers_exploded = df_streamers_exploded[df_streamers_exploded["user_follows"].isin(df.id)]
user_follows_arrays = df_streamers_exploded.groupby("id").user_follows.apply(np.array).reset_index()
df = df.set_index("id")
df.loc[user_follows_arrays.id,"user_follows"] = user_follows_arrays.user_follows.values
not_in_set_or_null = ~df.index.isin(user_follows_arrays.id)
df.loc[df.user_follows.isnull(),"user_follows"] =  pd.Series([[]]*df.user_follows.isnull().sum()).values
df.reset_index(level=0, inplace=True)
df

  df.loc[df.user_follows.isnull(),"user_follows"] =  pd.Series([[]]*df.user_follows.isnull().sum()).values


Unnamed: 0,id,name,num_followers,broadcaster_type,description,lang,last_game_played_name,view_count,profile_image_url,created_at,user_follows
0,459331509,auronplay,10874205.0,partner,feliz pero no mucho,es,Minecraft,212166590,https://static-cdn.jtvnw.net/jtv_user_pictures...,2019-09-03 14:02:49,"[210708721, 431460701, 77649106, 130065491, 42..."
1,39276140,Rubius,10483570.0,partner,se contar hasta patata,es,Propnight,222921576,https://static-cdn.jtvnw.net/jtv_user_pictures...,2013-01-12 18:46:56,"[91136321, 198363811, 210708721, 133528221, 70..."
2,48878319,TheGrefg,8708686.0,partner,"Hola, me llamo David, me quedé calvo delante d...",es,Five Nights at Freddy's,197916916,https://static-cdn.jtvnw.net/jtv_user_pictures...,2013-09-12 00:45:27,"[143776262, 33734881, 248222879, 188499010, 27..."
3,83232866,ibai,8536480.0,partner,Si lees esto que sepas que te aprecio,es,VALORANT,286994651,https://static-cdn.jtvnw.net/jtv_user_pictures...,2015-02-20 16:47:56,"[35980866, 145908612, 205218019, 91136321, 524..."
4,121510236,juansguarnizo,6013625.0,partner,"Juan Sebastián Guarnizo Algarra, más conocido ...",es,Just Chatting,127964660,https://static-cdn.jtvnw.net/jtv_user_pictures...,2016-04-11 03:34:11,"[43419527, 517536651, 476005292, 109492660, 55..."
...,...,...,...,...,...,...,...,...,...,...,...
5026,112100325,MikeCarmineRD,149.0,affiliate,Mi pasion es jugar videojuegos y mi meta es lo...,es,Path of Exile,400979,https://static-cdn.jtvnw.net/jtv_user_pictures...,2016-01-08 19:18:47,"[139120152, 119114270, 31220977, 41472032, 553..."
5027,620773811,PSBoombang,146.0,affiliate,BoomBang.NL ~ Chat & Play,es,BoomBang,802805,https://static-cdn.jtvnw.net/jtv_user_pictures...,2020-12-15 08:53:17,[]
5028,61127118,DavimenXPro,115.0,affiliate,Canal de Youtube: https://www.youtube.com/chan...,es,Pokémon Brilliant Diamond/Shining Pearl,2328,https://static-cdn.jtvnw.net/jtv_user_pictures...,2014-04-18 15:03:20,"[486931255, 219278031, 15463719, 526210023, 81..."
5029,487118375,Perdhroo,102.0,affiliate,"Perdhro sin tele ni cerveza, pierde la cabeza.",es,VALORANT,216969,https://static-cdn.jtvnw.net/jtv_user_pictures...,2020-01-21 10:15:51,"[43912951, 153811929, 79639881, 168100356, 139..."


In [6]:
# converting the df into a graph
G = nx.DiGraph()
for i,user in df.iterrows():
  G.add_node(user["id"], **user.drop(["id","user_follows"]))
  if isinstance(user["user_follows"], np.ndarray):
    G.add_edges_from([(user["id"], id_followed) for id_followed in user["user_follows"]]) 

In [7]:
# checking the correctness of the graph
print("Number of nodes:", G.number_of_nodes())
print("Number of edges:", G.number_of_edges(), "\n")
for i, user in enumerate(G.degree()):
  if i >10:
    break
  print(G.nodes[user[0]]["name"], "is following to", user[1], "users")

Number of nodes: 5077
Number of edges: 474154 

auronplay is following to 2575 users
josecristo_ is following to 219 users
xXxTheFocuSxXx is following to 245 users
Ampeterby7 is following to 539 users
javiDMr10 is following to 143 users
gtv_genesis is following to 284 users
Tanizen is following to 531 users
Carola is following to 682 users
CooLifeGame is following to 940 users
Luzu is following to 669 users
ZormanWorld is following to 292 users


In [8]:
pagerank = nx.pagerank(G)
pagerank = dict(sorted(pagerank.items(), key=lambda x: x[1],reverse=True))

In [9]:
ranking = []
impostors = []
for i in pagerank.keys():
    try:
        ranking.append(G.nodes[i]["name"])
    except:
        impostors.append(i)

In [10]:
len(ranking)
len(impostors)

46

In [11]:
impostors

['675687844',
 '636363437',
 '636353532',
 '424477123',
 '221419583',
 '230829367',
 '624937390',
 '151888706',
 '51135833',
 '162305512',
 '536932955',
 '35687032',
 '60591392',
 '504605515',
 '626878919',
 '694876758',
 '453832409',
 '36818579',
 '507088659',
 '534448996',
 '55602839',
 '158897912',
 '604743610',
 '49735943',
 '62225231',
 '177248291',
 '110836971',
 '602808848',
 '89854125',
 '532320778',
 '145779018',
 '267933571',
 '198579707',
 '613703291',
 '641662198',
 '702822701',
 '691199766',
 '464136263',
 '277921640',
 '210596503',
 '659069498',
 '583547815',
 '498848886',
 '534219817',
 '147881799',
 '491508976']