<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc" style="margin-top: 1em;"><ul class="toc-item"><li><span><a href="#Managers-clubs" data-toc-modified-id="Managers-clubs-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Managers clubs</a></span></li><li><span><a href="#Build-Network" data-toc-modified-id="Build-Network-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Build Network</a></span></li><li><span><a href="#Create-Network" data-toc-modified-id="Create-Network-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Create Network</a></span></li></ul></div>

In [87]:
# Beautiful Soup for web scrapping data
from bs4 import BeautifulSoup
import requests

# Pandas to reimport csv data
import pandas as pd

#NetworkX
import networkx as nx

In [2]:
def log_progress(sequence, every=None, size=None, name='Items', delete=False):
    from ipywidgets import IntProgress, HTML, VBox
    from IPython.display import display

    is_iterator = False
    if size is None:
        try:
            size = len(sequence)
        except TypeError:
            is_iterator = True
    if size is not None:
        if every is None:
            if size <= 200:
                every = 1
            else:
                every = int(size / 200)     # every 0.5%
    else:
        assert every is not None, 'sequence is iterator, set every'

    if is_iterator:
        progress = IntProgress(min=0, max=1, value=1)
        progress.bar_style = 'info'
    else:
        progress = IntProgress(min=0, max=size, value=0)
    label = HTML()
    box = VBox(children=[label, progress])
    display(box)

    index = 0
    try:
        for index, record in enumerate(sequence, 1):
            if index == 1 or index % every == 0:
                if is_iterator:
                    label.value = '{name}: {index} / ?'.format(
                        name=name,
                        index=index
                    )
                else:
                    progress.value = index
                    label.value = u'{name}: {index} / {size}'.format(
                        name=name,
                        index=index,
                        size=size
                    )
            yield record
    except:
        progress.bar_style = 'danger'
        raise
    else:
        progress.bar_style = 'success'
        progress.value = index
        label.value = "{name}: {index}".format(
            name=name,
            index=str(index or '?')
        )
        if delete:
            box.close()

# Managers clubs

In [21]:
df = pd.read_csv("data/data.csv", index_col=0)
df.sample(5)

Unnamed: 0,Player Name,Player Link,Player position,From club,To club,From competition,To competition,From manager,From manager link,To manager,To manager link,Market value,Fee,Age,From manager agent,To manager agent,Player Agent
30282,nicolae-calancea,/nicolae-calancea/profil/spieler/44264,Keeper,FC Voluntari,CS U Craiova,Liga 1 - Abstiegsrunde,Liga 1 - Meisterrunde,Florin Marin,/florin-marin/profil/trainer/4789,Gheorghe Multescu,/gheorghe-multescu/profil/trainer/5429,250 Th. €,Free transfer,29 years 10 months 02 days,,,
44357,dominik-hofbauer,/dominik-hofbauer/profil/spieler/49718,Central Midfield,SCR Altach,Arka Gdynia,Bundesliga,Ekstraklasa,Damir Canadi,/damir-canadi/profil/trainer/4918,Grzegorz Nicinski,/grzegorz-nicinski/profil/trainer/24890,600 Th. €,Free transfer,25 years 11 months 26 days,Alliance ...,,Polish Sports Group
37565,francesco-di-paola,/francesco-di-paola/profil/spieler/409657,Centre-Forward,Foligno,Viareggio,Serie D - Girone G,Serie D - Girone E,Tommaso Guazzolini,/tommaso-guazzolini/profil/trainer/46761,Marco Masi,/marco-masi/profil/trainer/10065,100 Th. €,Free transfer,31 years 09 months 20 days,,,
362,ingvar-thor-kale,/ingvar-thor-kale/profil/spieler/110175,Keeper,Víkingur,Valur,Pepsideild,Pepsideild,Ólafur Thórdarson,/olafur-thordarson/profil/trainer/13998,Ólafur Jóhannesson,/olafur-johannesson/profil/trainer/1490,100 Th. €,Free transfer,31 years 24 days,,,
7971,adam-dugdale,/adam-dugdale/profil/spieler/46460,Centre-Back,Tranmere Rovers,Morecambe FC,Conference National,League Two,Gary Brabin,/gary-brabin/profil/trainer/8277,Jim Bentley,/jim-bentley/profil/trainer/15236,50 Th. €,Free transfer,27 years 09 months 19 days,,,FullNinety Sports Management


In [77]:
manager_from = list(set(df['From manager link']))
manager_to = list(set(df['To manager link']))
managers = list(set(manager_from + manager_to))
managers[:4]

['/thierry-froger/profil/trainer/1536',
 '/pedro-munitis/profil/trainer/39308',
 '/roy-teubel/profil/trainer/42115',
 '/edis-mulalic/profil/trainer/42658']

In [78]:
data = []

headers = {'User-Agent': 'Mozilla/5.00'}
base_url = "https://www.transfermarkt.com"

for manager_url in log_progress(managers, every=5):

    url = base_url + manager_url
    r  = requests.get(url, headers=headers)
    manager_data_page = BeautifulSoup(r.text, 'html.parser')

    manager_table = manager_data_page.find("div", {"class":"responsive-table"})
    for mt in manager_table.find('tbody').find_all('tr'):
        if 'class="extrarow bg_blau_20 rechts"' in str(mt):        
            break
        if "Manager" in str(mt):
            club = mt.findAll('a')[1].text
            data.append([manager_url, club])

In [85]:
df_2 = pd.DataFrame(data, columns=["Manager", "Club"])
df_2.Manager = df_2.Manager.apply(lambda x: x.split('/')[1])

In [86]:
df_2

Unnamed: 0,Manager,Club
0,thierry-froger,TP Mazembe
1,thierry-froger,US Créteil
2,thierry-froger,OC Vannes
3,thierry-froger,Nîmes Olympique
4,thierry-froger,Togo
5,thierry-froger,Reims
6,thierry-froger,FC Gueugnon
7,thierry-froger,LB Châteauroux
8,thierry-froger,LOSC Lille
9,thierry-froger,Le Mans UC 72


# Build Network

In [103]:
G = nx.Graph()

In [104]:
for a in df_2.groupby('Club'):
    managers = a[1].Manager
    if len(managers)>1:
        for m1 in managers:
            for m2 in managers:
                if m1!=m2:
                    G.add_edge(m1,m2, club=a[0])

In [106]:
print(nx.info(G))

Name: 
Type: Graph
Number of nodes: 4976
Number of edges: 47204
Average degree:  18.9727


In [107]:
nx.write_gml(G, "networks/managers.gml")