### Scrape data from [Transfermarkt][1] into a table using the BeautifulSoup Python library
[1]: https://www.transfermarkt.co.uk/transfers/saisontransfers/statistik?land_id=0&ausrichtung=&spielerposition_id=&altersklasse=&leihe=&plus=1

### The table will have the columns illustrated below, holding the following data
Index  |Player  | Role | Age | Market Value | Nationality | Moving from Team  | Moving from League | Moving to Team | Moving to League  | Transfer fee
  ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | ------------- | -------------
  Index No. | Player Name | Player Role | Player Age  | Player MV  | Player Nat.  | Old Club  | Old League  | New Club  | New League  | Fee

In [1]:
import bs4, requests, selenium, time, pandas as pd

In [2]:
# Root of the site; page-specific paths are appended to it to build full URLs.
base_url = 'https://www.transfermarkt.co.uk'

# HTTP headers sent with each request: a browser-like User-Agent string.
headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:39.0)',
}

In [3]:
# Accumulates one entry per transfer row: the list of child nodes of its <tr>.
transfers = []

# Pages 1..313 are requested.
# NOTE(review): 313 is presumably the page count at the time of scraping;
# confirm before re-running against the live site.
for page_num in range(1, 314):
    page_url = '/transfers/saisontransfers/statistik?ajax=yw1&altersklasse=&ausrichtung=&land_id=0&leihe=&page={}&plus=2&spielerposition_id='.format(page_num)
    url = base_url + page_url
    # Without a timeout requests waits indefinitely, so a single stalled
    # connection would hang the whole scrape.
    r = requests.get(url, headers=headers, timeout=30)
    if not r.ok:
        # An error response holds no transfer rows; report it instead of
        # silently recording nothing for this page.
        print('Skipping page {}: HTTP {}'.format(page_num, r.status_code))
        continue
    soup = bs4.BeautifulSoup(r.content, "lxml")

    # Data rows are the <tr class="odd"> / <tr class="even"> elements inside
    # a <tbody> of each <table class="items">.
    for table in soup.find_all('table', {'class': 'items'}):
        for tbody in table.find_all('tbody'):
            for tr in tbody.find_all('tr', {'class': ['odd', 'even']}):
                # Keep every child node of the row (not only tags) so the
                # positional indexes used when extracting columns stay valid.
                transfers.append(list(tr.children))


In [37]:
# create empty lists to hold the scraped data, one list per output column
indexes = []
player_names = []
player_roles = []
ages = []
market_values = []
nationalities = []
from_clubs = []
from_leagues = []
to_clubs = []
to_leagues = []
transfer_values = []


def _last_text(cell, name, attrs=None):
    """Return the text of the last `name` tag found under `cell`.

    Falls back to 'Unknown' when nothing matches, so that every row
    contributes exactly one value to each column.
    """
    matches = cell.find_all(name, attrs or {})
    return matches[-1].text if matches else 'Unknown'


def _first_img_title(cell):
    """Return the `title` attribute of the first <img> under `cell`.

    Falls back to 'Unknown' when there is no image or it has no title.
    """
    img = cell.find('img')
    if img is None:
        return 'Unknown'
    return img.get('title', 'Unknown')


# Column lists in the same order as the values of each record built below.
_columns = (
    indexes, player_names, player_roles, ages, market_values, nationalities,
    from_clubs, from_leagues, to_clubs, to_leagues, transfer_values,
)
_club_link = {'class': 'vereinprofil_tooltip'}
skipped_rows = 0

for cells in transfers:
    # Build the complete record before touching any list: a malformed row is
    # then skipped as a whole, and all columns keep the same length (which
    # building a DataFrame from them requires).
    try:
        record = (
            cells[1].text,                          # index
            _first_img_title(cells[2]),             # player name
            _last_text(cells[2], 'td'),             # player role
            cells[3].text,                          # age
            cells[4].text,                          # market value
            _first_img_title(cells[5]),             # nationality
            _last_text(cells[6], 'a', _club_link),  # from football club
            # NOTE(review): the league is taken from the last link in the
            # cell; if a cell has no separate league link this may be the
            # club link itself - confirm against the page markup.
            _last_text(cells[6], 'a'),              # from league
            _last_text(cells[7], 'a', _club_link),  # to football club
            _last_text(cells[7], 'a'),              # to league
            cells[8].text,                          # transfer value
        )
    except (IndexError, AttributeError, TypeError):
        # Row is too short or a position does not hold a tag: skip only this
        # row instead of silently abandoning all remaining rows.
        skipped_rows += 1
        continue
    for column, value in zip(_columns, record):
        column.append(value)

if skipped_rows:
    print('Skipped {} malformed row(s)'.format(skipped_rows))
print('Finished')

Finished


In [38]:
# Maps each output column name to its list of values; starts out empty.
top_transfers = {}

In [39]:
# Register every scraped list under its output column name.
top_transfers.update({
    'index': indexes,
    'Player': player_names,
    'Position': player_roles,
    'Age': ages,
    'Nationality': nationalities,
    'Market Value': market_values,
    'From club': from_clubs,
    'From_league': from_leagues,
    'To club': to_clubs,
    'To league': to_leagues,
    'Transfer fee': transfer_values,
})

In [41]:
# Build the table: each key of the dict becomes a column holding its list.
dataset = pd.DataFrame(data=top_transfers)

In [44]:
# Preview the first 30 rows of the table.
dataset.head(n=30)

Unnamed: 0,Age,From club,From_league,Market Value,Nationality,Player,Position,To club,To league,Transfer fee,index
0,25,FC Barcelona,LaLiga,£135.00m,Brazil,Neymar,Left Wing,Paris SG,Ligue 1,£199.80m,1
1,20,Bor. Dortmund,1.Bundesliga,£72.00m,France,Ousmane Dembélé,Left Wing,FC Barcelona,LaLiga,£94.50m,2
2,19,Monaco,Ligue 1,£81.00m,France,Kylian Mbappé,Centre-Forward,Paris SG,Ligue 1,Loan,3
3,24,Everton,Premier League,£76.50m,Belgium,Romelu Lukaku,Centre-Forward,Man Utd,Premier League,£76.23m,4
4,25,AS Roma,Serie A,£72.00m,Egypt,Mohamed Salah,Right Wing,Liverpool,Premier League,£37.80m,5
5,25,Real Madrid,LaLiga,£58.50m,Spain,Álvaro Morata,Centre-Forward,Chelsea,Premier League,£55.80m,6
6,23,Monaco,Ligue 1,£36.00m,France,Benjamin Mendy,Left-Back,Man City,Premier League,£51.75m,7
7,26,Olympique Lyon,Ligue 1,£49.50m,France,Alexandre Lacazette,Centre-Forward,Arsenal,Premier League,£47.70m,8
8,27,Spurs,Premier League,£36.00m,England,Kyle Walker,Right-Back,Man City,Premier League,£45.90m,9
9,23,Monaco,Ligue 1,£36.00m,Portugal,Bernardo Silva,Right Wing,Man City,Premier League,£45.00m,10


In [45]:
# to_csv does not create missing parent directories, so make sure the output
# folder exists before writing.
import os

os.makedirs('./data', exist_ok=True)
# The DataFrame's own row index is written as the first CSV column.
dataset.to_csv('./data/top_transfers_17-18.csv')

In [48]:
# Load the saved CSV back, using its first column as the row index.
csv_path = './data/top_transfers_17-18.csv'
pd.read_csv(csv_path, index_col=0)

Unnamed: 0,Age,From club,From_league,Market Value,Nationality,Player,Position,To club,To league,Transfer fee,index
0,25,FC Barcelona,LaLiga,£135.00m,Brazil,Neymar,Left Wing,Paris SG,Ligue 1,£199.80m,1
1,20,Bor. Dortmund,1.Bundesliga,£72.00m,France,Ousmane Dembélé,Left Wing,FC Barcelona,LaLiga,£94.50m,2
2,19,Monaco,Ligue 1,£81.00m,France,Kylian Mbappé,Centre-Forward,Paris SG,Ligue 1,Loan,3
3,24,Everton,Premier League,£76.50m,Belgium,Romelu Lukaku,Centre-Forward,Man Utd,Premier League,£76.23m,4
4,25,AS Roma,Serie A,£72.00m,Egypt,Mohamed Salah,Right Wing,Liverpool,Premier League,£37.80m,5
5,25,Real Madrid,LaLiga,£58.50m,Spain,Álvaro Morata,Centre-Forward,Chelsea,Premier League,£55.80m,6
6,23,Monaco,Ligue 1,£36.00m,France,Benjamin Mendy,Left-Back,Man City,Premier League,£51.75m,7
7,26,Olympique Lyon,Ligue 1,£49.50m,France,Alexandre Lacazette,Centre-Forward,Arsenal,Premier League,£47.70m,8
8,27,Spurs,Premier League,£36.00m,England,Kyle Walker,Right-Back,Man City,Premier League,£45.90m,9
9,23,Monaco,Ligue 1,£36.00m,Portugal,Bernardo Silva,Right Wing,Man City,Premier League,£45.00m,10
