In [37]:
import pandas as pd 
import time
import random
import os
import utils
from datetime import datetime
import uuid

### Preparing dataframe

In [38]:
# Each entry pairs a league label with its ESPN standings page.
league_pages = [
    ('ESPAÑA', 'https://www.espn.com.co/futbol/posiciones/_/liga/esp.1'),
    ('INGLATERRA', 'https://www.espn.com.co/futbol/posiciones/_/liga/eng.1'),
    ('ITALIA', 'https://www.espn.com.co/futbol/posiciones/_/liga/ita.1'),
    ('GERMANY', 'https://www.espn.com.co/futbol/posiciones/_/liga/ger.1'),
    ('FRANCIA', 'https://www.espn.com.co/futbol/posiciones/_/liga/fra.1'),
    ('PORTUGAL', 'https://www.espn.com.co/futbol/posiciones/_/liga/por.1'),
    ('HOLANDA', 'https://www.espn.com.co/futbol/posiciones/_/liga/ned.1'),
]

# Parallel lists, kept under their original names.
leagues = [label for label, _ in league_pages]
url = [page for _, page in league_pages]

# One row per league, on the default 0..6 integer index.
df_leagues = pd.DataFrame({'LEAGUE': leagues, 'URL': url})
df_leagues


Unnamed: 0,LEAGUE,URL
0,ESPAÑA,https://www.espn.com.co/futbol/posiciones/_/li...
1,INGLATERRA,https://www.espn.com.co/futbol/posiciones/_/li...
2,ITALIA,https://www.espn.com.co/futbol/posiciones/_/li...
3,GERMANY,https://www.espn.com.co/futbol/posiciones/_/li...
4,FRANCIA,https://www.espn.com.co/futbol/posiciones/_/li...
5,PORTUGAL,https://www.espn.com.co/futbol/posiciones/_/li...
6,HOLANDA,https://www.espn.com.co/futbol/posiciones/_/li...


In [39]:
# Parse every HTML table on the standings page of the league stored at
# row 1 of df_leagues (INGLATERRA); read_html returns a list of DataFrames.

df = pd.read_html(df_leagues.loc[1, 'URL'])
df

[                       2023/2024
 0            1MCIManchester City
 1          2TOTTottenham Hotspur
 2                  3LIVLiverpool
 3            4WHUWest Ham United
 4                    5ARSArsenal
 5     6BHABrighton & Hove Albion
 6             7CRYCrystal Palace
 7                  8BRNBrentford
 8          9FORNottingham Forest
 9               10AVLAston Villa
 10        11MUNManchester United
 11                  12CHEChelsea
 12                   13FULFulham
 13         14NEWNewcastle United
 14  15WOLWolverhampton Wanderers
 15          16BOUAFC Bournemouth
 16   17Sheff UtdSheffield United
 17                  18EVEEverton
 18               19LTNLuton Town
 19                  20BRNBurnley,
     J  G  E  P  GF  GC  DIF  PTS
 0   4  4  0  0  11   2    9   12
 1   4  3  1  0  11   4    7   10
 2   4  3  1  0   9   3    6   10
 3   4  3  1  0   9   4    5   10
 4   4  3  1  0   8   4    4   10
 5   4  3  0  1  12   6    6    9
 6   4  2  1  1   5   4    1    7
 7   4  1  3 

In [40]:
# Number of tables read_html parsed from the page (2 here: teams and statistics)
len(df)

2

In [41]:
# Teams table: a single column (headed by the season) whose cells fuse
# rank, abbreviation and team name, e.g. "1MCIManchester City"
df[0]

Unnamed: 0,2023/2024
0,1MCIManchester City
1,2TOTTottenham Hotspur
2,3LIVLiverpool
3,4WHUWest Ham United
4,5ARSArsenal
5,6BHABrighton & Hove Albion
6,7CRYCrystal Palace
7,8BRNBrentford
8,9FORNottingham Forest
9,10AVLAston Villa


In [42]:
# Statistics table: columns J, G, E, P, GF, GC, DIF, PTS, one row per team,
# on the same 0-based row index as the teams table
df[1]

Unnamed: 0,J,G,E,P,GF,GC,DIF,PTS
0,4,4,0,0,11,2,9,12
1,4,3,1,0,11,4,7,10
2,4,3,1,0,9,3,6,10
3,4,3,1,0,9,4,5,10
4,4,3,1,0,8,4,4,10
5,4,3,0,1,12,6,6,9
6,4,2,1,1,5,4,1,7
7,4,1,3,0,8,5,3,6
8,4,2,0,2,6,6,0,6
9,4,2,0,2,8,9,-1,6


In [43]:
# Put the teams table and the statistics table side by side (axis=1);
# ignore_index=True relabels the resulting columns 0..8.

teams_table, stats_table = df[0], df[1]
df = pd.concat([teams_table, stats_table], axis=1, ignore_index=True)
df

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,1MCIManchester City,4,4,0,0,11,2,9,12
1,2TOTTottenham Hotspur,4,3,1,0,11,4,7,10
2,3LIVLiverpool,4,3,1,0,9,3,6,10
3,4WHUWest Ham United,4,3,1,0,9,4,5,10
4,5ARSArsenal,4,3,1,0,8,4,4,10
5,6BHABrighton & Hove Albion,4,3,0,1,12,6,6,9
6,7CRYCrystal Palace,4,2,1,1,5,4,1,7
7,8BRNBrentford,4,1,3,0,8,5,3,6
8,9FORNottingham Forest,4,2,0,2,6,6,0,6
9,10AVLAston Villa,4,2,0,2,8,9,-1,6


### Cleaning data

In [44]:
# Give the positional columns 0..8 descriptive names. They correspond, in
# order, to the team cell followed by J, G, E, P, GF, GC, DIF, PTS.
df = df.rename(columns={
    0: 'team',
    1: 'played',
    2: 'won',
    3: 'tied',
    4: 'lost',
    5: 'goals_favor',
    6: 'goals_against',
    7: 'diff',
    8: 'scores',
})

# A scraped team cell fuses "<rank><abbreviation><team name>", e.g.
# "1MCIManchester City". Remove the rank and then, in order of preference:
#   1. a 3-character all-caps code ("MCI", "BOU" in "16BOUAFC Bournemouth");
#   2. a mixed-case code, which ends where a lowercase letter is immediately
#      followed by an uppercase one ("17Sheff UtdSheffield United");
#   3. any 3 characters (the previous fixed-width behaviour) as a fallback.
# A fixed-width slice turned "17Sheff UtdSheffield United" into
# "eff UtdSheffield United". Because the pattern is anchored on the leading
# rank digits, re-running this cell leaves already-clean names untouched,
# and missing values pass through instead of raising.
team_prefix = r'^\d+(?:[A-Z0-9]{3}|.+?[a-z](?=[A-Z])|.{3})'
df['team'] = df['team'].str.replace(team_prefix, '', regex=True)

# Tag every row with the league these standings were scraped from
# (row 1 of df_leagues, matching the URL that was read).
df['league'] = df_leagues['LEAGUE'][1]

df

Unnamed: 0,team,played,won,tied,lost,goals_favor,goals_against,diff,scores,league
0,Manchester City,4,4,0,0,11,2,9,12,INGLATERRA
1,Tottenham Hotspur,4,3,1,0,11,4,7,10,INGLATERRA
2,Liverpool,4,3,1,0,9,3,6,10,INGLATERRA
3,West Ham United,4,3,1,0,9,4,5,10,INGLATERRA
4,Arsenal,4,3,1,0,8,4,4,10,INGLATERRA
5,Brighton & Hove Albion,4,3,0,1,12,6,6,9,INGLATERRA
6,Crystal Palace,4,2,1,1,5,4,1,7,INGLATERRA
7,Brentford,4,1,3,0,8,5,3,6,INGLATERRA
8,Nottingham Forest,4,2,0,2,6,6,0,6,INGLATERRA
9,Aston Villa,4,2,0,2,8,9,-1,6,INGLATERRA
