In [91]:
import requests
import pandas as pd
from configparser import ConfigParser

# Read the pipeline settings file and extract the football-data.org API token.
parser = ConfigParser()
parser.read('pipeline.conf')
# The token is picked positionally: it is the value of the first
# (option, value) pair that parser.items() returns for 'football-data'.
option_name, api_key = parser.items('football-data')[0]
# Header dict passed along with the API requests.
headers = {'X-AUTH-TOKEN': api_key}


In [92]:
# Check that the API responds (the cell displays the HTTP status code).
# A timeout is passed explicitly because requests applies none by default,
# so an unresponsive server would otherwise block the kernel indefinitely.
requests.get(
    'https://api.football-data.org/v4/matches',
    headers=headers,
    timeout=30,
).status_code

200

In [93]:
# Download the teams of the PL competition.
url_teams = 'https://api.football-data.org/v4/competitions/PL/teams'
# timeout: requests waits forever by default, so bound the wait explicitly.
response_PL = requests.get(url=url_teams, headers=headers, timeout=30)
# Fail here with a clear HTTPError on a 4xx/5xx answer instead of parsing an
# error payload and hitting a KeyError when 'teams' is accessed later.
response_PL.raise_for_status()
data_PL = response_PL.json()
# Show the top-level keys of the payload.
data_PL.keys()




dict_keys(['count', 'filters', 'competition', 'season', 'teams'])

In [94]:
# Keep only the 'teams' entry of the competition payload.
teams_data = data_PL['teams']
# Show the fields available on the first team record.
teams_data[0].keys()

dict_keys(['area', 'id', 'name', 'shortName', 'tla', 'crest', 'address', 'website', 'founded', 'clubColors', 'venue', 'runningCompetitions', 'coach', 'squad', 'staff', 'lastUpdated'])

In [95]:
# Build one flat record per team: id, name, venue, coach name and last update.
teams = []
for team in teams_data:
    # The coach entry (or either name part) may be null or absent; fall back to
    # an empty dict and skip empty parts so the literal text "None" never ends
    # up in the coach name.
    coach_info = team.get('coach') or {}
    name_parts = (coach_info.get('firstName'), coach_info.get('lastName'))
    # Joining outside an f-string also avoids reusing the same quote character
    # inside replacement fields, which is a SyntaxError before Python 3.12.
    # When no name part is available the coach is stored as None.
    coach = ' '.join(part for part in name_parts if part) or None
    teams.append({
        'id': team['id'],
        'team': team['name'],
        'Stadium': team['venue'],
        'dt': coach,
        'lastUpdated': team['lastUpdated'],
    })

In [96]:
# Teams table with a fixed column order: id, team, Stadium, dt, lastUpdated.
columns = [
    'id',
    'team',
    'Stadium',
    'dt',
    'lastUpdated',
]
df_2024_2025_PL = pd.DataFrame(teams, columns=columns)
df_2024_2025_PL

Unnamed: 0,id,team,Stadium,dt,lastUpdated
0,57,Arsenal FC,Emirates Stadium,Mikel Arteta,2022-02-10T19:48:56Z
1,58,Aston Villa FC,Villa Park,Unai Emery,2022-04-03T16:22:14Z
2,61,Chelsea FC,Stamford Bridge,Enzo Maresca,2022-02-10T19:24:40Z
3,62,Everton FC,Goodison Park,Sean Dyche,2022-02-10T19:47:42Z
4,63,Fulham FC,Craven Cottage,Marco Silva,2024-07-29T17:16:11Z
5,64,Liverpool FC,Anfield,Arne Slot,2022-02-10T19:30:22Z
6,65,Manchester City FC,Etihad Stadium,Pep Guardiola,2022-02-10T19:48:37Z
7,66,Manchester United FC,Old Trafford,Erik ten Hag,2022-02-10T19:27:46Z
8,67,Newcastle United FC,St. James' Park,Eddie Howe,2022-09-28T18:51:07Z
9,73,Tottenham Hotspur FC,Tottenham Hotspur Stadium,Ange Postecoglou,2024-05-11T21:39:03Z


In [97]:
# One entry per team: the team id plus a trimmed list of its players, keeping
# only id, name, position and nationality for each player.
squads = [
    dict(
        id_team=team['id'],
        squad=[
            {
                'id': player_data['id'],
                'name': player_data['name'],
                'position': player_data['position'],
                'nationality': player_data['nationality'],
            }
            for player_data in team['squad']
        ],
    )
    for team in teams_data
]

# Preview a slice of the first team's players.
squads[0]['squad'][5:10]

[{'id': 112948,
  'name': 'Fabio Vieira',
  'position': 'Attacking Midfield',
  'nationality': 'Portugal'},
 {'id': 99813,
  'name': 'Bukayo Saka',
  'position': 'Right Winger',
  'nationality': 'England'},
 {'id': 98816,
  'name': 'Jurrien Timber',
  'position': 'Centre-Back',
  'nationality': 'Netherlands'},
 {'id': 80171,
  'name': 'William Saliba',
  'position': 'Centre-Back',
  'nationality': 'France'},
 {'id': 61450,
  'name': 'Martinelli',
  'position': 'Left Winger',
  'nationality': 'Brazil'}]

In [98]:
# Each row holds an id_team and the list of that team's players.
df_squads = pd.DataFrame(squads)
df_squads.head(10)


Unnamed: 0,id_team,squad
0,57,"[{'id': 4832, 'name': 'David Raya', 'position'..."
1,58,"[{'id': 3141, 'name': 'Emiliano Martínez', 'po..."
2,61,"[{'id': 3189, 'name': 'Kepa Arrizabalaga', 'po..."
3,62,"[{'id': 3309, 'name': 'Jordan Pickford', 'posi..."
4,63,"[{'id': 3174, 'name': 'Bernd Leno', 'position'..."
5,64,"[{'id': 1795, 'name': 'Alisson', 'position': '..."
6,65,"[{'id': 3222, 'name': 'Ederson', 'position': '..."
7,66,"[{'id': 7544, 'name': 'André Onana', 'position..."
8,67,"[{'id': 3310, 'name': 'Nick Pope', 'position':..."
9,73,"[{'id': 3086, 'name': 'Guglielmo Vicario', 'po..."


In [99]:
# Normalize the nested structure so that every player becomes its own row,
# carrying the id_team of the team it belongs to.
df_squads = pd.json_normalize(
    squads,
    record_path='squad',
    meta='id_team',
)
df_squads.head()

Unnamed: 0,id,name,position,nationality,id_team
0,4832,David Raya,Goalkeeper,Spain,57
1,5530,Aaron Ramsdale,Goalkeeper,England,57
2,153843,Karl Jakob Hein,Goalkeeper,Estonia,57
3,147286,Jakub Kiwior,Left-Back,Poland,57
4,133512,Riccardo Calafiori,Centre-Back,Italy,57


In [100]:
# Download the matches of the PL competition.
# timeout: requests waits forever by default, so bound the wait explicitly.
response_matches = requests.get(
    'https://api.football-data.org/v4/competitions/PL/matches',
    headers=headers,
    timeout=30,
)
# Fail here with a clear HTTPError on a 4xx/5xx answer instead of parsing an
# error payload and hitting a KeyError when 'matches' is accessed later.
response_matches.raise_for_status()
data_matches = response_matches.json()

In [101]:
# Status of every match: matchday, id, status and utcDate (stored as 'date').
matches_data = pd.DataFrame(
    [
        {
            'matchday': match['matchday'],
            'id': match['id'],
            'status': match['status'],
            'date': match['utcDate'],
        }
        for match in data_matches['matches']
    ]
)
matches_data.head()
# Once a match is in the finished state its data will be loaded into the
# database; the utcDate is also available to drive the incremental load.
# Every run of the script will load the matches that are finished and that
# have not been loaded before.
# The value that marks the last load will live in a .json file that is updated
# on every run of the script.

Unnamed: 0,matchday,id,status,date
0,1,497410,TIMED,2024-08-16T19:00:00Z
1,1,497411,TIMED,2024-08-17T11:30:00Z
2,1,497412,TIMED,2024-08-17T14:00:00Z
3,1,497413,TIMED,2024-08-17T14:00:00Z
4,1,497414,TIMED,2024-08-17T14:00:00Z
