In [1]:
def get_data(endpoint, params):
    """Call an API-Football v3 endpoint and return its decoded JSON payload.

    Parameters
    ----------
    endpoint : str
        Path appended to the base URL, e.g. ``'teams'`` or ``'fixtures'``.
    params : dict
        Query-string parameters forwarded to ``requests.get``.

    Returns
    -------
    tuple
        ``(data, remaining)``: ``data`` is the decoded JSON body and
        ``remaining`` is the raw value of the
        ``x-ratelimit-requests-remaining`` response header (``None`` when the
        header is absent).

    Raises
    ------
    RuntimeError
        When the API answers with any status other than 200. Previously this
        path fell through to the ``return`` with both names unbound and
        surfaced as an ``UnboundLocalError``.
    """
    import os

    URL = "https://v3.football.api-sports.io/"
    headers = {
        'x-rapidapi-host': "v3.football.api-sports.io",
        # Prefer the API_FOOTBALL_KEY environment variable.
        # NOTE(review): the literal fallback is a credential committed to the
        # notebook; it should be rotated and then removed from the source.
        'x-rapidapi-key': os.environ.get(
            "API_FOOTBALL_KEY", "fb2140228973d644db847895c454c22b"
        ),
    }

    response = requests.get(
        URL + endpoint,
        headers=headers,
        params=params,
    )

    if response.status_code != 200:
        print(f"Error {response.status_code}, {response.text}")
        raise RuntimeError(
            f"request to '{endpoint}' failed with status {response.status_code}"
        )

    remaining = response.headers.get("x-ratelimit-requests-remaining")
    data = response.json()
    print(f"requests before reaching limit {remaining}")

    return data, remaining

In [2]:
def encode_data(data_dict, parent_key = '', sep= '_'):
    """Flatten a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their parent's key with ``sep``.
    A non-empty list made up only of dictionaries is expanded element by
    element, with each element's position used as a key component. Any other
    list and every non-dict, non-list value is stored under its flattened key.

    Parameters
    ----------
    data_dict : dict
        Mapping to flatten.
    parent_key : str, optional
        Prefix for every produced key; a falsy value means no prefix.
    sep : str, optional
        Separator placed between key components.

    Returns
    -------
    dict
        Flat mapping; when two paths flatten to the same key the later value
        wins.
    """
    flat = {}
    for name, value in data_dict.items():
        path = name if not parent_key else f'{parent_key}{sep}{name}'

        if isinstance(value, dict):
            flat.update(encode_data(value, path, sep=sep))
            continue

        if not isinstance(value, list):
            flat[path] = value
            continue

        if not value:
            # an empty list is recorded as a fresh empty list
            flat[path] = []
            continue

        if not all(isinstance(item, dict) for item in value):
            # mixed or scalar lists are kept whole
            flat[path] = value
            continue

        for position, item in enumerate(value):
            flat.update(encode_data(item, f'{path}{sep}{position}', sep=sep))

    return flat

In [3]:
def data_to_sql(table_name, df, db_params, conflict_columns=None):
    """Insert every row of ``df`` into a PostgreSQL table.

    Parameters
    ----------
    table_name : str
        Target table. It is interpolated into the SQL text, so it must come
        from trusted code and never from user input.
    df : pandas.DataFrame
        Rows to insert; its column labels are used as the SQL column names
        (also interpolated into the SQL text).
    db_params : dict
        Keyword arguments forwarded to ``psycopg2.connect``.
    conflict_columns : sequence of str, optional
        Columns forming the ``ON CONFLICT (...) DO NOTHING`` target. ``None``
        or an empty sequence produces a plain ``INSERT``.

    Returns
    -------
    None
        Errors are reported on stdout and not re-raised.
    """
    conn = None
    cur = None
    # Bound up front so the except handler can always reference it.
    last_row = None
    try:
        # Establish the connection
        conn = psycopg2.connect(**db_params)
        cur = conn.cursor()

        columns = ','.join(df.columns)
        placeholders = ','.join(['%s'] * len(df.columns))

        # insert data into tables
        if conflict_columns is None or len(conflict_columns) == 0:
            insert_query = """
                INSERT INTO {} ({})
                VALUES ({})
            """.format(table_name, columns, placeholders)
        else:
            insert_query = """
                INSERT INTO {} ({})
                VALUES ({})
                ON CONFLICT ({}) DO NOTHING
            """.format(table_name, columns, placeholders, ','.join(conflict_columns))

        if len(df) > 0:
            # This is the final row of the frame, kept only as a debugging hint.
            last_row = df.iloc[-1]
        cur.executemany(insert_query, df.values.tolist())

        # Commit the changes, then report success
        conn.commit()
        print(f'table {table_name} updated')

    except Exception as e:
        print(f"Error: {e}")
        if last_row is not None:
            print(f"Last row loaded before the error occurred: {last_row}")
    finally:
        # Close the cursor and connection, each guarded by its own handle
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [None]:
import time
import boto3
import pandas as pd
import io
import psycopg2
import numpy as np
import requests
import json
import pickle

# Resumable collection of teams and per-team statistics for every season listed
# in european_seasons.csv. Progress is checkpointed in S3 so a later run picks
# up where the request quota ended this one.

# get current and european seasons files from bucket
s3 = boto3.client('s3')

bucket_name = 'sport-storage'
object_key1 = 'current.csv'            # checkpoint: league, year and endpoint to resume from
object_key2 = 'european_seasons.csv'   # (league_id, year) rows walked in order
object_key3 = 'teams.json'             # team ids still waiting for a statistics request

response1 = s3.get_object(Bucket = bucket_name, Key = object_key1)
response2 = s3.get_object(Bucket = bucket_name, Key = object_key2)
response3 = s3.get_object(Bucket = bucket_name, Key = object_key3)

current = pd.read_csv(io.BytesIO(response1['Body'].read()))
european_seasons = pd.read_csv(io.BytesIO(response2['Body'].read()))

# insert parameters to be found
league = current['league_id'][0]
year = current['year'][0]
to_find = current['type'][0]

# get current index
index = np.where((european_seasons['league_id']==league)&(european_seasons['year']==year))[0][0]

# Optimistic starting value; replaced by the API's own figure after each request.
remaining = 100
total_team_stats_data = []
total_teams_data = []
done = False

# if script ended on collecting team stats get rest of teams
if to_find == 'teams/statistics':
    # The checkpoint is written with json.dumps at the end of this cell, so it
    # has to be read back as JSON (it was previously read with pickle.loads).
    teams_list = json.loads(response3['Body'].read())
else:
    teams_list = []

# if limit of requests wasnt reached and there is still something to collect enter loop
while not done and remaining > 0:

    # get data from API
    if to_find == 'teams':
        params = {'league': league,
                  'season': year
                 }
    else:
        team = teams_list[0]
        params = {'league': league,
                  'season': year,
                  'team': team
                 }

    endpoint = to_find
    data, remaining_req = get_data(endpoint, params)

    if endpoint == 'teams':
        # preprocess data, then queue a statistics request for every team found
        total_teams_data.extend(encode_data(row) for row in data['response'])
        to_find = 'teams/statistics'
        teams_list = [row['team']['id'] for row in data['response']]
    else:
        # preprocess data and drop the team whose statistics were just collected
        total_team_stats_data.append(encode_data(data['response']))
        teams_list.pop(0)

    # Nothing left to request for this season: either every team was handled or
    # the 'teams' call returned no teams at all (which used to end in an
    # IndexError on the next iteration).
    if not teams_list:
        if index < len(european_seasons)-1:
            # move to next season
            index += 1
            league = european_seasons.loc[index]['league_id']
            year = european_seasons.loc[index]['year']
            to_find = 'teams'
        else:
            # data for all seasons were collected, quit loop
            done = True

    remaining = int(remaining_req)
    print(remaining)
    # sleep cause there can be done only 10 requests per minute
    time.sleep(7)

# saving to sql database
# db parameters
# NOTE(review): credentials are hard-coded; consider reading them from the environment.
db_params = {
    'host': 'localhost',
    'database': 'preds',
    'user': 'postgres',
    'password': 'pass',
    'port': '5432'
}


if len(total_team_stats_data) > 0:
    team_stats_df = pd.DataFrame(total_team_stats_data)
    # errors='ignore': 'lineups' only exists as a column when a record carried
    # an empty lineups list, so a strict drop could raise KeyError.
    # NOTE(review): non-empty lineups flatten to 'lineups_<n>_...' columns that
    # are not dropped here; confirm the team_stats table expects them.
    team_stats_df = team_stats_df.drop(
        columns = [
            'league_name',
            'league_country',
            'league_logo',
            'league_flag',
            'team_name',
            'team_logo',
            'lineups'],
        errors = 'ignore')
    team_stats_df = team_stats_df.fillna(0)
    conflict_col = []
    # flattened keys such as 'goals_for_minute_0-15_total' contain dashes
    team_stats_df.columns = team_stats_df.columns.str.replace('-','_')
    data_to_sql('team_stats', team_stats_df, db_params, conflict_col)

if len(total_teams_data) > 0:
    teams_df = pd.DataFrame(total_teams_data)
    teams_df = teams_df[['team_id', 'team_name', 'team_country', 'team_logo', 'team_national', 'venue_capacity', 'venue_surface']].rename(columns = {'team_national': 'national'})
    conflict_col = ['team_id']
    data_to_sql('teams', teams_df, db_params, conflict_col)

if not done:
    # persist the pending team ids and the current position for the next run
    json_data = json.dumps(teams_list)
    s3.put_object(Bucket = bucket_name, Key = object_key3, Body=json_data)

    data = {'league_id': [league], 'year': [year], 'type': [to_find]}
    current = pd.DataFrame(data)

    csv_buffer = io.StringIO()
    current.to_csv(csv_buffer, index=False)
    s3.put_object(Bucket = bucket_name, Key = object_key1, Body=csv_buffer.getvalue().encode())

    print(f'{len(european_seasons)-index-1} seasons left')
else:
    print('Teams and their statistics were collected for every season played in Europe')

In [5]:
import requests

# Request the fixtures of league 39 for the 2023 season. `df` receives the raw
# JSON payload returned by get_data (a dict, not a DataFrame) and `remain` the
# rate-limit header value.
endpoint = 'fixtures'
params = dict(league=39, season=2023)

df, remain = get_data(endpoint, params)

requests before reaching limit 99


In [7]:
# Display the 'response' entry of the payload currently held in `df`.
df['response']

[{'fixture': {'id': 1035037,
   'referee': 'C. Pawson',
   'timezone': 'UTC',
   'date': '2023-08-11T19:00:00+00:00',
   'timestamp': 1691780400,
   'periods': {'first': 1691780400, 'second': 1691784000},
   'venue': {'id': 512, 'name': 'Turf Moor', 'city': 'Burnley'},
   'status': {'long': 'Match Finished', 'short': 'FT', 'elapsed': 90}},
  'league': {'id': 39,
   'name': 'Premier League',
   'country': 'England',
   'logo': 'https://media.api-sports.io/football/leagues/39.png',
   'flag': 'https://media.api-sports.io/flags/gb.svg',
   'season': 2023,
   'round': 'Regular Season - 1'},
  'teams': {'home': {'id': 44,
    'name': 'Burnley',
    'logo': 'https://media.api-sports.io/football/teams/44.png',
    'winner': False},
   'away': {'id': 50,
    'name': 'Manchester City',
    'logo': 'https://media.api-sports.io/football/teams/50.png',
    'winner': True}},
  'goals': {'home': 0, 'away': 3},
  'score': {'halftime': {'home': 0, 'away': 2},
   'fulltime': {'home': 0, 'away': 3},
   

In [13]:
import pandas as pd

# Flatten each record of the payload and build the frame in one step.
fixs = pd.DataFrame([encode_data(row) for row in df['response']])
fixs.head()

Unnamed: 0,fixture_id,fixture_referee,fixture_timezone,fixture_date,fixture_timestamp,fixture_periods_first,fixture_periods_second,fixture_venue_id,fixture_venue_name,fixture_venue_city,...,goals_home,goals_away,score_halftime_home,score_halftime_away,score_fulltime_home,score_fulltime_away,score_extratime_home,score_extratime_away,score_penalty_home,score_penalty_away
0,1035037,C. Pawson,UTC,2023-08-11T19:00:00+00:00,1691780400,1691780400,1691784000,512.0,Turf Moor,Burnley,...,0,3,0,2,0,3,,,,
1,1035038,M. Oliver,UTC,2023-08-12T11:30:00+00:00,1691839800,1691839800,1691843400,494.0,Emirates Stadium,London,...,2,1,2,0,2,1,,,,
2,1035039,P. Bankes,UTC,2023-08-12T14:00:00+00:00,1691848800,1691848800,1691852400,504.0,Vitality Stadium,"Bournemouth, Dorset",...,1,1,0,0,1,1,,,,
3,1035040,D. Coote,UTC,2023-08-12T14:00:00+00:00,1691848800,1691848800,1691852400,508.0,The American Express Community Stadium,"Falmer, East Sussex",...,4,1,1,0,4,1,,,,
4,1035041,S. Attwell,UTC,2023-08-12T14:00:00+00:00,1691848800,1691848800,1691852400,8560.0,Goodison Park,Liverpool,...,0,1,0,0,0,1,,,,


In [15]:
# Print every column label of `fixs`, one per line.
listwa = fixs.columns
for el in listwa:
    print(el)

fixture_id
fixture_referee
fixture_timezone
fixture_date
fixture_timestamp
fixture_periods_first
fixture_periods_second
fixture_venue_id
fixture_venue_name
fixture_venue_city
fixture_status_long
fixture_status_short
fixture_status_elapsed
league_id
league_name
league_country
league_logo
league_flag
league_season
league_round
teams_home_id
teams_home_name
teams_home_logo
teams_home_winner
teams_away_id
teams_away_name
teams_away_logo
teams_away_winner
goals_home
goals_away
score_halftime_home
score_halftime_away
score_fulltime_home
score_fulltime_away
score_extratime_home
score_extratime_away
score_penalty_home
score_penalty_away


In [54]:
# Flatten one team-statistics payload into a single-record list.
# NOTE(review): `df2` is not defined by any visible cell — this relies on kernel state.
total_team_stats_data = [encode_data(df2['response'])]
total_team_stats_data

[{'league_id': 39,
  'league_name': 'Premier League',
  'league_country': 'England',
  'league_logo': 'https://media.api-sports.io/football/leagues/39.png',
  'league_flag': 'https://media.api-sports.io/flags/gb.svg',
  'league_season': 2023,
  'team_id': 40,
  'team_name': 'Liverpool',
  'team_logo': 'https://media.api-sports.io/football/teams/40.png',
  'form': 'DWWWWWLDWWDWDWWWDDWWWWLWWWWDWWDLWLDWDW',
  'fixtures_played_home': 19,
  'fixtures_played_away': 19,
  'fixtures_played_total': 38,
  'fixtures_wins_home': 15,
  'fixtures_wins_away': 9,
  'fixtures_wins_total': 24,
  'fixtures_draws_home': 3,
  'fixtures_draws_away': 7,
  'fixtures_draws_total': 10,
  'fixtures_loses_home': 1,
  'fixtures_loses_away': 3,
  'fixtures_loses_total': 4,
  'goals_for_total_home': 49,
  'goals_for_total_away': 37,
  'goals_for_total_total': 86,
  'goals_for_average_home': '2.6',
  'goals_for_average_away': '1.9',
  'goals_for_average_total': '2.3',
  'goals_for_minute_0-15_total': 2,
  'goals_for_

In [39]:
# Flatten every record of the payload held in `df` into one dict each.
# NOTE(review): the output shows team records, so `df` presumably held a
# 'teams' response when this cell ran — confirm.
total_teams_data = [encode_data(row) for row in df['response']]
total_teams_data

[{'team_id': 33,
  'team_name': 'Manchester United',
  'team_code': 'MUN',
  'team_country': 'England',
  'team_founded': 1878,
  'team_national': False,
  'team_logo': 'https://media.api-sports.io/football/teams/33.png',
  'venue_id': 556,
  'venue_name': 'Old Trafford',
  'venue_address': 'Sir Matt Busby Way',
  'venue_city': 'Manchester',
  'venue_capacity': 76212,
  'venue_surface': 'grass',
  'venue_image': 'https://media.api-sports.io/football/venues/556.png'},
 {'team_id': 34,
  'team_name': 'Newcastle',
  'team_code': 'NEW',
  'team_country': 'England',
  'team_founded': 1892,
  'team_national': False,
  'team_logo': 'https://media.api-sports.io/football/teams/34.png',
  'venue_id': 562,
  'venue_name': "St. James' Park",
  'venue_address': 'St. James&apos; Street',
  'venue_city': 'Newcastle upon Tyne',
  'venue_capacity': 52758,
  'venue_surface': 'grass',
  'venue_image': 'https://media.api-sports.io/football/venues/562.png'},
 {'team_id': 35,
  'team_name': 'Bournemouth',
 

In [55]:
import pandas as pd
# Build a frame from the flattened statistics dicts: one row per dict, one column per key.
team_stats_df = pd.DataFrame(total_team_stats_data)

In [58]:
# Remove the league/team name, country, logo and flag columns.
# errors='ignore' keeps the cell re-runnable: once the columns are gone a
# strict drop would raise KeyError on the second execution.
team_stats_df = team_stats_df.drop(
    columns=[
        'league_name',
        'league_country',
        'league_logo',
        'league_flag',
        'team_name',
        'team_logo',
    ],
    errors='ignore',
)
team_stats_df.head()

Unnamed: 0,league_id,league_season,team_id,form,fixtures_played_home,fixtures_played_away,fixtures_played_total,fixtures_wins_home,fixtures_wins_away,fixtures_wins_total,...,cards_red_46-60_total,cards_red_46-60_percentage,cards_red_61-75_total,cards_red_61-75_percentage,cards_red_76-90_total,cards_red_76-90_percentage,cards_red_91-105_total,cards_red_91-105_percentage,cards_red_106-120_total,cards_red_106-120_percentage
0,39,2023,40,DWWWWWLDWWDWDWWWDDWWWWLWWWWDWWDLWLDWDW,19,19,38,15,9,24,...,1,20.00%,1,20.00%,1,20.00%,,,,


In [59]:
# Keep the column labels of team_stats_df for inspection.
a = team_stats_df.columns

In [62]:
# Print each entry of `a` on its own line.
for el in a:
    print(el)

league_id
league_season
team_id
form
fixtures_played_home
fixtures_played_away
fixtures_played_total
fixtures_wins_home
fixtures_wins_away
fixtures_wins_total
fixtures_draws_home
fixtures_draws_away
fixtures_draws_total
fixtures_loses_home
fixtures_loses_away
fixtures_loses_total
goals_for_total_home
goals_for_total_away
goals_for_total_total
goals_for_average_home
goals_for_average_away
goals_for_average_total
goals_for_minute_0-15_total
goals_for_minute_0-15_percentage
goals_for_minute_16-30_total
goals_for_minute_16-30_percentage
goals_for_minute_31-45_total
goals_for_minute_31-45_percentage
goals_for_minute_46-60_total
goals_for_minute_46-60_percentage
goals_for_minute_61-75_total
goals_for_minute_61-75_percentage
goals_for_minute_76-90_total
goals_for_minute_76-90_percentage
goals_for_minute_91-105_total
goals_for_minute_91-105_percentage
goals_for_minute_106-120_total
goals_for_minute_106-120_percentage
goals_against_total_home
goals_against_total_away
goals_against_total_total
g

In [46]:
# Build the frame from total_teams_data on every run, so the cell no longer
# depends on a pre-existing `teams_df` and can be re-executed after the rename.
# NOTE(review): assumes `teams_df` originally came from
# pd.DataFrame(total_teams_data), as in the collection script — confirm.
teams_df = pd.DataFrame(total_teams_data)[['team_id', 'team_name', 'team_country', 'team_logo', 'team_national', 'venue_capacity', 'venue_surface']].rename(columns = {'team_national': 'national'})
teams_df

Unnamed: 0,team_id,team_name,team_country,team_logo,national,venue_capacity,venue_surface
0,33,Manchester United,England,https://media.api-sports.io/football/teams/33.png,False,76212,grass
1,34,Newcastle,England,https://media.api-sports.io/football/teams/34.png,False,52758,grass
2,35,Bournemouth,England,https://media.api-sports.io/football/teams/35.png,False,12000,grass
3,36,Fulham,England,https://media.api-sports.io/football/teams/36.png,False,25700,grass
4,39,Wolves,England,https://media.api-sports.io/football/teams/39.png,False,34624,grass
5,40,Liverpool,England,https://media.api-sports.io/football/teams/40.png,False,55212,grass
6,42,Arsenal,England,https://media.api-sports.io/football/teams/42.png,False,60383,grass
7,44,Burnley,England,https://media.api-sports.io/football/teams/44.png,False,22546,grass
8,45,Everton,England,https://media.api-sports.io/football/teams/45.png,False,40569,grass
9,47,Tottenham,England,https://media.api-sports.io/football/teams/47.png,False,62850,grass


In [51]:
import psycopg2

# NOTE(review): credentials are hard-coded; consider reading them from the environment.
db_params = {
    'host': 'localhost',
    'database': 'preds',
    'user': 'postgres',
    'password': 'pass',
    'port': '5432'
}

# data_to_sql takes the conflict columns as its fourth argument; 'team_id' is
# the conflict target so rows for teams already stored are skipped.
data_to_sql('teams', teams_df, db_params, ['team_id'])

table teams updated


In [45]:
import pandas as pd

# Row i maps each j in 0..9 to i + j. Each row is built directly instead of
# accumulating tuples across rows and relying on later duplicates to overwrite
# earlier keys.
final = [{j: j + i for j in range(0, 10)} for i in range(0, 10)]
final


[{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9},
 {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10},
 {0: 2, 1: 3, 2: 4, 3: 5, 4: 6, 5: 7, 6: 8, 7: 9, 8: 10, 9: 11},
 {0: 3, 1: 4, 2: 5, 3: 6, 4: 7, 5: 8, 6: 9, 7: 10, 8: 11, 9: 12},
 {0: 4, 1: 5, 2: 6, 3: 7, 4: 8, 5: 9, 6: 10, 7: 11, 8: 12, 9: 13},
 {0: 5, 1: 6, 2: 7, 3: 8, 4: 9, 5: 10, 6: 11, 7: 12, 8: 13, 9: 14},
 {0: 6, 1: 7, 2: 8, 3: 9, 4: 10, 5: 11, 6: 12, 7: 13, 8: 14, 9: 15},
 {0: 7, 1: 8, 2: 9, 3: 10, 4: 11, 5: 12, 6: 13, 7: 14, 8: 15, 9: 16},
 {0: 8, 1: 9, 2: 10, 3: 11, 4: 12, 5: 13, 6: 14, 7: 15, 8: 16, 9: 17},
 {0: 9, 1: 10, 2: 11, 3: 12, 4: 13, 5: 14, 6: 15, 7: 16, 8: 17, 9: 18}]

In [47]:
# Each dict in `final` becomes one row; its keys become the column labels.
res = pd.DataFrame(final)
res

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,1,2,3,4,5,6,7,8,9
1,1,2,3,4,5,6,7,8,9,10
2,2,3,4,5,6,7,8,9,10,11
3,3,4,5,6,7,8,9,10,11,12
4,4,5,6,7,8,9,10,11,12,13
5,5,6,7,8,9,10,11,12,13,14
6,6,7,8,9,10,11,12,13,14,15
7,7,8,9,10,11,12,13,14,15,16
8,8,9,10,11,12,13,14,15,16,17
9,9,10,11,12,13,14,15,16,17,18
