# 1. Teams in each season 

In [1]:
def get_data(endpoint, params):
    """Send a GET request to ``URL + endpoint`` and return the decoded payload.

    ``URL`` and ``headers`` are module-level names defined outside this
    function.

    Args:
        endpoint: Path fragment appended to ``URL``.
        params: Query-string parameters forwarded to ``requests.get``.

    Returns:
        A ``(data, remaining)`` tuple. ``data`` is the JSON-decoded response
        body when the status code is 200 and ``None`` otherwise. ``remaining``
        is the raw value of the ``x-ratelimit-requests-remaining`` response
        header (``None`` when the header is absent).
    """
    response = requests.get(
        URL + endpoint,
        headers=headers,
        params=params,
    )

    # Read the quota header up front so it is available on the error path too;
    # previously a non-200 response left both return values unbound and the
    # function died with UnboundLocalError.
    remaining = response.headers.get("x-ratelimit-requests-remaining")

    if response.status_code != 200:
        print(f"Error {response.status_code}, {response.text}")
        return None, remaining

    data = response.json()
    print(f"requests before reaching limit {remaining}")
    return data, remaining

In [None]:
def preprocess_teams(data):
    """Flatten one team record into a single-level dict.

    Args:
        data: Mapping with a ``'team'`` sub-mapping (keys ``id``, ``name``,
            ``country``, ``national``) and a ``'venue'`` sub-mapping (keys
            ``capacity``, ``surface``).

    Returns:
        A dict with the team id, name, country, national flag, and the venue
        capacity and surface.

    Raises:
        KeyError: If any of the keys listed above is missing.
    """
    team_info = data['team']
    venue_info = data['venue']  # was misspelled 'vanue', which always raised KeyError

    team_dict = {
        'team_id': team_info['id'],
        'team_name': team_info['name'],
        'team_country': team_info['country'],
        'national': team_info['national'],
        'venue_capacity': venue_info['capacity'],
        # NOTE(review): 'venure_surface' looks like a typo for 'venue_surface'.
        # It is left unchanged because the key may be used as a column name
        # downstream; confirm against the target table before renaming.
        'venure_surface': venue_info['surface'],
    }
    return team_dict

In [None]:
def get_secret(secret_name, region_name):
    """Fetch a secret from AWS Secrets Manager and return it parsed from JSON.

    Args:
        secret_name: Identifier passed as ``SecretId`` to ``get_secret_value``.
        region_name: AWS region used to build the Secrets Manager client.

    Returns:
        The result of ``json.loads`` applied to the response's
        ``'SecretString'`` field.

    Raises:
        Exception: Any error raised by ``get_secret_value`` is printed and
            then re-raised.
    """
    # Build a Secrets Manager client bound to the requested region.
    sm_client = boto3.session.Session().client(
        service_name='secretsmanager',
        region_name=region_name,
    )

    try:
        response = sm_client.get_secret_value(SecretId=secret_name)
    except Exception as err:
        # Report the failure, then let the caller deal with it.
        print(f"Error retrieving secret: {err}")
        raise err

    # The secret payload is stored as a JSON document in 'SecretString'.
    return json.loads(response['SecretString'])

In [None]:
def data_to_sql(table_name, df, secret=None):
    """Insert every row of ``df`` into ``table_name`` of the 'football' database.

    Column names of ``df`` are used verbatim as the target column names.
    Errors are printed rather than raised, so a failed insert does not stop
    the caller.

    Args:
        table_name: Name of the destination table. It is interpolated into
            the SQL text, so it must come from trusted code, not user input.
        df: pandas DataFrame whose columns match the table's columns.
        secret: Optional mapping with ``'username'`` and ``'password'`` keys.
            When omitted (or ``None``) the credentials are fetched from AWS
            Secrets Manager via ``get_secret``.
    """
    if secret is None:
        # Previously the argument was overwritten unconditionally; now the
        # Secrets Manager lookup only happens when no credentials were given.
        secret_name = 'rds!db-bb99984f-0543-4818-a706-151ff68fd186'  # The name of your secret in AWS Secrets Manager
        region_name = 'eu-north-1'  # e.g., 'us-west-2'
        secret = get_secret(secret_name, region_name)

    db_params = {
        'host': 'database-1.crggkemkunji.eu-north-1.rds.amazonaws.com',
        'database': 'football',
        'user': secret['username'],
        'password': secret['password']
    }

    # Pre-bind so the finally clause is safe even when connect() itself fails.
    conn = None
    cur = None
    try:
        # Establish the connection
        conn = psycopg2.connect(**db_params)
        cur = conn.cursor()

        # NOTE(review): the original computed change_column_name(col) for each
        # column but never used the result; the DataFrame's own column names
        # are what get inserted. Confirm whether renaming was intended.
        columns = list(df.columns)
        insert_query = """
            INSERT INTO {} ({})
            VALUES ({})
        """.format(table_name, ','.join(columns), ','.join(['%s'] * len(columns)))

        # Values go through driver-side parameter binding (%s placeholders).
        cur.executemany(insert_query, df.values.tolist())

        # Commit the changes, and only then report success.
        conn.commit()
        print(f'table {table_name} updated')

    except Exception as e:
        print(f"Error: {e}")

    finally:
        # Close the cursor and connection if they were opened.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [58]:
import time
import boto3
import pandas as pd
import io
import psycopg2

# Resume the crawl of teams and players for every European season.
#
# 'current.csv' is the checkpoint (league, year, endpoint, page) and
# 'european_seasons.csv' is the ordered list of seasons to walk through;
# both are read from the S3 bucket.
s3 = boto3.client('s3')

bucket_name = 'sport-storage'
object_key1 = 'current.csv'
object_key2 = 'european_seasons.csv'

response1 = s3.get_object(Bucket=bucket_name, Key=object_key1)
response2 = s3.get_object(Bucket=bucket_name, Key=object_key2)

current = pd.read_csv(io.BytesIO(response1['Body'].read()))
european_seasons = pd.read_csv(io.BytesIO(response2['Body'].read()))

# Checkpoint: which season, which endpoint and which page to fetch next.
league = current['league_id'][0]
year = current['year'][0]
to_find = current['type'][0]
page = current['page'][0]

# Positional row of the checkpointed season inside european_seasons.
season_mask = (
    (european_seasons['league_id'] == league)
    & (european_seasons['year'] == year)
).to_numpy()
matches = season_mask.nonzero()[0]
if len(matches) == 0:
    raise ValueError(f'season {league}/{year} not found in european_seasons')
index = int(matches[0])

remaining = 100
total_players_data = []
total_teams_data = []

while index < len(european_seasons) and remaining > 0:
    params = {
        'league': league,
        'season': year,
        'page': page,
    }
    endpoint = to_find
    data, remaining_req = get_data(endpoint, params)

    if data is None:
        # Request failed: stop here and keep the checkpoint on this page so
        # the next run retries it.
        break

    # Preprocess each item of the response (not the whole payload) and keep
    # one flat list of records per endpoint.
    if endpoint == 'players':
        total_players_data.extend(
            preprocess_players(player) for player in data['response']
        )
    else:
        total_teams_data.extend(
            preprocess_teams(team) for team in data['response']
        )

    max_page = data['paging']['total']
    # '>=' rather than '==' so a page count below the current page cannot
    # keep the loop paging forever.
    last_page = page >= max_page

    if last_page and endpoint == 'players':
        # Players of this season are done; fetch its teams next.
        to_find = 'teams'
        page = 1

    elif last_page:
        # Season finished: advance to the next one, if any is left.
        index += 1
        if index < len(european_seasons):
            league = european_seasons['league_id'].iloc[index]
            year = european_seasons['year'].iloc[index]
            if european_seasons['players'].iloc[index]:
                to_find = 'players'
            else:
                to_find = 'teams'
        page = 1

    else:
        page += 1

    # The quota arrives as a header string; a missing header is treated as
    # "no requests left" so the limit is never overrun.
    remaining = int(remaining_req) if remaining_req is not None else 0
    time.sleep(7)  # pause between consecutive API requests

# Saving to sql database.
# NOTE(review): the table names 'players' and 'teams' are assumed from the
# original (undefined) bare names -- confirm against the schema. The third
# argument is passed explicitly as None so data_to_sql looks the credentials
# up itself.
if total_players_data:
    data_to_sql('players', pd.DataFrame(total_players_data), None)

if total_teams_data:
    data_to_sql('teams', pd.DataFrame(total_teams_data), None)

if index < len(european_seasons):
    # NOTE(review): the checkpoint is written to the local working directory,
    # while it is read from S3 at the top -- confirm whether it should be
    # uploaded back to the bucket.
    data = {'league_id': [league], 'year': [year], 'type': [to_find], 'page': [page]}
    current = pd.DataFrame(data)
    current.to_csv('current.csv')
    print(f'{len(european_seasons)-index} seasons left')
else:
    print('All teams and players playing in european leagues collected')

Index([0], dtype='int64')


# 2. Teams stats in each season