In [1]:
def get_predicted_matches(start_date, end_date):
    """Load the predictions for fixtures played between two dates (inclusive).

    Joins ``predictions`` with ``fixtures`` on ``fixture_id`` and keeps the rows
    whose ``fixture_date`` lies in ``[start_date, end_date]``.

    Args:
        start_date: Lower bound for ``fixture_date`` (``datetime.date`` or ISO string).
        end_date: Upper bound for ``fixture_date`` (``datetime.date`` or ISO string).

    Returns:
        A DataFrame with every ``predictions`` column plus ``league_id`` and
        ``fixture_date``, or ``None`` when the connection or the query fails
        (the error is printed, not raised).
    """
    conn = None
    # NOTE(review): credentials are hard-coded here; consider loading them from
    # the environment or a config file that is not committed with the notebook.
    db_params = {
        'host': 'localhost',
        'database': 'preds',
        'user': 'postgres',
        'password': 'pass',
        'port': '5432'
    }

    # Both bounds are bound as query parameters so the driver handles quoting,
    # and the caller's start_date is honoured instead of a fixed literal date.
    query = '''
    SELECT p.*, f.league_id, f.fixture_date
    FROM predictions p
    JOIN fixtures f ON p.fixture_id = f.fixture_id
    WHERE f.fixture_date >= %s AND f.fixture_date <= %s
    '''

    try:
        conn = psycopg2.connect(**db_params)
        current_matches = pd.read_sql_query(query, conn, params=(start_date, end_date))
        return current_matches
    except Exception as e:
        print(f'Error {e}')
        # A single None mirrors the single DataFrame returned on success.
        return None
    finally:
        if conn is not None:
            conn.close()

In [2]:
def get_data(endpoint, params):
    """Call one API-Football v3 endpoint and return its JSON payload.

    Args:
        endpoint: Path appended to the base URL, e.g. ``'odds'``.
        params: Query-string parameters forwarded to ``requests.get``.

    Returns:
        ``(data, remaining)`` where ``data`` is the decoded JSON body and
        ``remaining`` is the raw ``x-ratelimit-requests-remaining`` header
        (a string, or ``None`` if the header is absent). On any non-200
        status the error is printed and ``(None, None)`` is returned.
    """
    import os  # local import so this cell does not depend on the import cell

    URL = "https://v3.football.api-sports.io/"
    # NOTE(review): the key below is committed in the notebook and should be
    # rotated. Set API_FOOTBALL_KEY in the environment to override it.
    api_key = os.environ.get('API_FOOTBALL_KEY', "fb2140228973d644db847895c454c22b")
    headers = {
        'x-rapidapi-host': "v3.football.api-sports.io",
        'x-rapidapi-key': api_key
    }

    response = requests.get(
        URL + endpoint,
        headers=headers,
        params=params,
        timeout=30,  # without a timeout a stalled connection blocks forever
    )

    if response.status_code != 200:
        print(f"Error {response.status_code}, {response.text}")
        # Previously this path fell through to an unbound `data`/`remaining`.
        return None, None

    remaining = response.headers.get("x-ratelimit-requests-remaining")
    data = response.json()
    print(f"requests before reaching limit {remaining}")

    return data, remaining

In [3]:
def encode_data(data_dict, parent_key = '', sep= '_'):
    """Flatten a nested dict into a single-level dict.

    Nested dict keys are joined with ``sep``. A non-empty list made up only of
    dicts is expanded element by element, using the element's position as a
    path component. Any other value (scalars, empty lists, lists containing
    non-dict items) is stored unchanged under its joined key.

    Args:
        data_dict: The (possibly nested) dict to flatten.
        parent_key: Prefix for every produced key; empty means no prefix.
        sep: Separator placed between path components.

    Returns:
        A new flat dict.
    """
    flat = {}
    for name, item in data_dict.items():
        path = name if not parent_key else f'{parent_key}{sep}{name}'

        if isinstance(item, dict):
            flat.update(encode_data(item, path, sep=sep))
            continue

        if not isinstance(item, list):
            flat[path] = item
        elif not item:
            # empty lists are stored as a fresh empty list
            flat[path] = []
        elif all(isinstance(element, dict) for element in item):
            for position, element in enumerate(item):
                flat.update(encode_data(element, f'{path}{sep}{position}', sep=sep))
        else:
            flat[path] = item
    return flat

In [4]:
def data_to_sql(table_name, df, db_params, conflict_columns):
    """Insert every row of ``df`` into a PostgreSQL table.

    Args:
        table_name: Target table. Interpolated into the SQL text, so it must
            come from trusted code, never from user input.
        df: DataFrame whose column names match the table's column names.
        db_params: Keyword arguments for ``psycopg2.connect``.
        conflict_columns: Column name(s) for ``ON CONFLICT (...) DO NOTHING``.
            Accepts a list/tuple of names or a single name as a string; an
            empty value produces a plain ``INSERT``.

    Returns:
        None. Errors are printed rather than raised; nothing is committed when
        an error occurs.
    """
    # A bare string would otherwise be split into characters by ','.join below.
    if isinstance(conflict_columns, str):
        conflict_columns = [conflict_columns]

    columns = ','.join(df.columns)
    placeholders = ','.join(['%s'] * len(df.columns))
    insert_query = f"""
                INSERT INTO {table_name} ({columns})
                VALUES ({placeholders})
            """
    if conflict_columns:
        insert_query += f"""    ON CONFLICT ({','.join(conflict_columns)}) DO NOTHING
            """

    # Defined before the try block so the except handler can always read it.
    last_row = df.iloc[-1] if len(df) > 0 else None

    conn = None
    cur = None
    try:
        # Establish the connection
        conn = psycopg2.connect(**db_params)
        cur = conn.cursor()

        cur.executemany(insert_query, df.values.tolist())

        # Commit first, then report success
        conn.commit()
        print(f'table {table_name} updated')

    except Exception as e:
        print(f"Error: {e}")
        if last_row is not None:
            print(f"Last row of the batch that failed: {last_row}")
    finally:
        # Close each resource only if it was actually created; closing the
        # connection without a commit discards the unfinished transaction.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [5]:
def preprocess_data(odd_data):
    """Build one row of selected odds per fixture.

    For every fixture the bookmaker offering the most bets is chosen (the first
    one on ties) and the odds of a fixed set of markets are copied into named
    columns. A column whose market or value is not offered is filled with 0.

    Args:
        odd_data: List of fixture dicts as returned in the ``response`` list of
            the odds endpoint. Each needs ``fixture.id``; ``bookmakers`` is a
            list of dicts with a ``bets`` list, whose items carry ``name`` and
            a ``values`` list of dicts with an ``odd`` entry.

    Returns:
        A DataFrame with ``fixture_id`` followed by the odds columns, always in
        the same order. Odds are kept exactly as received (no numeric cast).
    """
    # market name -> ((output column, position in the bet's `values` list), ...)
    # NOTE(review): the positions assume a fixed ordering of `values` for each
    # market (e.g. 'double_chance_away' reads position 1, 'home_over_2' reads
    # position 2) — confirm against the API's actual value labels.
    markets = {
        'Match Winner': (('result_home', 0), ('result_draw', 1), ('result_away', 2)),
        'Both Teams Score': (('both_scores_true', 0), ('both_scores_false', 1)),
        'Double Chance': (('double_chance_home', 0), ('double_chance_away', 1)),
        'First Half Winner': (('fh_result_home', 0), ('fh_result_draw', 1), ('fh_result_away', 2)),
        'Total - Home': (('home_over_1', 0), ('home_over_2', 2)),
        'Total - Away': (('away_over_1', 0), ('away_over_2', 2)),
    }
    odds_columns = [column for targets in markets.values() for column, _ in targets]

    rows = []
    for fixture in odd_data:
        row = {'fixture_id': fixture['fixture']['id']}

        bookmakers = fixture.get('bookmakers') or []
        if bookmakers:
            # max() returns the first maximal element, i.e. the earliest
            # bookmaker among those with the largest number of bets.
            richest = max(bookmakers, key=lambda book: len(book['bets']))
            for bet in richest['bets']:
                values = bet.get('values') or []
                for column, position in markets.get(bet.get('name'), ()):
                    # Skip positions the bookmaker did not publish instead of
                    # failing; the column then falls back to the 0 default.
                    if position < len(values):
                        row[column] = values[position]['odd']

        for column in odds_columns:
            row.setdefault(column, 0)
        rows.append(row)

    return pd.DataFrame(rows, columns=['fixture_id', *odds_columns])

In [7]:
import requests
import re
import datetime
from datetime import date
import pandas as pd
import psycopg2

# --- Target window: the coming Friday through the Monday right after it ---
# If today is already Friday, the window starts on the following Friday.
t = date.today()
days_to_friday = (4 - t.weekday()) % 7 or 7
start_date = t + datetime.timedelta(days_to_friday)
# Derive the end from the start so it can never fall before it (picking the
# "next Monday from today" did exactly that when run on Friday-Sunday).
end_date = start_date + datetime.timedelta(3)

matches = get_predicted_matches(start_date, end_date)
if not isinstance(matches, pd.DataFrame):
    raise RuntimeError('could not load the predicted matches from the database')

#get only matches with odds available
c = pd.read_csv('data/european_seasons.csv')
matches = matches.merge(c[['league_id','odds']], on='league_id')
matches = matches[matches['odds']]
matches_list = list(matches['fixture_id'].unique())
leagues_list = list(matches['league_id'].unique())

odds_data = []
remaining = 10000  # optimistic budget until the API reports the real quota
done = False
response = None    # last successful payload, kept for inspection below

# Every league is queried for every day of the window, following the paging of
# each (league, day) pair; the loops stop early once the quota is exhausted.
for league_id in leagues_list:
    # the last `year` row listed for the league in the seasons file
    season = list(c[c['league_id'] == league_id]['year'])[-1]
    query_date = start_date
    while query_date <= end_date and remaining > 0:
        page = 1
        while remaining > 0:
            params = {'league': league_id,
                      'date': query_date.isoformat(),
                      'season': season,
                      'page': page}
            payload, quota = get_data('odds', params)
            if payload is None:
                # failed request: give up on this day, keep going with the next
                break
            response = payload
            # the rate-limit header may be missing; then just count down locally
            remaining = int(quota) if quota is not None else remaining - 1

            odds_data.extend(response['response'])

            if page >= response['paging']['total']:
                break
            page += 1
        query_date += datetime.timedelta(1)
    if remaining <= 0:
        break
else:
    # the loop ran to the end without running out of requests
    done = True

print(done)
if len(odds_data) > 0:
    # preprocess data
    df = preprocess_data(odds_data)
    # send to db
    # TODO: enable once the database settings are available in this cell; note
    # that `params` here holds the API query, not connection parameters.
    #data_to_sql('odds', df, params, 'fixture_id')

  current_matches = pd.read_sql_query(query, conn)


requests before reaching limit 498
requests before reaching limit 497
requests before reaching limit 496
requests before reaching limit 495
requests before reaching limit 494
requests before reaching limit 493
requests before reaching limit 492
requests before reaching limit 491
requests before reaching limit 490
requests before reaching limit 489
requests before reaching limit 488
requests before reaching limit 487
requests before reaching limit 486
requests before reaching limit 485
requests before reaching limit 484
requests before reaching limit 483
requests before reaching limit 482
requests before reaching limit 481
True


In [8]:
# Show the payload returned by the last get_data call of the download loop.
response

{'get': 'odds',
 'parameters': {'league': '362',
  'date': '2024-07-22',
  'season': '2024',
  'page': '1'},
 'errors': [],
 'results': 0,
 'paging': {'current': 1, 'total': 1},
 'response': []}

In [11]:
# Fetch the odds for a single fixture: `a` is the decoded JSON payload and `b`
# the value of the remaining-requests header returned by get_data.
a, b = get_data('odds', {'fixture': 1132615})

requests before reaching limit 497


In [83]:
# Flatten the first fixture of the payload into a single-level dict whose keys
# are the nested keys joined with '_'.
# NOTE: this rebinds `c`, which the download cell uses for the frame read from
# data/european_seasons.csv.
c = encode_data(a['response'][0])

In [82]:
# Print how many bets each bookmaker offers for the first fixture in `a`.
# NOTE: the loop variable rebinds `b`, which was assigned the remaining-requests
# value when `a, b = get_data(...)` ran.
for b in a['response'][0]['bookmakers']:
    print(len(b['bets']))

2
25
23
15
3


### Preprocess

In [12]:
# Show the raw odds payload fetched for fixture 1132615.
a

{'get': 'odds',
 'parameters': {'fixture': '1132615'},
 'errors': [],
 'results': 1,
 'paging': {'current': 1, 'total': 1},
 'response': [{'league': {'id': 233,
    'name': 'Premier League',
    'country': 'Egypt',
    'logo': 'https://media.api-sports.io/football/leagues/233.png',
    'flag': 'https://media.api-sports.io/flags/eg.svg',
    'season': 2023},
   'fixture': {'id': 1132615,
    'timezone': 'UTC',
    'date': '2024-07-01T17:00:00+00:00',
    'timestamp': 1719853200},
   'update': '2024-07-01T15:00:18+00:00',
   'bookmakers': [{'id': 27,
     'name': 'NordicBet',
     'bets': [{'id': 1,
       'name': 'Match Winner',
       'values': [{'value': 'Home', 'odd': '7.50'},
        {'value': 'Draw', 'odd': '4.50'},
        {'value': 'Away', 'odd': '1.38'}]},
      {'id': 2,
       'name': 'Home/Away',
       'values': [{'value': 'Home', 'odd': '6.00'},
        {'value': 'Away', 'odd': '1.11'}]},
      {'id': 3,
       'name': 'Second Half Winner',
       'values': [{'value': 'Home

### 