In [1]:
def get_predicted_matches(start_date, end_date):
    
    conn = None
    db_params = {
        'host': 'localhost',
        'database': 'preds',
        'user': 'postgres',
        'password': 'pass',
        'port': '5432'
    }
    #get next friday and monday dates as start and end for query

    # get upocoming matches playing from next friday to monday
    try:
        conn = psycopg2.connect(**db_params)

        query = '''
    SELECT p.*, f.league_id, fixture_date
    FROM predictions p
    JOIN fixtures f ON p.fixture_id = f.fixture_id
    WHERE f.fixture_date >= '{}' and fixture_date <= '{}'
    '''.format('2024-06-26', end_date)
        current_matches = pd.read_sql_query(query, conn)
        
        return current_matches
    except Exception as e:
        print(f'Error {e}')
        return None, None
    finally:
        if conn is not None:
            conn.close()

In [2]:
def get_data(endpoint, params):
    
    URL = "https://v3.football.api-sports.io/"
    headers = {
	'x-rapidapi-host': "v3.football.api-sports.io",
    'x-rapidapi-key': "fb2140228973d644db847895c454c22b"
    }
    
    response = requests.get(
        URL+endpoint,
        headers = headers,
        params = params
    )
    if response.status_code == 200:
            
        remaining = response.headers.get("x-ratelimit-requests-remaining")
        data = response.json()
        print(f"requests before reaching limit {remaining}")

    else:
        print(f"Error {response.status_code}, {response.text}")

    return data, remaining                                                                     

In [3]:
def encode_data(data_dict, parent_key = '', sep= '_'):
    encoded = []
    for key, val in data_dict.items():
        new_key = f'{parent_key}{sep}{key}' if parent_key else key
        if isinstance(val, dict):
            encoded.extend(encode_data(val, new_key, sep=sep).items())
        elif isinstance(val, list):
            if val:
                if all(isinstance(i, dict) for i in val):
                    for k, v in enumerate(val):
                        v_key = f'{new_key}{sep}{k}'
                        encoded.extend(encode_data(v, v_key, sep=sep).items())
                else:
                    encoded.append((new_key, val))
            else:
                encoded.append((new_key, []))
        else:
            encoded.append((new_key, val))
    return dict(encoded)

In [4]:
def data_to_sql(table_name, df, db_params, conflict_columns):
    conn = None
    cur = None
    try:
        # Establish the connection
        conn = psycopg2.connect(**db_params)
        cur = conn.cursor()
        
        #insert data into tables
        if len(conflict_columns) == 0:
            insert_query = """
                INSERT INTO {} ({})
                VALUES ({})
            """.format(table_name, ','.join(df.columns), ','.join(['%s']*len(df.columns)))
        else:
            insert_query = """
                INSERT INTO {} ({})
                VALUES ({})
                ON CONFLICT ({}) DO NOTHING
            """.format(table_name, ','.join(df.columns), ','.join(['%s']*len(df.columns)), ','.join(conflict_columns))
        if len(df) > 0:
            last_row = df.iloc[-1]
        cur.executemany(insert_query, df.values.tolist())
        print(f'table {table_name} updated')
        
        # Commit the changes
        conn.commit()
        
    
    except Exception as e:
        print(f"Error: {e}")
        if last_row is not None:
            print(f"Last row loaded before the error occurred: {last_row}")
    finally:
        if conn is not None:
            # Close the cursor and connection
            cur.close()
        if cur is not None:
            conn.close()

In [5]:
def preprocess_data(odd_data):
    
    all_filtered_data = []
    for index, data in enumerate(odd_data):
        books = [len(book['bets']) for book in odd_data[index]['bookmakers']]
        bb = books.index(max(books))
        data = encode_data(data)
        filtered_data = {}
        filtered_data['fixture_id'] = data['fixture_id']
        
        for key, value in data.items():
            if value == 'Match Winner':
                match = re.search(fr'bookmakers_{bb}_bets_(\d+)_name', key)
                if match:
                    bet_number = match.group(1)
                    filtered_data['result_home'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_0_odd']
                    filtered_data['result_draw'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_1_odd']
                    filtered_data['result_away'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_2_odd']
            if value == 'Both Teams Score':
                match = re.search(fr'bookmakers_{bb}_bets_(\d+)_name', key)
                if match:
                    bet_number = match.group(1)
                    filtered_data['both_scores_true'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_0_odd']
                    filtered_data['both_scores_false'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_1_odd']
            if value == 'Double Chance':
                match = re.search(fr'bookmakers_{bb}_bets_(\d+)_name', key)
                if match:
                    bet_number = match.group(1)
                    filtered_data['double_chance_home'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_0_odd']
                    filtered_data['double_chance_away'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_1_odd']
            if value == 'First Half Winner':
                match = re.search(fr'bookmakers_{bb}_bets_(\d+)_name', key)
                if match:
                    bet_number = match.group(1)
                    filtered_data['fh_result_home'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_0_odd']
                    filtered_data['fh_result_draw'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_1_odd']
                    filtered_data['fh_result_away'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_2_odd']
            if value == 'Total - Home':
                match = re.search(fr'bookmakers_{bb}_bets_(\d+)_name', key)
                if match:
                    bet_number = match.group(1)
                    filtered_data['home_over_1'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_0_odd']
                    filtered_data['home_over_2'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_2_odd']
            if value == 'Total - Away':
                match = re.search(fr'bookmakers_{bb}_bets_(\d+)_name', key)
                if match:
                    bet_number = match.group(1)
                    filtered_data['away_over_1'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_0_odd']
                    filtered_data['away_over_2'] = data[f'bookmakers_{bb}_bets_{bet_number}_values_2_odd']
                    
            cols = ['result_home', 'result_draw', 
                    'result_away', 'both_scores_true', 
                    'both_scores_false', 'double_chance_home', 
                    'double_chance_away', 'fh_result_home', 
                    'fh_result_draw', 'fh_result_away', 
                    'home_over_1', 'home_over_2', 
                    'away_over_1', 'away_over_2']
        for col in cols:
            if col not in filtered_data:
                filtered_data[col] = 0
        all_filtered_data.append(filtered_data)
    df = pd.DataFrame(all_filtered_data)
    return df

In [6]:
import requests
import re
import datetime
from datetime import date
import pandas as pd
import psycopg2

t = date.today()
start_date = t+datetime.timedelta(1) if t.weekday() == 4 else t
end_date = t+datetime.timedelta(1) if t.weekday() == 0 else t
while start_date.weekday() != 4:
    start_date += datetime.timedelta(1)
while end_date.weekday() != 0:
    end_date += datetime.timedelta(1)

matches = get_predicted_matches(start_date, end_date)

  current_matches = pd.read_sql_query(query, conn)


In [7]:
end_date

datetime.date(2024, 7, 8)

In [7]:
matches

Unnamed: 0,fixture_id,teams_home_id,teams_away_id,home_over_1_prob,home_over_2_prob,away_over_1_prob,away_over_2_prob,both_scores_prob,home_over_1_pred,home_over_2_pred,...,result_pred,result_first_half_pred,result_double_chance_home_pred,result_double_chance_away_pred,result_prob,result_first_half_prob,result_double_chance_home_prob,result_double_chance_away_prob,league_id,fixture_date
0,1154753,7020,18440,0.292462,0.117180,0.335135,0.138222,0.508579,False,False,...,1,1,False,False,0.434278,0.701957,0.330388,0.218294,775,2024-06-29
1,1154754,7027,6971,0.292462,0.117180,0.335135,0.138222,0.508579,False,False,...,1,1,False,False,0.475504,0.679931,0.234555,0.303772,775,2024-06-29
2,1154755,7047,21418,0.292462,0.117180,0.335135,0.138222,0.508579,False,False,...,1,1,False,False,0.468370,0.685441,0.254370,0.282936,775,2024-06-28
3,1154756,6986,22869,0.357944,0.153708,0.275187,0.089942,0.485666,False,False,...,1,1,False,False,0.481459,0.674004,0.215563,0.325780,775,2024-06-29
4,1154757,7009,12812,0.311239,0.127196,0.316591,0.121918,0.501752,False,False,...,1,1,False,False,0.481459,0.674004,0.215563,0.325780,775,2024-06-29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
105,1185306,23272,23273,0.581985,0.311163,0.241561,0.053060,0.553806,True,False,...,0,1,False,False,0.478106,0.810575,0.050196,0.498789,117,2024-07-07
106,1185307,3388,3384,0.052882,0.006245,0.440385,0.143388,0.286847,False,False,...,2,1,True,False,0.629523,0.633155,0.615790,0.070016,117,2024-07-06
107,1185308,11266,7802,0.281753,0.095393,0.135469,0.027628,0.337311,False,False,...,0,1,False,False,0.461875,0.755711,0.080363,0.466754,117,2024-07-06
108,1185309,3387,23271,0.777746,0.517779,0.072621,0.006500,0.380398,True,True,...,0,1,False,True,0.921381,0.588658,0.011905,0.919522,117,2024-07-05


In [10]:
#get only matches with odds available
c = pd.read_csv('data/european_seasons.csv')
matches = matches.merge(c[['league_id','odds']], on='league_id')
matches = matches[matches['odds']]
matches_list = list(matches['fixture_id'].unique())
leagues_list = list(matches['league_id'].unique())
    
odds_data = []
remaining = 10000
done = False
date = start_date
page = 1

while remaining > 0 and not done:
    season = list(c[c['league_id']==leagues_list[0]]['year'])[-1]
    params = {'league':leagues_list[0],
              'date':date,
              'season':season,
              'page':page}
    response, remaining = get_data('odds', params)    
    date+=datetime.timedelta(1)
    if page != response['paging']['total']:
        page += 1
    else:
        page = 1
        if date != end_date:
            date += datetime.timedelta(1)
        else:
            date = start_date
            leagues_list.pop(0)
        
    if len(response['response'])>0:
        odds_data.extend(match for match in response['response'])
    
    remaining = int(remaining)
    done = True if len(leagues_list)==0 else False
    
print(done)
if len(odds_data) > 0:
    # preprocess data
    df = preprocess_data(odds_data)
    # send to db
    #data_to_sql('odds', df, params, 'fixture_id')

requests before reaching limit 117
requests before reaching limit 116
requests before reaching limit 115
requests before reaching limit 114
requests before reaching limit 113
requests before reaching limit 112
requests before reaching limit 111
requests before reaching limit 110
requests before reaching limit 109
requests before reaching limit 108
requests before reaching limit 107
requests before reaching limit 106
requests before reaching limit 105
requests before reaching limit 104
requests before reaching limit 103
requests before reaching limit 102
requests before reaching limit 101
requests before reaching limit 100
requests before reaching limit 99
requests before reaching limit 98
requests before reaching limit 97
requests before reaching limit 96
requests before reaching limit 95
requests before reaching limit 94
requests before reaching limit 93
requests before reaching limit 92
requests before reaching limit 91
requests before reaching limit 90
requests before reaching limit

In [11]:
df

Unnamed: 0,fixture_id,result_home,result_draw,result_away,both_scores_true,both_scores_false,fh_result_home,fh_result_draw,fh_result_away,home_over_1,home_over_2,away_over_1,away_over_2,double_chance_home,double_chance_away
0,1154760,3.36,4.2,1.77,1.3,3.38,3.61,2.7,2.26,1.91,3.7,4.1,1.4,0.0,0.0
1,1154762,5.35,5.0,1.39,1.33,2.91,4.8,2.85,1.79,2.28,4.1,1.77,1.36,2.59,1.1


In [11]:
a, b = get_data('odds', {'fixture': 1132615})

requests before reaching limit 497


In [83]:
c = encode_data(a['response'][0])

In [82]:
for b in a['response'][0]['bookmakers']:
    print(len(b['bets']))

2
25
23
15
3


### Preprocces

### 