In [53]:
def get_predicted_matches(start_date, end_date):
    """Load the predictions for fixtures in a date window that have no odds yet.

    Joins ``predictions`` to ``fixtures``, keeps the fixtures whose
    ``fixture_date`` lies between ``start_date`` and ``end_date`` (both
    bounds inclusive) and drops every fixture that already has a row in
    ``odds``.

    Args:
        start_date: Lower bound for ``fixture_date`` (inclusive).
        end_date: Upper bound for ``fixture_date`` (inclusive).

    Returns:
        A DataFrame holding the prediction columns plus ``league_id`` and
        ``fixture_date``, or ``None`` when the query could not be run.
    """
    conn = None
    db_params = {
        'host': 'localhost',
        'database': 'preds',
        'user': 'postgres',
        'password': 'pass',
        'port': '5432'
    }

    # The date bounds are sent as bound parameters (%s) rather than being
    # formatted into the SQL text, so the driver does the quoting.
    query = '''
    SELECT a.*
    FROM (
        SELECT p.*, f.league_id, fixture_date
        FROM predictions p
        JOIN fixtures f ON p.fixture_id = f.fixture_id
        WHERE f.fixture_date >= %s and fixture_date <= %s
    ) a
    LEFT JOIN odds o ON a.fixture_id = o.fixture_id
    WHERE o.fixture_id IS NULL;
    '''

    # get upcoming matches playing between start_date and end_date
    try:
        conn = psycopg2.connect(**db_params)
        current_matches = pd.read_sql_query(
            query, conn, params=(start_date, end_date)
        )
        return current_matches
    except Exception as e:
        print(f'Error {e}')
        # A single None mirrors the single DataFrame returned on success.
        return None
    finally:
        if conn is not None:
            conn.close()

In [45]:
import os
from dotenv import load_dotenv
import requests

def get_data(endpoint, params):
    """Send a GET request to the v3.football.api-sports.io API.

    The API key is read from the ``API_KEY`` environment variable after
    loading the ``.env`` file.

    Args:
        endpoint: Path appended to the base URL (for example ``'odds'``).
        params: Query-string parameters passed to ``requests.get``.

    Returns:
        A ``(data, remaining)`` tuple: the decoded JSON body and the value of
        the ``x-ratelimit-requests-remaining`` response header (``None`` when
        the header is absent).

    Raises:
        ValueError: If ``API_KEY`` is not set.
        requests.HTTPError: If the response status is not 200.
    """
    load_dotenv()  # Load environment variables from .env file

    api_key = os.getenv('API_KEY')
    if api_key is None:
        raise ValueError("API key not set.")

    URL = "https://v3.football.api-sports.io/"
    headers = {
        'x-rapidapi-host': "v3.football.api-sports.io",
        'x-rapidapi-key': api_key
    }
    response = requests.get(
        URL + endpoint,
        headers=headers,
        params=params
    )

    if response.status_code != 200:
        message = f"Error {response.status_code}, {response.text}"
        print(message)
        # There is no payload to hand back; fail with a clear error instead
        # of falling through to a return of names that were never assigned.
        raise requests.HTTPError(message, response=response)

    remaining = response.headers.get("x-ratelimit-requests-remaining")
    data = response.json()
    print(f"requests before reaching limit {remaining}")

    return data, remaining

In [46]:
def encode_data(data_dict, parent_key='', sep='_'):
    """Flatten a nested dict into a single-level dict with joined keys.

    Nested dict keys are joined to their parent key with ``sep``. A non-empty
    list made up only of dicts is expanded element by element, using each
    element's position as a key segment. Any other list (including an empty
    one) and any scalar is stored unchanged under its joined key.

    Args:
        data_dict: The (possibly nested) dict to flatten.
        parent_key: Prefix for every key produced by this call.
        sep: Separator placed between key segments.

    Returns:
        A new flat dict.
    """
    flat = {}
    for name, item in data_dict.items():
        path = f'{parent_key}{sep}{name}' if parent_key else name

        if isinstance(item, dict):
            flat.update(encode_data(item, path, sep=sep))
            continue

        if not isinstance(item, list):
            flat[path] = item
            continue

        if not item:
            flat[path] = []
            continue

        if all(isinstance(element, dict) for element in item):
            for position, element in enumerate(item):
                flat.update(encode_data(element, f'{path}{sep}{position}', sep=sep))
        else:
            # Mixed or scalar lists are kept whole.
            flat[path] = item
    return flat

In [47]:
def data_to_sql(table_name, df, db_params, conflict_columns):
    """Insert every row of ``df`` into a PostgreSQL table.

    Errors are reported on stdout and not re-raised.

    Args:
        table_name: Target table. It is formatted into the SQL text, so it
            must come from trusted code, never from user input.
        df: DataFrame whose column names match the table columns (the names
            are formatted into the SQL text as well).
        db_params: Keyword arguments forwarded to ``psycopg2.connect``.
        conflict_columns: Column names for an ``ON CONFLICT (...) DO NOTHING``
            clause. A single name may be given as a plain string. An empty
            value produces a plain ``INSERT``.
    """
    conn = None
    cur = None
    # Bound up front so the error handler can always test it.
    last_row = None

    # A bare string would otherwise be joined character by character.
    if isinstance(conflict_columns, str):
        conflict_columns = [conflict_columns]

    try:
        # Establish the connection
        conn = psycopg2.connect(**db_params)
        cur = conn.cursor()

        # Build the insert statement; the row values travel as parameters.
        columns = ','.join(df.columns)
        placeholders = ','.join(['%s'] * len(df.columns))
        if not conflict_columns:
            insert_query = """
                INSERT INTO {} ({})
                VALUES ({})
            """.format(table_name, columns, placeholders)
        else:
            insert_query = """
                INSERT INTO {} ({})
                VALUES ({})
                ON CONFLICT ({}) DO NOTHING
            """.format(table_name, columns, placeholders, ','.join(conflict_columns))

        if len(df) > 0:
            last_row = df.iloc[-1]
        cur.executemany(insert_query, df.values.tolist())

        # Commit the changes, and only then report success.
        conn.commit()
        print(f'table {table_name} updated')

    except Exception as e:
        print(f"Error: {e}")
        if last_row is not None:
            print(f"Last row loaded before the error occurred: {last_row}")
    finally:
        # Close each resource based on its own state, cursor first.
        if cur is not None:
            cur.close()
        if conn is not None:
            conn.close()

In [73]:
def _select_odds(data, prefix, bet_name):
    """Return the output columns that one flattened bet contributes.

    ``prefix`` is the flattened key prefix of the bet's values
    (``bookmakers_<b>_bets_<n>_values_``); ``bet_name`` is the bet's name.
    Bet names that are not handled yield an empty dict.
    """
    odds = {}
    if bet_name == 'Match Winner':
        odds['result_home'] = data[prefix + '0_odd']
        odds['result_draw'] = data[prefix + '1_odd']
        odds['result_away'] = data[prefix + '2_odd']
    elif bet_name == 'Both Teams Score':
        odds['both_scores_true'] = data[prefix + '0_odd']
        odds['both_scores_false'] = data[prefix + '1_odd']
    elif bet_name == 'Double Chance':
        first_value = data[prefix + '0_value']
        if first_value == 'Home/Draw':
            odds['double_chance_home'] = data[prefix + '0_odd']
        elif first_value == 'Draw/Away':
            odds['double_chance_away'] = data[prefix + '0_odd']
        # `and` short-circuits, so the value is only read when the bet really
        # has a second entry.
        if prefix + '1_odd' in data and data[prefix + '1_value'] == 'Draw/Away':
            odds['double_chance_away'] = data[prefix + '1_odd']
        # A third entry, when present, always supplies the away odd.
        if prefix + '2_odd' in data:
            odds['double_chance_away'] = data[prefix + '2_odd']
    elif bet_name == 'First Half Winner':
        odds['fh_result_home'] = data[prefix + '0_odd']
        odds['fh_result_draw'] = data[prefix + '1_odd']
        odds['fh_result_away'] = data[prefix + '2_odd']
    elif bet_name == 'Total - Home':
        # NOTE(review): entries 0 and 2 are presumably the two "over" lines;
        # confirm against the API payload.
        odds['home_over_1'] = data[prefix + '0_odd']
        odds['home_over_2'] = data[prefix + '2_odd']
    elif bet_name == 'Total - Away':
        odds['away_over_1'] = data[prefix + '0_odd']
        odds['away_over_2'] = data[prefix + '2_odd']
    return odds


def preprocess_data(odd_data):
    """Build one row of selected odds per entry of ``odd_data``.

    For every entry, the bookmaker offering the most bets is chosen (the
    first one on a tie). The entry is flattened with ``encode_data`` and the
    odds of that bookmaker's 'Match Winner', 'Both Teams Score',
    'Double Chance', 'First Half Winner', 'Total - Home' and 'Total - Away'
    bets are copied into the row. Columns that could not be filled are set
    to 0; an entry with an empty ``bookmakers`` list gives an all-zero row.

    Args:
        odd_data: Iterable of dicts, each with a ``bookmakers`` list (whose
            items have a ``bets`` list) and a nested ``fixture`` -> ``id``.

    Returns:
        A DataFrame with ``fixture_id`` plus the odds columns.
    """
    cols = ['result_home', 'result_draw',
            'result_away', 'both_scores_true',
            'both_scores_false', 'double_chance_home',
            'double_chance_away', 'fh_result_home',
            'fh_result_draw', 'fh_result_away',
            'home_over_1', 'home_over_2',
            'away_over_1', 'away_over_2']

    all_filtered_data = []
    for raw in odd_data:
        bookmakers = raw['bookmakers']
        data = encode_data(raw)
        filtered_data = {'fixture_id': data['fixture_id']}

        if bookmakers:
            # Index of the bookmaker with the largest number of bets.
            bet_counts = [len(book['bets']) for book in bookmakers]
            bb = bet_counts.index(max(bet_counts))
            name_key = re.compile(fr'bookmakers_{bb}_bets_(\d+)_name')

            for key, value in data.items():
                match = name_key.search(key)
                if match:
                    prefix = f'bookmakers_{bb}_bets_{match.group(1)}_values_'
                    filtered_data.update(_select_odds(data, prefix, value))

        for col in cols:
            filtered_data.setdefault(col, 0)
        all_filtered_data.append(filtered_data)

    return pd.DataFrame(all_filtered_data)

In [54]:
import requests
import re
import datetime
from datetime import date
import pandas as pd
import psycopg2

# Build the Friday-to-Monday window around today.
t = date.today()
# weekday() is 0 for Monday and 4 for Friday, so shifting by (4 - weekday)
# moves forward to Friday from Monday-Thursday and back to it from the weekend.
start_date = t + datetime.timedelta(days=4 - t.weekday())
# The first Monday after that Friday is three days later.
end_date = start_date + datetime.timedelta(days=3)

matches = get_predicted_matches(start_date, end_date)

  current_matches = pd.read_sql_query(query, conn)


In [60]:
# Keep only the matches whose league is flagged as having odds available.
c = pd.read_csv('data/european_seasons.csv')
matches = matches.merge(c[['league_id', 'odds']], on='league_id')
matches = matches[matches['odds']]
matches_list = list(matches['fixture_id'].unique())
leagues_list = list(matches['league_id'].unique())

odds_data = []
# Placeholder budget; replaced by the API's own count after the first call.
remaining = 10000
# With no league to query the loop must not start at all.
done = not leagues_list
# Named query_date so the `date` class imported from datetime stays usable.
query_date = start_date
page = 1

# Walk league by league, day by day, page by page until every league has
# been handled or the request budget is used up.
while remaining > 0 and not done:
    league_id = leagues_list[0]
    season = list(c[c['league_id'] == league_id]['year'])[-1]
    params = {'league': league_id,
              'date': query_date,
              'season': season,
              'page': page}
    response, remaining = get_data('odds', params)

    # `<` rather than `!=` so a total below the current page cannot keep the
    # loop paging forever.
    if page < response['paging']['total']:
        page += 1
    else:
        page = 1
        if query_date < end_date:
            query_date += datetime.timedelta(1)
        else:
            # Window finished for this league: move on to the next one.
            query_date = start_date
            leagues_list.pop(0)

    if len(response['response']) > 0:
        odds_data.extend(response['response'])

    remaining = int(remaining)
    done = not leagues_list

print(done)
if odds_data:
    # preprocess data
    df = preprocess_data(odds_data)
    # send to db
    # NOTE: `params` here is the API query dict; data_to_sql unpacks its third
    # argument into psycopg2.connect, so pass the DB settings when enabling this.
    #data_to_sql('odds', df, params, 'fixture_id')

requests before reaching limit 7174
requests before reaching limit 7173
requests before reaching limit 7172
requests before reaching limit 7171
requests before reaching limit 7170
requests before reaching limit 7169
requests before reaching limit 7168
requests before reaching limit 7167
requests before reaching limit 7166
requests before reaching limit 7165
requests before reaching limit 7164
requests before reaching limit 7163
requests before reaching limit 7162
requests before reaching limit 7161
requests before reaching limit 7160
requests before reaching limit 7159
requests before reaching limit 7158
requests before reaching limit 7157
requests before reaching limit 7156
requests before reaching limit 7155
requests before reaching limit 7154
requests before reaching limit 7153
requests before reaching limit 7152
requests before reaching limit 7151
requests before reaching limit 7150
requests before reaching limit 7149
requests before reaching limit 7148
requests before reaching lim

In [61]:
# Number of odds entries collected by the fetch loop.
len(odds_data)

45

In [74]:
# Turn the collected odds entries into a DataFrame with one row per entry.
df = preprocess_data(odds_data)

In [11]:
# Fetch the odds of a single fixture: `a` is the decoded JSON payload and
# `b` the remaining-request count reported by the API.
a, b = get_data('odds', {'fixture': 1132615})

requests before reaching limit 497


In [83]:
# Flatten the first odds entry into a single-level dict.
# NOTE: this rebinds `c`, which the fetch cell uses for the
# european_seasons DataFrame; re-read the CSV before re-running that cell.
c = encode_data(a['response'][0])

In [82]:
# Print how many bets each bookmaker offers in the first odds entry.
# NOTE: the loop variable rebinds `b`, which `a, b = get_data(...)` set to
# the remaining-request count.
for b in a['response'][0]['bookmakers']:
    print(len(b['bets']))

2
25
23
15
3


### Preprocess

### 