In [2]:
import os
import logging 
import requests 
import psycopg2
import pandas as pd 
from dotenv import load_dotenv
from requests.exceptions import RequestException

#loading environment variables
load_dotenv()

API_KEY         =   os.getenv("API_KEY")
API_HOST        =   os.getenv("API_HOST")
LEAGUE_ID       =   os.getenv("LEAGUE_ID")
SEASON          =   os.getenv("SEASON")
DB_NAME         =   os.getenv("DB_NAME")
DB_USERNAME     =   os.getenv("DB_USERNAME")
DB_PASSWORD     =   os.getenv("DB_PASSWORD")
DB_HOST         =   os.getenv("DB_HOST")
DB_PORT         =   os.getenv("DB_PORT")

#logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

# Create a file handler (This handler writes messages to a log file on the system)
file_handler = logging.FileHandler('football_table_standings.log')
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

# Create a console handler (This handler writes messages to console)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
console_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

# Instantiate the logger object (to use handlers we need to add them to the logger object)
logger = logging.getLogger()

# Add the file handler to the logger
logger.addHandler(file_handler)

# Add the console handler to the logger
logger.addHandler(console_handler)

# using logging is better than using print statements because we can control the level of logging

In [3]:
# Api request

url = "https://odds.p.rapidapi.com/v4/sports/soccer_poland_ekstraklasa/scores"
headers       =   {"X-RapidAPI-Key": API_KEY, 
                   "X-RapidAPI-Host": API_HOST}

query_string  =   {"daysFrom":"3"}


# making a request to the API
try:
    api_response = requests.get(url, headers=headers, params=query_string, timeout=5)
    api_response.raise_for_status() 


except requests.HTTPError as http_err:
    logger.error(f'HTTP error occurred: {http_err}')


except requests.Timeout:
    logger.error('Request timed out after 15 seconds')


except RequestException as request_err:
    logger.error(f'Request error occurred: {request_err}')

# parse the api response
upcoming_matches = api_response.json()

# print api response
# logger.info(standings_data)

#4.4 step
# save response to file and read it via jq
with open('football_table_standings.json', 'w') as f:
    f.write(api_response.text)


In [4]:
# listing all sports
url = "https://odds.p.rapidapi.com/v4/sports"
headers       =   {"X-RapidAPI-Key": API_KEY, 
                   "X-RapidAPI-Host": API_HOST}

query_string  =   {"daysFrom":"3"}


# making a request to the API
try:
    api_response_sport = requests.get(url, headers=headers, timeout=5)
    api_response_sport.raise_for_status()
    #write sports list to file
    with open('sports_list.json', 'w') as f:
        f.write(api_response_sport.text)

        
except requests.HTTPError as http_err:
    logger.error(f'HTTP error occurred: {http_err}')


lista sportów
jq .[].key ".\sports_list.json"

In [5]:
from pandas import json_normalize


df = []
columns = ['time']

for matches in upcoming_matches:
    for key in matches.keys():
        if key not in columns:
            columns.append(key)
    df.append(matches)

matches_df = pd.DataFrame(df, columns=columns)
matches_df = pd.DataFrame(pd.json_normalize(upcoming_matches))

json_normalize(upcoming_matches, record_path=['scores'], meta = ['commence_time'] , errors='ignore')


Unnamed: 0,name,score,commence_time
0,Widzew Łódź,2,2023-11-18T16:37:07Z
1,Ruch Chorzów,1,2023-11-18T16:37:07Z


In [6]:
matches_df

Unnamed: 0,id,sport_key,sport_title,commence_time,completed,home_team,away_team,scores,last_update
0,ae39b7413454ec2b33a985ea19022669,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-18T16:37:07Z,True,Widzew Łódź,Ruch Chorzów,"[{'name': 'Widzew Łódź', 'score': '2'}, {'name...",2023-11-18T23:51:36Z
1,2eb9d94efbe2ab4cb4dbde4b25ed5f59,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-24T17:00:00Z,False,ŁKS Łódź,Zagłębie Lubin,,
2,277a015c665a07895057277c30106a55,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-24T19:30:00Z,False,Jagiellonia Białystok,Piast Gliwice,,
3,fb17d326e07cb380101baa721ed641ed,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-25T14:00:00Z,False,Raków Częstochowa,Cracovia Kraków,,
4,835bbe9509ae64aecfdb6afd24f3cdcb,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-25T16:30:00Z,False,Pogoń Szczecin,Stal Mielec,,
5,06b70c6556ddfc0dc04003e102f0ffea,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-25T19:00:00Z,False,Legia Warszawa,Warta Poznań,,
6,d747bcf4e030119bf218114de2efbb2d,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-26T11:30:00Z,False,Puszcza Niepołomice,Górnik Zabrze,,
7,d38d8830a365beb02adbffb81d45740d,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-26T14:00:00Z,False,Ruch Chorzów,Korona Kielce,,
8,e699f0c0ea67c0d6b0043be9efcccc79,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-26T16:30:00Z,False,Lech Poznań,Widzew Łódź,,
9,0a363814119ee8cb6e7c44d7cd1b18f4,soccer_poland_ekstraklasa,Ekstraklasa - Poland,2023-11-27T18:00:00Z,False,Radomiak Radom,Śląsk Wrocław,,


In [7]:
selected_columns = ['commence_time', 'home_team', 'away_team', 'completed']
selected_df = matches_df[selected_columns]

# Convert commence_time to normal date format
selected_df['commence_time'] = pd.to_datetime(selected_df['commence_time'])

with open('matches.json', 'w') as f:
    f.write(selected_df.to_json(orient='records'))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  selected_df['commence_time'] = pd.to_datetime(selected_df['commence_time'])


In [8]:
selected_df[selected_df["completed"] == True]


Unnamed: 0,commence_time,home_team,away_team,completed
0,2023-11-18 16:37:07+00:00,Widzew Łódź,Ruch Chorzów,True


In [15]:
completed

Unnamed: 0,name,score,commence_time
0,Widzew Łódź,2,2023-11-18T16:37:07Z
1,Ruch Chorzów,1,2023-11-18T16:37:07Z


In [38]:
import json
with open('football_table_standings.json') as f:
    matches_completed = json.load(f)

completed = pd.json_normalize(matches_completed, record_path=['scores'], meta = ['id','commence_time'] , errors='ignore')

completed = completed.pivot(index='commence_time', columns=('name'), values='score',)

In [41]:
completed.reset_index(inplace=True)