### Collectinng Data
Gets player statistics & gameweek performance from the FPL API & saves it to the database. This class does one thing only. SRP

### Solution
- Connects to the FPL API 
- gets current season player information
- gets gameweeks performance stats
- Saves data to the SQLite database

### API
- `/api/bootstrap-static/`, gets all players, teams & current gameweek info
- `/api/event/{gameweek}/live/`, gets performance stats for a specific gameweek

### References 
https://fantasy.premierleague.com/api/

https://docs.python-requests.org/

https://schedule.readthedocs.io/en/stable/

In [1]:
# Create database
import sqlite3

conn = sqlite3.connect('fpl_data.db')
cursor = conn.cursor()

cursor.execute('''
    CREATE TABLE IF NOT EXISTS players (
        player_id INTEGER PRIMARY KEY,
        name TEXT,
        team TEXT,
        position TEXT,
        price REAL
    )
''')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS gameweek_data (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        player_id INTEGER,
        gameweek INTEGER,
        minutes INTEGER,
        goals_scored INTEGER,
        assists INTEGER,
        clean_sheets INTEGER,
        bonus INTEGER,
        total_points INTEGER,
        UNIQUE(player_id, gameweek)
    )
''')

conn.commit()
conn.close()
print("Database Created")

Database Created


In [2]:
# Collecting data from the FPL API
# importing requests for making http requests to the API
# importing sqlite3 for saving data to the database
# importing time for adding delays between the API requests
import requests
import sqlite3
import time
# importing schedule for recurring tasks & the timestamps
import schedule 
from datetime import datetime

class DataCollector:
    """
    Collects data from the FPL API

    gets the player information & gameweeke performance data 
    & saves it to the SQLite database, scheduling functionality for automatic updates
    so dont have to run the class everytime.
    """

    def __init__(self):
        """Creating database path & link to the API URL """
        self.db = 'fpl_data.db'
        self.api = "https://fantasy.premierleague.com/api"

    def Getplayers(self):
        """Getting the players & saving it to the database"""
        print("Getting players")

        # Getting data from FPL API
        data = requests.get(f"{self.api}/bootstrap-static/").json()

        # Connecting to the database
        conn = sqlite3.connect(self.db)
        cursor = conn.cursor()

        # 1=GK, 2=DEF, 3=MID, 4=FWD
        positions = {1: 'GK', 2: 'DEF', 3: 'MID', 4: 'FWD'}
        # moving team IDs to team names
        teams = {t['id']: t['name'] for t in data['teams']}

        # Saving each player to the database
        for p in data['elements']:
            cursor.execute(
                'INSERT OR REPLACE INTO players VALUES (?, ?, ?, ?, ?)',
                (p['id'], p['web_name'], teams[p['team']], 
                 positions[p['element_type']], p['now_cost'] / 10)
            )
        
        # Save and close
        conn.commit()
        conn.close()
        # printing results
        print(f"Saved {len(data['elements'])} players")

        # returning events so scheduler can find the current gameweek
        return data.get('events', [])

    # getting gameweek data from a specific week
    def getGameweekData(self, gw):
        """Getting the stats for one gameweek"""
        # Calling API for a specific gameweek
        response = requests.get(f"{self.api}/event/{gw}/live/")
        
        # Check if the gameweek exists if not return false
        if response.status_code != 200:
            return False
        
        # Getting the data
        data = response.json()
        conn = sqlite3.connect(self.db)
        cursor = conn.cursor()

        # Saving each players gameweek performance data
        for p in data['elements']:
                s = p['stats']  # Get stats
                cursor.execute(
                    '''INSERT OR REPLACE INTO gameweek_data 
                    (player_id, gameweek, minutes, goals_scored, assists, 
                        clean_sheets, bonus, total_points)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?)''',
                    (p['id'], gw, s['minutes'], s['goals_scored'], s['assists'],
                    s['clean_sheets'], s['bonus'], s['total_points'])
                )
            
        # Save and close
        conn.commit()
        conn.close()
        print(f"Saved gameweek {gw}")
        return True  

    # Get data for all gameweeks
    def fetch_all_gameweeks(self):
        """Getting all the gameweek data"""
        # Looping through all the gameweeks 1-38 in a season
        for gw in range(1, 39):
            # Get gameweek data
            if not self.getGameweekData(gw):
                # if gameweek is not avaliale stop
                print(f"Stopped at gameweek {gw}")
                break  
            # Wait 1 second between requests
            time.sleep(1) 

    """
    # scheduler methods for automatic updates
    # gets the data from the api once 
    def run_update(self):
        
        Running an update cycle

        - Gets all current player info
        - finds out what the current gameweek is
        - update the current gameweek data
        - then updates the previous gameweek data
        
        print(f"Starting to update - {datetime.now()}")

        # putting the code in try catch block 
        try:
            # Getting all the players data & saves them to the database
            print("Getting player data")
            events = self.Getplayers()

            # find out what the current gameweek is
            print("Find the current gameweek")
            currentgw = next((e['id'] for e in events if e['is_current']), 1)

            print(f"Current gameweek {currentgw}")

            # update the current gameweek, gets gw data stats for all players
            print(f"Updating gameweek {currentgw}")
            self.getGameweekData(currentgw)

            # updating the previous gameweek for correctness
            if currentgw > 1:
                print(f"Updating gameweek {currentgw - 1}")
                self.getGameweekData(currentgw - 1)

            print(f"Update is  complete - {datetime.now()}")

        # if update failed print tis
        except Exception as e:
            print(f"Updating Failed: {e}")
    """

    """  
    # starting the scheduler
    # start automatically updates @6 am daily
    def startScheduler(self, update_time="06:00"):
        
        #Start the automatic updates

        #run the automaic update at 6am
        

        # scheduling dailt updates at the 6
        # see https://schedule.readthedocs.io/en/stable/
        schedule.every().day.at(update_time).do(self.run_update)

        print("\n\nFPL Data Update Scheduler")
        print(f"Data daily updated at: {update_time}")

        # Run once immediately on startup
        print("\nRunning initial update")
        self.run_update()
        
        # Keep the scheduler running
        print("\nScheduler is running")
        # while loop to check if its time to run scheduler
        #waits for a muinte then checks again
        while True:
            schedule.run_pending()
            time.sleep(60)  """


In [3]:
# Create collector
collector = DataCollector()
# Get all the players
collector.Getplayers()
# Get all the gameweek data
collector.fetch_all_gameweeks()

Getting players
Saved 817 players
Saved gameweek 1
Saved gameweek 2
Saved gameweek 3
Saved gameweek 4
Saved gameweek 5
Saved gameweek 6
Saved gameweek 7
Saved gameweek 8
Saved gameweek 9
Saved gameweek 10
Saved gameweek 11
Saved gameweek 12
Saved gameweek 13
Saved gameweek 14
Saved gameweek 15
Saved gameweek 16
Saved gameweek 17
Saved gameweek 18
Saved gameweek 19
Saved gameweek 20
Saved gameweek 21
Saved gameweek 22
Saved gameweek 23
Saved gameweek 24
Saved gameweek 25
Saved gameweek 26
Saved gameweek 27
Saved gameweek 28
Saved gameweek 29
Saved gameweek 30
Saved gameweek 31
Saved gameweek 32
Saved gameweek 33
Saved gameweek 34
Saved gameweek 35
Saved gameweek 36
Saved gameweek 37
Saved gameweek 38


In [4]:
# Imorting sqlite3 for database connection
# importing pandas for data anaylsis
# verifying the data that has been collected
import sqlite3
import pandas as pd

# Connecting to the database
conn = sqlite3.connect('fpl_data.db')

# Displaying the data that has been collected
print("Summary of Data")
print("=" * 50)

# counting the total number of players in the database
# using the sql query to find amount of players in the players table
total_players = pd.read_sql_query("SELECT COUNT(*) as count FROM players", conn)
print(f"Total players: {total_players['count'][0]}")

# Counting the total number of gameweek records
# using the sql query to find the amount of gameweek records from gameweek data
total_records = pd.read_sql_query("SELECT COUNT(*) as count FROM gameweek_data", conn)
print(f"Total gameweek records: {total_records['count'][0]}")

# Viewng the 5 most expensive players
# using sql query getting name, team, position & price from players table
print("\nSample Players")
print("=" * 50)
players = pd.read_sql_query("""
    SELECT name, team, position, price 
    FROM players 
    ORDER BY price DESC 
    LIMIT 5
""", conn)
print(players)

# Viewing sample gameweek data
# using sql query to print out the data from players & gameweek data
print("\nGameweek Data")
print("=" * 50)
gameweeks = pd.read_sql_query("""
    SELECT p.name, g.gameweek, g.minutes, g.goals_scored, 
           g.assists, g.total_points
    FROM gameweek_data g
    JOIN players p ON g.player_id = p.player_id
    WHERE g.total_points > 10
    ORDER BY g.total_points DESC
    LIMIT 10
""", conn)
print(gameweeks)

# closing the connection
conn.close()

Summary of Data
Total players: 817
Total gameweek records: 18990

Sample Players
      name       team position  price
0  Haaland   Man City      FWD   14.9
1  M.Salah  Liverpool      MID   14.0
2   Palmer    Chelsea      MID   10.5
3     Isak  Liverpool      FWD   10.3
4     Saka    Arsenal      MID    9.9

Gameweek Data
           name  gameweek  minutes  goals_scored  assists  total_points
0      J.Timber         2       63             2        1            24
1    Van de Ven         9       90             2        0            23
2  Lewis-Potter        17       90             2        0            21
3           Eze        12       90             3        0            20
4        Schade        18       90             3        0            20
5        Palmer        25       60             3        0            20
6   Hudson-Odoi        16       90             2        1            19
7       Semenyo         7       90             2        1            18
8         James         8   

In [5]:
# Running the daily updates
# runs daily at 6 am
#collector = DataCollector()
#collector.startScheduler()