### Queries 
Queries the FPL database & gets the data for ML training. This class does the reading/querying from the database.

### Operations
- Gets player statistics from gameweeks
- Gets the dataset for ML training
- Calculates the recent form for all players

### References
https://docs.python.org/3/library/sqlite3.html

https://fantasy.premierleague.com/api/bootstrap-static/

https://pandas.pydata.org/docs/reference/api/pandas.read_sql_query.html

In [13]:
# Querying data from the database
# importing sqlite3 to connect to the database
# importing pandas for data manipulation
import sqlite3
import pandas as pd

class QueryingData:
    """
    Queries FPL data from the database

    Retrieves player statistics & gameweek data for ML training.
    Every public method opens its own connection, runs one SELECT and
    returns the result as a pandas DataFrame.
    """

    # using ../ as database is not in this folder
    def __init__(self, db_path='../fpl_data.db'):
        """
        Creating the query class with path to the database

        db_path: location of the SQLite database file (default
        '../fpl_data.db'). The file is not opened here, only when a
        query method is called.
        """
        # Storing the database location
        self.db_path = db_path

    def _run_query(self, query, params=None):
        """
        Run one SELECT statement and return the rows as a DataFrame

        query: SQL text, using ? placeholders for any values
        params: sequence of values bound to the placeholders, or None

        The connection is closed in a finally block so it is released
        even when the query raises (e.g. a missing table).
        """
        conn = sqlite3.connect(self.db_path)
        try:
            return pd.read_sql_query(query, conn, params=params)
        finally:
            conn.close()

    # Get a specific players statistics
    def getPlayerStats(self, player_name) -> pd.DataFrame:
        """
        Getting stats for a certain player across all gameweeks

        player_name: full or partial player name; it is wrapped in %
        wildcards, so every player whose name contains it is returned.

        Returns a DataFrame with columns name, gameweek, minutes,
        goals_scored, assists, total_points ordered by gameweek.
        """
        # Uses LIKE for partial name matching; the name is passed as a
        # bound parameter, never formatted into the SQL text
        query = '''
            SELECT p.name, g.gameweek, g.minutes, g.goals_scored,
                   g.assists, g.total_points
            FROM gameweek_data g
            JOIN players p ON g.player_id = p.player_id
            WHERE p.name LIKE ?
            ORDER BY g.gameweek
        '''
        return self._run_query(query, (f'%{player_name}%',))

    # Getting all the data from the gameweeks
    def GettingAllTheData(self) -> pd.DataFrame:
        """
        Getting all the data for ML training

        Returns a DataFrame with one row per gameweek_data record joined
        to its player: player_id, name, position, team, gameweek,
        minutes, goals_scored, assists, clean_sheets, bonus,
        total_points, ordered by gameweek then player_id.
        """
        # uses an sql join to get all the information from both tables
        query = '''
            SELECT p.player_id, p.name, p.position, p.team,
                   g.gameweek, g.minutes, g.goals_scored, g.assists,
                   g.clean_sheets, g.bonus, g.total_points
            FROM gameweek_data g
            JOIN players p ON g.player_id = p.player_id
            ORDER BY g.gameweek, p.player_id
        '''
        return self._run_query(query)

    def RecentPlayerForm(self, player_name, last_n=5) -> pd.DataFrame:
        """
        Getting the player's recent form

        Calculates average points and goal/assist totals over each
        matching player's most recent gameweeks.

        player_name: full or partial player name (matched with LIKE)
        last_n: how many of the latest gameweeks to use (default 5)

        Returns a DataFrame with columns name, avg_points, goals,
        assists. There is one row per matching player, ordered by
        player_id, so a partial name that matches several players no
        longer blends their rows into a single figure. The frame is
        empty when no player matches.
        """
        # The correlated subquery picks the latest N gameweeks of the
        # player on the current outer row, so the window is applied per
        # player rather than across every row that matched the name.
        # NOTE(review): assumes at most one gameweek_data row per player
        # per gameweek; if a gameweek can hold several rows, all rows of
        # the selected gameweeks are counted - confirm against the schema.
        query = '''
            SELECT p.name,
                   AVG(g.total_points) as avg_points,
                   SUM(g.goals_scored) as goals,
                   SUM(g.assists) as assists
            FROM players p
            JOIN gameweek_data g ON g.player_id = p.player_id
            WHERE p.name LIKE ?
              AND g.gameweek IN (
                  SELECT g2.gameweek
                  FROM gameweek_data g2
                  WHERE g2.player_id = p.player_id
                  ORDER BY g2.gameweek DESC
                  LIMIT ?
              )
            GROUP BY p.player_id, p.name
            ORDER BY p.player_id
        '''
        # Placeholders are bound in order: name pattern, then window size
        return self._run_query(query, (f'%{player_name}%', last_n))

In [18]:
if __name__ == "__main__":
    # One query object, pointed at the default database path
    query = QueryingData()

    # Haaland lookup: tail() keeps only the final rows of his gameweek history
    print("Haaland's stats:")
    haaland_history = query.getPlayerStats("Haaland")
    print(haaland_history.tail())

    # Salah's recent form, using the method's default number of gameweeks
    print("\nRecent form test:")
    salah_form = query.RecentPlayerForm("Salah")
    print(salah_form)

    # Full training dataset: load it, then report how many rows came back
    all_data = query.GettingAllTheData()
    print("\nGetting all dataset")
    print(f" Got {len(all_data)} records")

Haaland's stats:
       name  gameweek  minutes  goals_scored  assists  total_points
6   Haaland         7       90             1        0             8
7   Haaland         8       90             2        0            13
8   Haaland         9       90             0        0             2
9   Haaland        10       81             2        0            13
10  Haaland        11       90             1        0             4

Recent form test:
      name  avg_points  goals  assists
0  M.Salah         4.4      2        0

Getting all dataset
 Got 8063 records
