In [None]:
# Importing necessary libraries
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv, dotenv_values
from sqlalchemy import create_engine

In [None]:
# Load variables from a local .env file into the process environment so the
# os.getenv() lookups in the database connection cell can read them.
load_dotenv()

In [None]:
# Connect to the database.
# Connection settings come from environment variables, which load_dotenv()
# populates from the .env file earlier in the notebook.
raw_host = os.getenv('Database_host')
raw_username = os.getenv('Database_username')
raw_password = os.getenv('Database_password')
raw_schema = os.getenv('Database_schema')

raw_db_config = {
    "host": raw_host,
    "username": raw_username,
    "password": raw_password,
    "schema": raw_schema
}

# Fail fast with a clear message: without this check a missing variable would
# be formatted into the URL as the literal text "None" and only surface later
# as a confusing connection error.
missing_settings = [
    f"Database_{setting}"
    for setting, value in raw_db_config.items()
    if value is None
]
if missing_settings:
    raise RuntimeError(
        "Missing database environment variable(s): "
        + ", ".join(missing_settings)
    )

# Percent-encode the credentials so characters that are special in a URL
# (e.g. '@', ':', '/', '+', spaces) cannot corrupt it. safe="" makes quote()
# encode '/' as well.
encoded_username = quote(raw_username, safe="")
encoded_password = quote(raw_password, safe="")

# driver://username:password@host/database
raw_engine = create_engine(
    f"mysql+mysqlconnector://{encoded_username}:{encoded_password}"
    f"@{raw_host}/{raw_schema}"
)


# Business question
### Which players play the positions we need and perform above average?

In [None]:
# Shortlist query: players in position 'RW' or 'C' with more than 30 games
# played whose per-game goals and assists are above, and whose per-game
# penalty figure is below, the averages taken from draft_playerstats.
#   * Average_stats      - a single row of averages computed over every row of
#                          draft_playerstats (no position filter is applied).
#   * Player_Performance - per-game rates for the RW/C players, obtained by
#                          joining draft_players to draft_playerstats on
#                          player_id.
# The final SELECT pairs every player row with the one averages row (comma
# join) and applies the thresholds; only the player columns are returned.
# The line beginning with '#' inside the SQL is a MySQL line comment, so
# player_id is not part of the result.
# NOTE(review): penalties/gameplay is multiplied by 60 and labelled "seconds",
# which presumes penalties are stored in minutes - confirm against the schema.
# NOTE(review): both sides of each comparison are rounded to 2 decimals before
# the strict > / < tests, so a player marginally better than the average can
# tie with it after rounding and be excluded - confirm this is intended.
query = """
WITH Average_stats AS (
    SELECT
        ROUND(AVG(gameplay)) AS avg_gamesplayed,
        ROUND(AVG(goals/gameplay), 2) AS avg_goals,
        ROUND(AVG(assists/gameplay), 2) AS avg_assists,
        ROUND(AVG(points/gameplay), 2) AS avg_points,
        ROUND(AVG(penalties/gameplay) * 60, 2) AS avg_penalties_seconds
    FROM 
        draft_playerstats
), Player_Performance AS (
    SELECT 
        #dp.player_id, 
        dp.full_name, 
        dp.position, 
        dp.league,
        dp.height, 
        dp.weight, 
        ps.gameplay,
        ROUND(ps.goals/ps.gameplay,2) AS goals_per_game, 
        ROUND(ps.assists/ps.gameplay,2) AS assists_per_game, 
        ROUND(ps.points/ps.gameplay,2) AS points_per_game, 
        ROUND((ps.penalties/ps.gameplay) * 60,2)AS penalties_seconds_per_game
    FROM 
        draft_players dp
    JOIN 
        draft_playerstats ps ON dp.player_id = ps.player_id
    WHERE 
        dp.position IN ('RW', 'C')
)
SELECT 
    p.* 
FROM 
    Player_Performance p, 
    Average_stats a
WHERE 
    p.gameplay > 30
    AND p.goals_per_game > a.avg_goals
    AND p.assists_per_game > a.avg_assists
    AND p.penalties_seconds_per_game < a.avg_penalties_seconds;
"""
# Execute the query through the SQLAlchemy engine and load the result set
# into a DataFrame.
df = pd.read_sql(query, raw_engine)

In [None]:
# Preview the first rows of the shortlist returned by the query above.
df.head()

In [None]:
# Save the current query result to CSV; index=False leaves the DataFrame
# index out of the file.
df.to_csv('draft_performance.csv', index=False)

# Answer
### Insight: 28 players out of 400 are above average and fit the position the team needs
### Recommendation: Compare these players against the leagues they currently play in
### Prediction: The number of suitable players will decrease

## Business Question
### What are their strengths compared to their current league?

In [None]:
# Strength classification: compare each player in View_best_performance_player
# with the averages of the league they are listed in.
#   * Performance_Metrics    - adds, per league, the window averages of
#                              goals_per_game and assists_per_game
#                              (AVG(...) OVER (PARTITION BY league)).
#   * Classified_Performance - labels each player 'Strong in Both',
#                              'Strong in Goals', 'Strong in Assists' or
#                              'Average' using >= against those averages.
# The final SELECT drops the 'Average' rows and sorts by the label, descending.
# NOTE(review): the per-league averages are taken only over the rows present in
# the view, not over a full league roster - presumably the view holds the
# shortlist produced by the previous query; confirm.
# NOTE: this reassigns `query` and `df`, replacing the first query's result.
query = """
WITH Performance_Metrics AS (
    SELECT 
        vp.full_name,
        vp.league,
        vp.position,
        vp.height,
        vp.weight,
        vp.goals_per_game,
        vp.assists_per_game,
        vp.points_per_game,
        vp.penalties_seconds_per_game,
        AVG(vp.goals_per_game) OVER (PARTITION BY vp.league) AS avg_goals_per_league,
        AVG(vp.assists_per_game) OVER (PARTITION BY vp.league) AS avg_assists_per_league
    FROM 
        View_best_performance_player vp
),  
Classified_Performance AS (
	SELECT 
	    full_name,
	    league,
	    position,
	    height,
	    weight,
	    goals_per_game,
	    assists_per_game,
	    points_per_game,
	    penalties_seconds_per_game,
	    CASE
	        WHEN goals_per_game >= avg_goals_per_league AND assists_per_game >= avg_assists_per_league THEN 'Strong in Both'
	        WHEN goals_per_game >= avg_goals_per_league THEN 'Strong in Goals'
	        WHEN assists_per_game >= avg_assists_per_league THEN 'Strong in Assists'
	        ELSE 'Average'
	    END AS strength
	FROM 
	    Performance_Metrics
)
SELECT *
FROM Classified_Performance
WHERE strength != 'Average'
ORDER BY strength DESC;
"""
# Execute the query through the SQLAlchemy engine and load the result set
# into a DataFrame.
df = pd.read_sql(query, raw_engine)

In [None]:
# Preview the first rows of the classified players returned by the query above.
df.head()

# Answer
### Insight: 21 players met our final selection criteria; each is above the average for the league they currently play in.
### Recommendation: Review more stats from their previous seasons, as well as their height and weight.
### Prediction: The number of suitable players will decrease, but players who can better help the team win will be found.

In [None]:
# Save the current query result to CSV; index=False leaves the DataFrame
# index out of the file.
df.to_csv('draft_player_performance.csv', index=False)