# Baseball Reference Web Scrape SQL Analysis: Team Performance Statistics
## 5-Step Analytics Framework
...

In [8]:
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Load connection settings from a .env file (if one is found) into the
# process environment so credentials never appear in the notebook itself.
load_dotenv()

pg_user = os.getenv('PG_USER')
pg_password = os.getenv('PG_PASSWORD')
pg_host = os.getenv('PG_HOST')
pg_db = os.getenv('PG_DB')
# The port used to be fixed at 5432; PG_PORT can override it, 5432 stays the default.
pg_port = int(os.getenv('PG_PORT', '5432'))

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the connection URL as the literal string "None".
missing_settings = [
    name
    for name, value in (('PG_USER', pg_user), ('PG_HOST', pg_host), ('PG_DB', pg_db))
    if not value
]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# URL.create escapes each component, so a password containing characters such
# as '@', ':', '/' or '%' no longer corrupts the connection string.
engine = create_engine(
    URL.create(
        drivername="postgresql+psycopg2",
        username=pg_user,
        password=pg_password,
        host=pg_host,
        port=pg_port,
        database=pg_db,
    )
)

# Show every row of a displayed DataFrame (no truncation) for the whole notebook.
pd.set_option('display.max_rows', None)

In [9]:
# Win percentage per team and season, listed season by season with the best
# records first.
#
# - GROUP BY names the quoted source columns instead of the output aliases, so
#   the grouping cannot silently bind to a different column that happens to be
#   called `team` or `year`.
# - `team` is the final sort key: teams with identical win percentages in the
#   same season would otherwise come back in an arbitrary order, making the
#   displayed table differ between runs.
#
# NOTE(review): presumably the table holds one row per team-season, in which
# case AVG() simply returns that row's value -- confirm against the table.
sql_query = '''
SELECT
    "Team Full" AS team,
    "Year" AS year,
    AVG("Win_Percentage") AS avg_win_pct
FROM sql_project.team_payroll_records
GROUP BY "Team Full", "Year"
ORDER BY year, avg_win_pct DESC, team;
'''

df = pd.read_sql(sql_query, engine)
df

Unnamed: 0,team,year,avg_win_pct
0,Philadelphia Phillies,2011,0.63
1,New York Yankees,2011,0.599
2,Texas Rangers,2011,0.593
3,Milwaukee Brewers,2011,0.593
4,Detroit Tigers,2011,0.586
5,Arizona Diamondbacks,2011,0.58
6,Tampa Bay Rays,2011,0.562
7,Boston Red Sox,2011,0.556
8,St. Louis Cardinals,2011,0.556
9,Atlanta Braves,2011,0.549


**Insight:** Consistently high-performing teams maintain win percentages at or above roughly .550 (in 2011, for example, the ten best records ranged from .549 to .630).  
**Recommendation:** Identify practices from top performers to apply to other teams.  
**Prediction:** Teams with stable management and investment tend to maintain higher win percentages.

In [11]:
# Ten largest season-over-season gains in win percentage.
#
# Pipeline:
#   win_data         -- team/season rows that have a win percentage
#   win_changes      -- each season's win percentage minus the same team's
#                       previous row (LAG ordered by year)
#   filtered_changes -- drops each team's first row (no previous value) and
#                       seasons with no change at all
#
# The final ORDER BY adds `team, year` as tie-breakers: several gains share the
# same value, and with LIMIT 10 an unbroken tie at the cut-off would let the
# database return a different set of rows from run to run.
#
# NOTE(review): LAG compares against the previous *available* row for the team,
# which is only the prior season if no year is missing -- confirm there are no
# gaps. Teams are partitioned by "Team Full", so a franchise whose full name
# changed would presumably be treated as two separate teams -- verify.
sql_query = '''WITH win_data AS (
    SELECT
        "Team Full" AS team,
        "Year" AS year,
        "Win_Percentage"
    FROM sql_project.team_payroll_records
    WHERE "Win_Percentage" IS NOT NULL
),
win_changes AS (
    SELECT
        team,
        year,
        "Win_Percentage",
        "Win_Percentage" - LAG("Win_Percentage") OVER (
            PARTITION BY team 
            ORDER BY year
        ) AS win_pct_change
    FROM win_data
),
filtered_changes AS (
    SELECT *
    FROM win_changes
    WHERE 
        win_pct_change IS NOT NULL 
        AND win_pct_change <> 0
)

SELECT *
FROM filtered_changes
ORDER BY win_pct_change DESC, team, year
LIMIT 10;
'''

df = pd.read_sql(sql_query, engine)
df

Unnamed: 0,team,year,Win_Percentage,win_pct_change
0,Baltimore Orioles,2022,0.512,0.191
1,Kansas City Royals,2024,0.531,0.185
2,San Diego Padres,2020,0.617,0.185
3,San Francisco Giants,2021,0.66,0.177
4,Boston Red Sox,2013,0.599,0.173
5,Boston Red Sox,2021,0.568,0.168
6,Miami Marlins,2020,0.517,0.165
7,Minnesota Twins,2017,0.525,0.161
8,New York Mets,2022,0.623,0.148
9,Chicago Cubs,2015,0.599,0.148


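**Insight:** Some teams achieved major win improvements following key investments; the ten largest year-over-year gains range from +.148 to +.191 in win percentage (led by the 2022 Baltimore Orioles).  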
**Recommendation:** Study the management and roster changes during these years.  
**Prediction:** Similar strategies could yield improvements for lower-ranked teams.