In [1]:
import getpass
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine

# Connection settings.
# Non-secret values fall back to the project defaults only when they are not
# already present in the environment, so an externally supplied PG_HOST /
# PG_USER / PG_DB is respected instead of being overwritten.
os.environ.setdefault('PG_HOST', 'isba-dev-02.cmb4w8cmqb26.us-east-1.rds.amazonaws.com')
os.environ.setdefault('PG_USER', 'postgres')
os.environ.setdefault('PG_DB', 'data_engineer_project')

host = os.getenv('PG_HOST')
user = os.getenv('PG_USER')
database = os.getenv('PG_DB')

# The password must never be stored in the notebook. It is taken from the
# PG_PASSWORD environment variable when set; otherwise the user is prompted
# (the prompt does not echo and the value is not written to the environment).
# NOTE(review): any password that was ever committed with this notebook
# should be rotated.
password = os.getenv('PG_PASSWORD') or getpass.getpass('PostgreSQL password: ')

# User name and password are URL-escaped so that characters such as '@', ':'
# or '/' in a credential cannot corrupt the connection URL.
connection_string = (
    f"postgresql+psycopg2://{quote_plus(user)}:{quote_plus(password)}"
    f"@{host}/{database}"
)
engine = create_engine(connection_string)

# Show every row of a result; the queries in this notebook return small
# aggregate tables only.
pd.set_option('display.max_rows', None)

# Smoke test: confirm the connection works and report the table's row count.
# COUNT(*) without GROUP BY always yields exactly one row, so no LIMIT is needed.
test_query = '''
SELECT COUNT(*) FROM raw."Gold_Ranked_Players";
'''
pd.read_sql(test_query, engine)

Unnamed: 0,count
0,820


# Descriptive Business Question: Is there a significant win/loss ratio gap between new players and experienced players?

In [2]:
# Descriptive query: new vs. experienced players.
#   * player_types labels each row 'New Player' when "freshBlood" = 'TRUE';
#     every other row (including a NULL flag) becomes 'Experienced Player'.
#   * The outer SELECT reports, per label, the number of players and the
#     ratio of average wins to average losses, rounded to two decimals.
#     NULLIF turns a zero average-loss value into NULL so the division
#     cannot raise a division-by-zero error.
sql_query = """
WITH player_types AS (
    SELECT
        CASE
            WHEN "freshBlood" = 'TRUE' THEN 'New Player'
            ELSE 'Experienced Player'
        END AS player_type,
        wins,
        losses
    FROM raw."Gold_Ranked_Players"
)
SELECT
    player_type,
    COUNT(*) AS player_count,
    ROUND(AVG(wins)::numeric / NULLIF(AVG(losses), 0), 2) AS win_loss_ratio
FROM player_types
GROUP BY player_type
ORDER BY player_type;
"""

# Run the query on the notebook's engine; the bare name on the last line
# renders the resulting DataFrame as the cell output.
query_result = pd.read_sql(sql=sql_query, con=engine)
query_result

Unnamed: 0,player_type,player_count,win_loss_ratio
0,Experienced Player,760,1.04
1,New Player,60,1.02


# Insights: 
1. New players (freshBlood = TRUE) had a win/loss ratio of 1.02
2. Experienced players had a slightly higher ratio of 1.04
3. Although new players are a much smaller group (60 vs. 760), they are not at a major competitive disadvantage.
4. This is consistent with skill-based matchmaking: new players do not appear to lose more often simply because they lack game experience.

# Recommendations
Since new players are performing about as well as experienced players, this is a good opportunity to keep them motivated.
Engage them early and optimize their in-game experience.

# Prediction
If new players continue to get support early on, they will likely improve even faster and keep playing, 
helping to grow the player base long term.

# Diagnostic Business Question: How do new players on a hot streak compare to other new players in terms of performance, and what key differences stand out?

In [None]:
# Diagnostic query: among new players ("freshBlood" = 'TRUE'), compare those
# on a hot streak with those who are not. For each "hotStreak" value it
# returns the number of players and the average of the per-player win/loss
# ratio, rounded to two decimals, with the hot-streak group listed first.
#
# The query reads the table directly: the earlier version also computed a
# ROW_NUMBER() column that was never used and cross-joined against a
# single-row dummy subquery, neither of which affected the result.
#
# Caveats:
#   * NULLIF(losses, 0) makes the ratio NULL for a player with zero losses.
#     AVG ignores NULLs, so such players are counted in player_count but do
#     not contribute to avg_win_loss_ratio.
#   * This is the mean of individual ratios, whereas the new-vs-experienced
#     query divides average wins by average losses; the two figures are
#     therefore not directly comparable.
sql_query = '''
SELECT
    "hotStreak",
    COUNT(*) AS player_count,
    ROUND(AVG(wins::numeric / NULLIF(losses, 0)), 2) AS avg_win_loss_ratio
FROM raw."Gold_Ranked_Players"
WHERE "freshBlood" = 'TRUE'
GROUP BY "hotStreak"
ORDER BY "hotStreak" DESC;
'''


# Execute
query_result = pd.read_sql(sql_query, engine)
query_result

Unnamed: 0,hotStreak,player_count,avg_win_loss_ratio
0,True,13,2.39
1,False,47,0.99


# Insights
1. 13 of the 60 new players (≈22%) are flagged as being on a hot streak.
2. These players perform significantly better: their average win/loss ratio (2.39) is more than double that of the other new players (0.99).

# Recommendation
Target hot streak players with early positive reinforcement, provide incentives to keep their momentum.

# Prediction
This could improve player retention and overall new-player satisfaction, leading to a higher conversion rate from new to experienced players.