In [None]:
import httpx
from pydantic import BaseModel, Field
from typing import List, Optional, Union, Dict
import polars as pl
from sqlalchemy import create_engine

In [None]:
def store_to_postgres(df: pl.DataFrame, schema: str, table_name: str):
    """
    Appends a Polars DataFrame to a table in the local `sportsdata` Postgres database.

    Args:
        df (pl.DataFrame): The data to store. It is converted to pandas before writing.
        schema (str): The database schema that holds the target table.
        table_name (str): The target table. Rows are appended (`if_exists="append"`).
    """
    # Local import keeps this cell self-contained.
    from urllib.parse import quote_plus

    # Database connection details
    db_username = ""
    db_password = ""
    db_host = "localhost"
    db_port = "5432"
    db_name = "sportsdata"

    # Percent-encode the credentials so characters such as "@", ":" or "/"
    # cannot corrupt the connection URL once real values are filled in.
    connection_url = (
        f"postgresql://{quote_plus(db_username)}:{quote_plus(db_password)}"
        f"@{db_host}:{db_port}/{db_name}"
    )

    # Create SQLAlchemy engine
    engine = create_engine(connection_url)

    try:
        # pandas provides the to_sql writer, so convert before storing
        pandas_df = df.to_pandas()
        pandas_df.to_sql(table_name, engine, schema=schema, if_exists="append", index=False)
    finally:
        # Release pooled connections even when the write fails
        engine.dispose()

    print(f"Data successfully stored in {schema}.{table_name}")

In [None]:
class Team(BaseModel):
    """Team object nested in each player rating (the `team` field of `PlayerRating`)."""
    id: int
    label: str
    imageUrl: str
    # NOTE(review): meaning of "popular" is defined by the API — confirm before relying on it
    isPopular: bool

class PositionType(BaseModel):
    """Position type nested inside `Position` (its `positionType` field)."""
    id: str
    name: str

class Position(BaseModel):
    """Position object nested in each player rating (the `position` field of `PlayerRating`)."""
    id: str
    shortLabel: str
    label: str
    # Nested one level deeper; flattened to position_type_id / position_type_name downstream
    positionType: PositionType

class Archetype(BaseModel):
    """Archetype object for a player rating; `PlayerRating.archetype` allows it to be None."""
    id: str
    label: str

class Iteration(BaseModel):
    """Iteration object nested in each player rating (the `iteration` field of `PlayerRating`)."""
    # presumably the same identifier passed as the `iteration` query parameter — TODO confirm
    id: str
    label: str

class NumericStat(BaseModel):
    """Stat entry whose `value` is numeric; one of the two shapes allowed in `PlayerRating.stats`."""
    value: float
    # NOTE(review): presumably the change versus an earlier iteration — confirm against the API
    diff: int

class RunningStyleStat(BaseModel):
    """Stat entry whose `value` is text; the non-numeric shape allowed in `PlayerRating.stats`."""
    # presumably used for the `runningStyle` stat, which downstream code excludes from float casts
    value: str
    diff: int

class AbilityType(BaseModel):
    """Ability type nested inside `Ability` (its `type` field)."""
    id: str
    label: str
    imageUrl: str
    iconUrl: str

class Ability(BaseModel):
    """Single entry of the `playerAbilities` list on `PlayerRating`."""
    id: str
    label: str
    description: str
    imageUrl: str
    type: AbilityType

class PlayerRating(BaseModel):
    """
    One player's rating record as returned in `RatingsResponse.items`.

    Scalar fields are kept as delivered; nested objects (`team`, `position`,
    `iteration`, `archetype`, `stats`) are flattened later by
    `create_madden_nfl_dataframe`.
    """
    id: int
    overallRating: int
    firstName: str
    lastName: str
    # Kept as text here; parsed into a date when the DataFrame is built
    birthdate: str
    height: int
    weight: int
    college: str
    handedness: int
    age: int
    jerseyNum: int
    yearsPro: int
    playerAbilities: List[Ability]
    # Explicit None defaults: without them pydantic v2 treats an Optional field
    # as a required key, while v1 already defaulted it to None.
    avatarUrl: Optional[str] = None
    archetype: Optional[Archetype] = None
    team: Team
    position: Position
    iteration: Iteration
    # Keyed by stat name; each entry is numeric or text depending on the stat
    stats: Dict[str, Union[NumericStat, RunningStyleStat]]

class RatingsResponse(BaseModel):
    """Response body of the ratings endpoint as parsed by `get_madden_ratings`."""
    # Player ratings contained in this response
    items: List[PlayerRating]
    # Used by get_madden_ratings as the overall count to keep paging toward
    totalItems: int

In [None]:
# Ratings endpoint queried by get_madden_ratings; locale, iteration and paging
# parameters are added per request.
BASE_URL = "https://drop-api.ea.com/rating/madden-nfl"

In [None]:
def get_madden_ratings(locale: str = "en", iteration: str = "1-base") -> List[PlayerRating]:
    """
    Fetches Madden ratings based on the specified locale and iteration.

    The first request is sent without paging parameters and supplies the
    overall item count. Further pages of up to 100 items are then requested,
    using the number of ratings collected so far as the offset.

    Args:
        locale (str): The locale for the ratings. Defaults to "en".
        iteration (str): The iteration of the ratings. Defaults to "1-base".

    Returns:
        List[PlayerRating]: A list of player ratings.

    Raises:
        httpx.HTTPStatusError: If any request returns an error status.
    """
    # Passing params lets httpx URL-encode the values instead of trusting raw strings
    query = {"locale": locale, "iteration": iteration}
    all_ratings: List[PlayerRating] = []

    with httpx.Client() as client:
        response = client.get(BASE_URL, params=query)
        response.raise_for_status()
        ratings_response = RatingsResponse(**response.json())
        all_ratings.extend(ratings_response.items)

        total_count = ratings_response.totalItems
        while len(all_ratings) < total_count:
            page_query = {**query, "limit": 100, "offset": len(all_ratings)}
            response = client.get(BASE_URL, params=page_query)
            response.raise_for_status()
            page = RatingsResponse(**response.json())
            if not page.items:
                # An empty page never advances the offset; stop instead of
                # requesting the same page forever.
                break
            all_ratings.extend(page.items)

    print(f"Total items fetched: {len(all_ratings)}")
    print(f"Expected total items: {total_count}")

    return all_ratings

def create_madden_nfl_dataframe(iteration: str = "5-week-4") -> pl.DataFrame:
    """
    Creates a flat Polars DataFrame from Madden NFL ratings data.

    Nested `team`, `position`, `iteration`, `archetype` and `stats` objects are
    expanded into prefixed columns and then dropped, together with
    `playerAbilities`. Each stat's `value` becomes a `<stat>_rating` column.

    Args:
        iteration (str): The ratings iteration to fetch. Defaults to "5-week-4".

    Returns:
        pl.DataFrame: A DataFrame containing Madden NFL player ratings.
    """
    ratings = get_madden_ratings(iteration=iteration)

    # Convert Pydantic models to dictionaries
    # NOTE(review): `.dict()` is the pydantic v1 spelling; v2 deprecates it in favour of `model_dump()`
    data = [rating.dict() for rating in ratings]

    # Create DataFrame
    df = pl.DataFrame(data)

    # Add fullName column
    df = df.with_columns([
        (pl.col("firstName") + " " + pl.col("lastName")).alias("fullName")
    ])

    # Flatten nested structures
    team = pl.col("team").struct
    position = pl.col("position").struct
    position_type = position.field("positionType").struct
    iteration_struct = pl.col("iteration").struct
    archetype = pl.col("archetype").struct
    df = df.with_columns([
        team.field("id").alias("team_id"),
        team.field("label").alias("team_label"),
        team.field("imageUrl").alias("team_imageUrl"),
        team.field("isPopular").alias("team_isPopular"),
        position.field("id").alias("position_id"),
        position.field("shortLabel").alias("position_shortLabel"),
        position.field("label").alias("position_label"),
        position_type.field("id").alias("position_type_id"),
        position_type.field("name").alias("position_type_name"),
        iteration_struct.field("id").alias("iteration_id"),
        iteration_struct.field("label").alias("iteration_label"),
        archetype.field("id").alias("archetype_id"),
        archetype.field("label").alias("archetype_label"),
    ])

    # Flatten stats: one `<stat>_rating` column per stat, added in a single pass
    stat_columns = df.select(pl.col("stats")).to_series().struct.fields
    df = df.with_columns([
        pl.col("stats").struct.field(stat).struct.field("value").alias(f"{stat}_rating")
        for stat in stat_columns
    ])

    # Drop original nested columns
    df = df.drop(["team", "position", "iteration", "stats", "playerAbilities", "archetype"])

    # Convert data types
    date_columns = ["birthdate"]
    int_columns = ["id", "overallRating", "height", "weight", "handedness", "age", "jerseyNum", "yearsPro", "team_id"]
    # runningStyle carries a text value, so it is excluded from the float cast
    float_columns = [col for col in df.columns if col.endswith("_rating") and col != "runningStyle_rating"]
    bool_columns = ["team_isPopular"]

    # Dates are tried as ISO first; values that fail fall back to US-style m/d/Y
    date_exprs = [
        pl.col(col)
        .str.strptime(pl.Date, format="%Y-%m-%d", strict=False)
        .fill_null(
            pl.col(col).str.strptime(pl.Date, format="%m/%d/%Y", strict=False)
        )
        .alias(col)
        for col in date_columns
    ]
    int_exprs = [pl.col(col).cast(pl.Int64) for col in int_columns]
    float_exprs = [pl.col(col).cast(pl.Float64) for col in float_columns]
    bool_exprs = [pl.col(col).cast(pl.Boolean) for col in bool_columns]

    # The four column groups are disjoint, so they can be converted together
    df = df.with_columns(date_exprs + int_exprs + float_exprs + bool_exprs)

    print(f"DataFrame shape: {df.shape}")
    return df

# Usage: build the ratings DataFrame and append it to raw.m25__player_ratings
if __name__ == "__main__":
    df = create_madden_nfl_dataframe()
    store_to_postgres(df, "raw", "m25__player_ratings")