# In [4]:
# backend/main.py
from typing import List, Optional, Dict

import numpy as np
import pandas as pd
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Load your existing code modules or paste adapted functions here
# (load CSVs, merge, train models once at startup; keep references)
# Assume functions: predict_match_outcome(...), _last_n_matches(...), _head_to_head(...)

# Startup: read the three source CSVs once, at import time, so every endpoint
# works from the same in-memory DataFrames. The paths are relative, so they are
# resolved against the process working directory. (Model training is only
# sketched in this file and is expected to follow the loads.)
df_fifa, df_results, goals_df = (
    pd.read_csv(csv_path)
    for csv_path in ('fifa2020-2024.csv', 'result1.csv', 'goalscorers.csv')
)

# ... include your preprocessing, feature engineering, training code here ...
# Keep model_clf, model_reg, scaler, feature_columns, and helper functions in module scope.

# Application object; the route decorators further down register their
# handlers on it.
app = FastAPI(title="Football Outcome API")

class PredictRequest(BaseModel):
    """Request body accepted by ``POST /predict``.

    ``home_team``, ``away_team``, ``tournament`` and ``neutral`` are passed
    positionally to ``predict_match_outcome``; the remaining fields are
    forwarded to it as keyword arguments.
    """
    # Name of the home team.
    home_team: str
    # Name of the away team.
    away_team: str
    # Tournament name (GET /tournaments lists the values found in df_results).
    tournament: str
    # Presumably marks a neutral-venue match -- TODO confirm against
    # predict_match_outcome, which is not visible here.
    neutral: bool = False
    # Optional date string. When non-empty it is parsed with pd.to_datetime
    # before prediction; when omitted or empty, None is passed instead.
    match_date: Optional[str] = None
    # Forwarded unchanged to predict_match_outcome; the exact effect is defined
    # there.
    include_penalties_for_prediction: bool = True
    # Forwarded unchanged to predict_match_outcome; valid range and meaning are
    # defined there -- TODO confirm.
    penalty_weight: float = 0.7

class PredictResponse(BaseModel):
    """Response body returned by ``POST /predict``."""
    # Predicted winner; the endpoint substitutes "Unknown" when the predictor
    # returns a falsy value.
    winner: str
    # Predicted score as text; the endpoint substitutes "0-0" when the predictor
    # returns a falsy value.
    scoreline: str
    # Entry of the predictor's scorers mapping looked up under the request's
    # home_team; empty list when there is no such entry.
    home_scorers: List[str]
    # Same lookup, under the request's away_team.
    away_scorers: List[str]
    # Currently always 0.0: the endpoint sets a placeholder until the model's
    # probability is exposed.
    proba_home_win: float

class InsightsResponse(BaseModel):
    """Response body returned by ``GET /insights``.

    The three list fields hold DataFrame rows converted with
    ``to_dict(orient="records")``, so each item is one row keyed by column name.
    """
    # Rows from _last_n_matches(df_results, home_team, n=5); [] when empty.
    last5_home: List[dict]
    # Rows from _last_n_matches(df_results, away_team, n=5); [] when empty.
    last5_away: List[dict]
    # Rows of the table returned by _head_to_head(..., n=5); [] when empty.
    h2h_last: List[dict]
    # Second value returned by _head_to_head; presumably per-outcome counts --
    # TODO confirm the key set against that helper.
    h2h_summary: Dict[str, int]

@app.get("/teams", response_model=List[str])
def list_teams():
    """Return every known team name, de-duplicated and sorted.

    Names are collected from the ``team`` column of ``df_fifa`` and from the
    ``home_team`` and ``away_team`` columns of ``df_results``. Missing values
    are dropped before the columns are merged.
    """
    name_columns = (
        df_fifa['team'],
        df_results['home_team'],
        df_results['away_team'],
    )
    teams = set()
    for column in name_columns:
        teams.update(column.dropna().unique())
    return sorted(teams)

@app.get("/tournaments", response_model=List[str])
def list_tournaments():
    """Return the distinct tournament names in ``df_results``, sorted.

    Missing values are ignored. An empty list is returned when ``df_results``
    has no ``tournament`` column.
    """
    if 'tournament' not in df_results.columns:
        return []
    distinct_names = df_results['tournament'].dropna().unique()
    return sorted(distinct_names.tolist())

@app.get("/insights", response_model=InsightsResponse)
def insights(home_team: str, away_team: str):
    """Return recent-form and head-to-head tables for two teams.

    Looks up the last five matches of each team and their last five meetings
    in ``df_results`` through ``_last_n_matches`` and ``_head_to_head``. Each
    table is returned as a list of row records, or an empty list when the
    table has no rows.

    Args:
        home_team: Team name used for the ``last5_home`` table.
        away_team: Team name used for the ``last5_away`` table.

    Returns:
        An ``InsightsResponse`` with the three tables and the head-to-head
        summary returned by ``_head_to_head``.
    """
    def _as_records(frame):
        # One dict per row; an empty frame is reported as an empty list.
        if frame.empty:
            return []
        return frame.to_dict(orient="records")

    home_recent = _last_n_matches(df_results, home_team, n=5)
    away_recent = _last_n_matches(df_results, away_team, n=5)
    h2h_matches, h2h_totals = _head_to_head(df_results, home_team, away_team, n=5)

    return InsightsResponse(
        last5_home=_as_records(home_recent),
        last5_away=_as_records(away_recent),
        h2h_last=_as_records(h2h_matches),
        h2h_summary=h2h_totals,
    )

@app.post("/predict", response_model=PredictResponse)
def predict(req: PredictRequest):
    """Predict the outcome of a single match.

    Parses the optional ``match_date``, delegates to
    ``predict_match_outcome`` and packages its ``(winner, scoreline,
    scorers)`` result into a ``PredictResponse``.

    Args:
        req: Validated request body.

    Returns:
        A ``PredictResponse``. ``winner`` falls back to ``"Unknown"`` and
        ``scoreline`` to ``"0-0"`` when the predictor returns a falsy value;
        the scorer lists fall back to ``[]`` when the predictor returns no
        entry for a team or no scorers mapping at all.

    Raises:
        HTTPException: 422 when ``match_date`` is non-empty but cannot be
            parsed by ``pd.to_datetime``.
    """
    match_date = None
    if req.match_date:
        try:
            match_date = pd.to_datetime(req.match_date)
        except ValueError as exc:
            # A malformed date is a client error; without this guard the
            # parsing failure would surface as an HTTP 500.
            raise HTTPException(
                status_code=422,
                detail=f"Invalid match_date: {req.match_date!r}",
            ) from exc

    winner, scoreline, scorers = predict_match_outcome(
        req.home_team, req.away_team, req.tournament, req.neutral,
        match_date=match_date,
        include_penalties_for_prediction=req.include_penalties_for_prediction,
        penalty_weight=req.penalty_weight,
    )

    # winner and scoreline are defaulted below when missing; give scorers the
    # same treatment so a None result does not crash on .get().
    if scorers is None:
        scorers = {}

    # Placeholder: set from the model output once predict_match_outcome
    # returns a probability.
    proba_home_win = 0.0

    return PredictResponse(
        winner=winner or "Unknown",
        scoreline=scoreline or "0-0",
        home_scorers=scorers.get(req.home_team, []),
        away_scorers=scorers.get(req.away_team, []),
        proba_home_win=proba_home_win,
    )
