In [7]:
import numpy as np
import pandas as pd
from cs50 import SQL
from random import randint
from scipy.stats import rv_discrete

# Handle to the local SQLite file qatarwc.db, opened through the CS50 SQL
# wrapper; later cells query its `teams` and `fixtures` tables through it.
db = SQL("sqlite:///qatarwc.db")

In [8]:
class Team:
    """Football team together with its running tournament statistics.

    Attributes:
        name: identifier given at construction.
        group: group label given at construction.
        points: points accumulated so far (starts at 0).
        goals_scored: goals scored so far (starts at 0).
        goals_received: goals conceded so far (starts at 0).
        stage: "groups" on creation, 'eliminated' after `eliminate()`.
    """

    def __init__(self, name, group):
        # Identity of the team
        self.name, self.group = name, group
        # Tallies all start from zero
        self.points = 0
        self.goals_scored = 0
        self.goals_received = 0
        # Every team starts in the group stage
        self.stage = "groups"

    def match(self, scored, received):
        """Record one played match: add the goals and award the points."""
        self.goals_scored += scored
        self.goals_received += received
        # 3 points for a win, 1 for a draw, nothing for a defeat
        if scored > received:
            earned = 3
        elif scored == received:
            earned = 1
        else:
            earned = 0
        self.points += earned

    def eliminate(self):
        """Flag the team as knocked out."""
        self.stage = 'eliminated'

In [38]:
# Dict of Team instances, keyed by team code
TEAMS = {}
def create_teams():
    """
    (Re)builds TEAMS with one fresh Team per row of the `teams` table.

    Every call empties TEAMS and then stores a new Team under each team
    code, so points and goal tallies from a previous simulation never carry
    over and codes that are no longer in the table do not linger.
    """
    # Query first: if the database call fails, TEAMS is left untouched.
    rows = db.execute('SELECT code, "group" FROM teams;')

    # Clear in place (rather than rebinding) so that anything already
    # holding a reference to TEAMS sees the reset as well.
    TEAMS.clear()
    for team in rows:
        TEAMS[team['code']] = Team(team['code'], team['group'])


In [54]:
# Ad-hoc check: display the points currently held by the 'MEX' entry of TEAMS.
TEAMS['MEX'].points

3

In [59]:
def simulate_match():
    """
    Returns the final score of a simulated football match.

    The total number of goals is drawn from a custom discrete distribution
    over 0..12 and then split at random between the two teams.
    The max goals ever scored in a WC match are 12.
    The max goals scored by one team are 10, so neither side of the
    returned score ever exceeds 10.

    Returns:
        tuple: (team 1 goals, team 2 goals).
    """
    # Cap on the goals a single team may be credited with
    max_team_goals = 10

    # Custom probability distribution of total match goals
    xk = range(13)
    pk = (0.09, 0.18, 0.28, 0.24, 0.08, 0.065, 0.025, 0.015, 0.0105, 0.006, 0.004, 0.0025, 0.002)
    pdist = rv_discrete(values=(xk, pk))

    # Generate sample
    total_goals = int(pdist.rvs())

    # Assign a portion of the goals to team 1. The lower bound matters when
    # the total is above the cap: giving team 1 fewer than
    # total - max_team_goals would leave team 2 above the cap (e.g. 0-12).
    fewest = max(0, total_goals - max_team_goals)
    most = min(total_goals, max_team_goals)
    t1_goals = randint(fewest, most)

    # Return score
    return t1_goals, total_goals - t1_goals

def simulate_groups():
    """
    Simulates every group-stage fixture and updates the teams' statistics.

    Each fixture whose stage is 'group matches' is read from the database,
    given a score by simulate_match(), and recorded on both teams in TEAMS
    through their match() method.
    FIFA regulations, for reference:
    https://digitalhub.fifa.com/m/2744a0a5e3ded185/original/FIFA-World-Cup-Qatar-2022-Regulations_EN.pdf

    Returns:
        pandas.DataFrame: the fixtures indexed by 'match', with the columns
        team1, team2, t1_goals and t2_goals.
    """
    group_matches = db.execute("SELECT match, team1, team2 FROM fixtures WHERE stage = 'group matches';")
    groups_df = pd.DataFrame(group_matches).set_index('match')

    t1_goals = []
    t2_goals = []
    # Only the two team codes are needed per fixture, so walk those columns
    # directly instead of building a Series per row with iterrows().
    for team1, team2 in zip(groups_df['team1'], groups_df['team2']):
        g1, g2 = simulate_match()
        # Store score
        t1_goals.append(g1)
        t2_goals.append(g2)

        # Update statistics of both teams (each sees the score from its side)
        TEAMS[team1].match(g1, g2)
        TEAMS[team2].match(g2, g1)

    groups_df.insert(2, 't1_goals', t1_goals)
    groups_df.insert(3, 't2_goals', t2_goals)

    # Hand the results back so callers can inspect or display them
    return groups_df

In [51]:
# Ad-hoc check: display the points currently held by the 'QAT' entry of TEAMS.
TEAMS['QAT'].points

7

In [98]:
create_teams() # Reset stats every simulation
simulate_groups()

# SIMULATE A SINGLE GROUP
# Standings rows: [team code, points, goal difference]
group = []
# Bind the group letter as a query parameter: a double-quoted "C" is an
# identifier in SQL and is only read as a string through SQLite's legacy
# fallback for unknown identifiers.
gl = db.execute('SELECT code FROM teams WHERE "group" = ?', 'C')

for team in gl:
    t = TEAMS[team['code']]
    group.append([t.name, t.points, t.goals_scored - t.goals_received])

# Rank by points, then by goal difference (both descending)
group_df = pd.DataFrame(group, columns=['team','pts','gdf']).sort_values(['pts','gdf'], ascending=[False, False])
# Label the rows with their finishing position, whatever the group size
group_df.index = range(1, len(group_df) + 1)
group_df

Unnamed: 0,team,pts,gdf
1,ARG,7,3
2,KSA,4,-1
3,MEX,3,-1
4,POL,3,-1
