In [1]:
import numpy as np
import pandas as pd
from cs50 import SQL
from random import randint
from scipy.stats import rv_discrete
from datetime import datetime

db = SQL("sqlite:///qatarwc.db")

In [2]:
class Team:
    """Football team in World Cup"""
    def __init__(self, name, group):
        self.name = name
        self.group = group
        self.points = 0
        self.goals_scored = 0
        self.goals_received = 0
        self.stage = "groups"

    def match(self, scored, received):
        self.goals_scored += scored
        self.goals_received += received
        # Update points depending on result
        if scored > received:
            self.points += 3
        elif scored == received:
            self.points += 1

    def eliminate(self):
        self.stage = 'eliminated'

In [3]:
def create_teams(): 
    """Loads teams from db into Team objects."""
    # Dict of Team instances
    TEAMS = {}   
    for i, team in enumerate(db.execute('SELECT code, "group" FROM teams;')):
        TEAMS[team['code']] = Team(team['code'], team['group'])
    return TEAMS

In [4]:
def simulate_match():
    """
    Returns the final score of a simulated football match. 
    The max goals ever scored in a WC match are 12.
    The max goals scored by one team are 10.
    """
    # Custom probability distribution of total match goals
    xk = range(13)
    pk = (0.09, 0.18, 0.28, 0.24, 0.09, 0.065, 0.025, 0.015, 0.0065, 0.004, 0.002, 0.0015, 0.001)
    pdist = rv_discrete(values=(xk, pk))

    # Generate sample
    total_goals = pdist.rvs() 
    # Assign a portion of goals to one team (max 10)
    t1_goals = randint(total_goals%10,10) if total_goals>10 else randint(0, total_goals)

    # Return score
    return t1_goals, total_goals - t1_goals

def simulate_group_stage(TEAMS):
    """"
    Simulates all the group stage matches according to FIFA rules.
    https://digitalhub.fifa.com/m/2744a0a5e3ded185/original/FIFA-World-Cup-Qatar-2022-Regulations_EN.pdf
    """
    group_matches = db.execute("SELECT match, team1, team2 FROM fixtures WHERE stage = 'group matches';")
    groups_df = pd.DataFrame(group_matches).set_index('match')

    t1_goals=[]
    t2_goals=[]
    for _, row in groups_df.iterrows():
        g1, g2 = simulate_match()
        # Store score
        t1_goals.append(g1)
        t2_goals.append(g2)
        # Update statistics of both teams
        TEAMS[row['team1']].match(g1, g2)
        TEAMS[row['team2']].match(g2, g1)

    groups_df.insert(2, 't1_goals', t1_goals)
    groups_df.insert(3, 't2_goals', t2_goals)

    return(groups_df)

In [7]:
group_labels = db.execute('SELECT DISTINCT "group" FROM teams') # Returns a list of dicts
GROUPS = [list(d.values())[0] for d in group_labels]


In [10]:
TEAMS = create_teams()
scores = simulate_group_stage(TEAMS).to_dict('index')

group_teams = dict()
group_fixtures = dict()
for g in GROUPS:
    teams = db.execute('SELECT code FROM teams WHERE "group"=?', g)
    group_teams[g] = [team['code'] for team in teams]
    fixtures = db.execute('SELECT * FROM fixtures WHERE team1 IN (SELECT code FROM teams WHERE "group"=?) ORDER BY date;', g)
    group_fixtures[g] = [{'date': datetime.strptime(match['date'], "%Y-%m-%d").strftime("%b %d"),
                             'id':match['match'], 't1': match['team1'], 't2' :match['team2'], 
                             't1_goals': scores[match['match']]['t1_goals'],
                             't2_goals': scores[match['match']]['t2_goals']} for match in fixtures]
group_teams

{'A': ['SEN', 'QAT', 'NED', 'ECU'],
 'B': ['IRN', 'ENG', 'USA', 'WAL'],
 'C': ['ARG', 'KSA', 'MEX', 'POL'],
 'D': ['DEN', 'TUN', 'FRA', 'AUS'],
 'E': ['GER', 'JPN', 'ESP', 'CRC'],
 'F': ['MAR', 'CRO', 'BEL', 'CAN'],
 'G': ['SUI', 'CMR', 'BRA', 'SRB'],
 'H': ['URU', 'KOR', 'POR', 'GHA']}

1.0


0.018000000000000002

In [11]:
create_teams() # Reset stats every simulation
r=simulate_groups()

# SIMULATE A SINGLE GROUP
group =[]
gl = db.execute('SELECT code FROM teams WHERE "group"="C"')

for team in gl:
    t = TEAMS[team['code']]
    group.append([t.name, t.points, t.goals_scored - t.goals_received])
    
group_df = pd.DataFrame(group, columns=['team','pts','gdf']).sort_values(['pts','gdf'], ascending=[False, False])
group_df.index = [1,2,3,4]
group_df

Unnamed: 0,team,pts,gdf
1,KSA,16,1
2,MEX,14,6
3,ARG,11,-3
4,POL,8,-4


## 

In [21]:
group_labels = db.execute('SELECT DISTINCT "group" FROM teams')
group_labels = [list(d.values())[0] for d in group_labels]


['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

In [109]:
group_fixtures

{'A': [{'date': 'Nov 21',
   'id': 1,
   't1': 'QAT',
   't2': 'ECU',
   't1_goals': 2,
   't2_goals': 2},
  {'date': 'Nov 21',
   'id': 2,
   't1': 'SEN',
   't2': 'NED',
   't1_goals': 2,
   't2_goals': 0},
  {'date': 'Nov 25',
   'id': 18,
   't1': 'QAT',
   't2': 'SEN',
   't1_goals': 1,
   't2_goals': 2},
  {'date': 'Nov 25',
   'id': 19,
   't1': 'NED',
   't2': 'ECU',
   't1_goals': 5,
   't2_goals': 0},
  {'date': 'Nov 29',
   'id': 35,
   't1': 'ECU',
   't2': 'SEN',
   't1_goals': 1,
   't2_goals': 0},
  {'date': 'Nov 29',
   'id': 36,
   't1': 'NED',
   't2': 'QAT',
   't1_goals': 1,
   't2_goals': 0}],
 'B': [{'date': 'Nov 21',
   'id': 3,
   't1': 'ENG',
   't2': 'IRN',
   't1_goals': 1,
   't2_goals': 1},
  {'date': 'Nov 21',
   'id': 4,
   't1': 'USA',
   't2': 'WAL',
   't1_goals': 1,
   't2_goals': 2},
  {'date': 'Nov 25',
   'id': 17,
   't1': 'WAL',
   't2': 'IRN',
   't1_goals': 1,
   't2_goals': 0},
  {'date': 'Nov 25',
   'id': 20,
   't1': 'ENG',
   't2': 'USA',


In [153]:
simulate_groups()

Unnamed: 0_level_0,t1_goals,t2_goals
match,Unnamed: 1_level_1,Unnamed: 2_level_1
1,4,1
2,0,2
3,0,0
4,0,0
5,3,0
6,5,1
7,3,0
8,0,3
9,1,1
10,5,3


In [36]:
TEAM_CODES = db.execute('SELECT code, team FROM teams;')
TEAM_CODES = {team['code']:team['team'] for team in TEAM_CODES}
TEAM_CODES

{'SEN': 'Senegal',
 'QAT': 'Qatar',
 'NED': 'Netherlands',
 'ECU': 'Ecuador',
 'IRN': 'Iran',
 'ENG': 'England',
 'USA': 'United States',
 'WAL': 'Wales',
 'ARG': 'Argentina',
 'KSA': 'Saudi Arabia',
 'MEX': 'Mexico',
 'POL': 'Poland',
 'DEN': 'Denmark',
 'TUN': 'Tunisia',
 'FRA': 'France',
 'AUS': 'Australia',
 'GER': 'Germany',
 'JPN': 'Japan',
 'ESP': 'Spain',
 'CRC': 'Costa Rica',
 'MAR': 'Morocco',
 'CRO': 'Croatia',
 'BEL': 'Belgium',
 'CAN': 'Canada',
 'SUI': 'Switzerland',
 'CMR': 'Cameroon',
 'BRA': 'Brazil',
 'SRB': 'Serbia',
 'URU': 'Uruguay',
 'KOR': 'South Korea',
 'POR': 'Portugal',
 'GHA': 'Ghana'}

In [38]:
help(datetime)

Help on class datetime in module datetime:

class datetime(date)
 |  datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
 |  
 |  The year, month and day arguments are required. tzinfo may be None, or an
 |  instance of a tzinfo subclass. The remaining arguments may be ints.
 |  
 |  Method resolution order:
 |      datetime
 |      date
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __hash__(self, /)
 |      Return hash(self).
 |  
 |  __le__(self, value, /)
 |      Return self<=value.
 |  
 |  __lt__(self, value, /)
 |      Return self<value.
 |  
 |  __ne__(self, value, /)
 |      Return self!=value.
 |  
 |  __radd__(self, value

In [48]:
date=datetime.strptime("12-4-2022","%m-%d-%Y")

In [50]:
date.strftime("%b %d")

'Dec 04'

In [57]:
group = list(range(8))
group[::2]

[0, 2, 4, 6]

In [60]:
for x1, x2 in zip(group[::2], group[1::2]):
    print(x1, x2)

0 1
2 3
4 5
6 7
