In [1]:
import numpy as np
import pandas as pd
from cs50 import SQL
from random import randint
from scipy.stats import rv_discrete

db = SQL("sqlite:///qatarwc.db")

In [2]:
class Team:
    "Football team"
    def __init__(self, name, group):
        self.name = name
        self.group = group
        self.points = 0
        self.goals_scored = 0
        self.goals_received = 0
        self.stage = "groups"

    def match(self, scored, received):
        self.goals_scored += scored
        self.goals_received += received
        # Update points depending on result
        if scored > received:
            self.points += 3
        elif scored == received:
            self.points += 1

    def eliminate(self):
        self.stage = 'eliminated'

In [26]:
def create_teams():
    # Dict of Team instances
    TEAMS = dict()
    for i, team in enumerate(db.execute('SELECT code, "group" FROM teams;')):
        TEAMS[team['code']] = Team(team['code'], team['group'])
    return TEAMS

In [96]:
def simulate_match():
    """
    Returns the final score of a simulated football match. 
    The max goals ever scored in a WC match are 12.
    The max goals scored by one team are 10.
    """
    # Custom probability distribution of total match goals
    xk = range(13)
    pk = (0.09, 0.18, 0.28, 0.24, 0.08, 0.065, 0.025, 0.015, 0.0105, 0.006, 0.004, 0.0025, 0.002)
    pdist = rv_discrete(values=(xk, pk))

    # Generate sample
    total_goals = pdist.rvs() 
    # Assign a portion of goals to one team (max 10)
    t1_goals = randint(0,10) if total_goals>10 else randint(0, total_goals)

    # Return score
    return t1_goals, total_goals - t1_goals

def simulate_groups():
    """"
    Simulates the group stage for one group according to FIFA rules.
    https://digitalhub.fifa.com/m/2744a0a5e3ded185/original/FIFA-World-Cup-Qatar-2022-Regulations_EN.pdf
    """
    group_matches = db.execute("SELECT match, team1, team2 FROM fixtures WHERE stage = 'group matches';")
    groups_df = pd.DataFrame(group_matches).set_index('match')

    t1_goals=[]
    t2_goals=[]
    for _, row in groups_df.iterrows():
        g1, g2 = simulate_match()
        # Store score
        t1_goals.append(g1)
        t2_goals.append(g2)

        # Update statistics of both teams
        TEAMS[row['team1']].match(g1, g2)
        TEAMS[row['team2']].match(g2, g1)

    groups_df.insert(2, 't1_goals', t1_goals)
    groups_df.insert(3, 't2_goals', t2_goals)

    return(groups_df[['t1_goals', 't2_goals']])

In [97]:
print(simulate_groups().to_dict('index'))

{1: {'t1_goals': 2, 't2_goals': 0}, 2: {'t1_goals': 1, 't2_goals': 2}, 3: {'t1_goals': 3, 't2_goals': 3}, 4: {'t1_goals': 1, 't2_goals': 1}, 5: {'t1_goals': 2, 't2_goals': 2}, 6: {'t1_goals': 0, 't2_goals': 3}, 7: {'t1_goals': 1, 't2_goals': 1}, 8: {'t1_goals': 0, 't2_goals': 6}, 9: {'t1_goals': 3, 't2_goals': 3}, 10: {'t1_goals': 0, 't2_goals': 0}, 11: {'t1_goals': 3, 't2_goals': 0}, 12: {'t1_goals': 1, 't2_goals': 0}, 13: {'t1_goals': 0, 't2_goals': 1}, 14: {'t1_goals': 2, 't2_goals': 1}, 15: {'t1_goals': 1, 't2_goals': 1}, 16: {'t1_goals': 2, 't2_goals': 2}, 17: {'t1_goals': 0, 't2_goals': 1}, 18: {'t1_goals': 1, 't2_goals': 0}, 19: {'t1_goals': 2, 't2_goals': 1}, 20: {'t1_goals': 1, 't2_goals': 1}, 21: {'t1_goals': 1, 't2_goals': 1}, 22: {'t1_goals': 2, 't2_goals': 0}, 23: {'t1_goals': 1, 't2_goals': 0}, 24: {'t1_goals': 1, 't2_goals': 1}, 25: {'t1_goals': 1, 't2_goals': 0}, 26: {'t1_goals': 1, 't2_goals': 1}, 27: {'t1_goals': 1, 't2_goals': 3}, 28: {'t1_goals': 1, 't2_goals': 1}, 

In [6]:
TEAMS['QAT'].points

KeyError: 'QAT'

In [84]:
create_teams() # Reset stats every simulation
r=simulate_groups()

# SIMULATE A SINGLE GROUP
group =[]
gl = db.execute('SELECT code FROM teams WHERE "group"="C"')

for team in gl:
    t = TEAMS[team['code']]
    group.append([t.name, t.points, t.goals_scored - t.goals_received])
    
group_df = pd.DataFrame(group, columns=['team','pts','gdf']).sort_values(['pts','gdf'], ascending=[False, False])
group_df.index = [1,2,3,4]
group_df

Unnamed: 0,team,pts,gdf
1,POL,11,8
2,MEX,11,-1
3,KSA,6,0
4,ARG,4,-7


## 

In [21]:
group_labels = db.execute('SELECT DISTINCT "group" FROM teams')
group_labels = [list(d.values())[0] for d in group_labels]


['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H']

In [24]:
group_labels = db.execute('SELECT DISTINCT "group" FROM teams') # Returns a list of dicts
GROUPS = [list(d.values())[0] for d in group_labels]


In [103]:
TEAMS = create_teams()
group_teams = dict()
group_fixtures = dict()
for g in GROUPS:
    teams = db.execute('SELECT code FROM teams WHERE "group"=?', g)
    group_teams[g] = [team['code'] for team in teams]
    fixtures = db.execute('SELECT * FROM fixtures WHERE team1 IN (SELECT code FROM teams WHERE "group"=?) ORDER BY date;', g)
    group_fixtures[g] = [(match['date'], match['team1'], match['team2']) for match in fixtures]
group_teams

{'A': ['SEN', 'QAT', 'NED', 'ECU'],
 'B': ['IRN', 'ENG', 'USA', 'WAL'],
 'C': ['ARG', 'KSA', 'MEX', 'POL'],
 'D': ['DEN', 'TUN', 'FRA', 'AUS'],
 'E': ['GER', 'JPN', 'ESP', 'CRC'],
 'F': ['MAR', 'CRO', 'BEL', 'CAN'],
 'G': ['SUI', 'CMR', 'BRA', 'SRB'],
 'H': ['URU', 'KOR', 'POR', 'GHA']}

In [104]:
group_fixtures

{'A': [('2022-11-21', 'QAT', 'ECU'),
  ('2022-11-21', 'SEN', 'NED'),
  ('2022-11-25', 'QAT', 'SEN'),
  ('2022-11-25', 'NED', 'ECU'),
  ('2022-11-29', 'ECU', 'SEN'),
  ('2022-11-29', 'NED', 'QAT')],
 'B': [('2022-11-21', 'ENG', 'IRN'),
  ('2022-11-21', 'USA', 'WAL'),
  ('2022-11-25', 'WAL', 'IRN'),
  ('2022-11-25', 'ENG', 'USA'),
  ('2022-11-29', 'WAL', 'ENG'),
  ('2022-11-29', 'IRN', 'USA')],
 'C': [('2022-11-22', 'MEX', 'POL'),
  ('2022-11-22', 'ARG', 'KSA'),
  ('2022-11-26', 'POL', 'KSA'),
  ('2022-11-26', 'ARG', 'MEX'),
  ('2022-11-30', 'POL', 'ARG'),
  ('2022-11-30', 'KSA', 'MEX')],
 'D': [('2022-11-22', 'FRA', 'AUS'),
  ('2022-11-22', 'DEN', 'TUN'),
  ('2022-11-26', 'TUN', 'AUS'),
  ('2022-11-26', 'FRA', 'DEN'),
  ('2022-11-30', 'AUS', 'DEN'),
  ('2022-11-30', 'TUN', 'FRA')],
 'E': [('2022-11-23', 'ESP', 'CRC'),
  ('2022-11-23', 'GER', 'JPN'),
  ('2022-11-27', 'JPN', 'CRC'),
  ('2022-11-27', 'ESP', 'GER'),
  ('2022-12-01', 'JPN', 'ESP'),
  ('2022-12-01', 'CRC', 'GER')],
 'F': [('2

[[('Nov 21', 'QAT', 'ECU', 1),
  ('Nov 21', 'SEN', 'NED', 2),
  ('Nov 25', 'QAT', 'SEN', 18),
  ('Nov 25', 'NED', 'ECU', 19),
  ('Nov 29', 'ECU', 'SEN', 35),
  ('Nov 29', 'NED', 'QAT', 36)],
 [('Nov 21', 'ENG', 'IRN', 3),
  ('Nov 21', 'USA', 'WAL', 4),
  ('Nov 25', 'WAL', 'IRN', 17),
  ('Nov 25', 'ENG', 'USA', 20),
  ('Nov 29', 'WAL', 'ENG', 33),
  ('Nov 29', 'IRN', 'USA', 34)],
 [('Nov 22', 'MEX', 'POL', 7),
  ('Nov 22', 'ARG', 'KSA', 8),
  ('Nov 26', 'POL', 'KSA', 22),
  ('Nov 26', 'ARG', 'MEX', 24),
  ('Nov 30', 'POL', 'ARG', 39),
  ('Nov 30', 'KSA', 'MEX', 40)],
 [('Nov 22', 'FRA', 'AUS', 5),
  ('Nov 22', 'DEN', 'TUN', 6),
  ('Nov 26', 'TUN', 'AUS', 21),
  ('Nov 26', 'FRA', 'DEN', 23),
  ('Nov 30', 'AUS', 'DEN', 37),
  ('Nov 30', 'TUN', 'FRA', 38)],
 [('Nov 23', 'ESP', 'CRC', 10),
  ('Nov 23', 'GER', 'JPN', 11),
  ('Nov 27', 'JPN', 'CRC', 25),
  ('Nov 27', 'ESP', 'GER', 28),
  ('Dec 01', 'JPN', 'ESP', 43),
  ('Dec 01', 'CRC', 'GER', 44)],
 [('Nov 23', 'BEL', 'CAN', 9),
  ('Nov 23',

In [36]:
TEAM_CODES = db.execute('SELECT code, team FROM teams;')
TEAM_CODES = {team['code']:team['team'] for team in TEAM_CODES}
TEAM_CODES

{'SEN': 'Senegal',
 'QAT': 'Qatar',
 'NED': 'Netherlands',
 'ECU': 'Ecuador',
 'IRN': 'Iran',
 'ENG': 'England',
 'USA': 'United States',
 'WAL': 'Wales',
 'ARG': 'Argentina',
 'KSA': 'Saudi Arabia',
 'MEX': 'Mexico',
 'POL': 'Poland',
 'DEN': 'Denmark',
 'TUN': 'Tunisia',
 'FRA': 'France',
 'AUS': 'Australia',
 'GER': 'Germany',
 'JPN': 'Japan',
 'ESP': 'Spain',
 'CRC': 'Costa Rica',
 'MAR': 'Morocco',
 'CRO': 'Croatia',
 'BEL': 'Belgium',
 'CAN': 'Canada',
 'SUI': 'Switzerland',
 'CMR': 'Cameroon',
 'BRA': 'Brazil',
 'SRB': 'Serbia',
 'URU': 'Uruguay',
 'KOR': 'South Korea',
 'POR': 'Portugal',
 'GHA': 'Ghana'}

In [37]:
from datetime import datetime


In [38]:
help(datetime)

Help on class datetime in module datetime:

class datetime(date)
 |  datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
 |  
 |  The year, month and day arguments are required. tzinfo may be None, or an
 |  instance of a tzinfo subclass. The remaining arguments may be ints.
 |  
 |  Method resolution order:
 |      datetime
 |      date
 |      builtins.object
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __gt__(self, value, /)
 |      Return self>value.
 |  
 |  __hash__(self, /)
 |      Return hash(self).
 |  
 |  __le__(self, value, /)
 |      Return self<=value.
 |  
 |  __lt__(self, value, /)
 |      Return self<value.
 |  
 |  __ne__(self, value, /)
 |      Return self!=value.
 |  
 |  __radd__(self, value

In [48]:
date=datetime.strptime("12-4-2022","%m-%d-%Y")

In [50]:
date.strftime("%b %d")

'Dec 04'

In [57]:
group = list(range(8))
group[::2]

[0, 2, 4, 6]

In [60]:
for x1, x2 in zip(group[::2], group[1::2]):
    print(x1, x2)

0 1
2 3
4 5
6 7
