# Simulating a season with the Algorithms
In this interactive notebook, a whole soccer season is simulated using the algorithms that we are assessing.

In [1]:
# Import the libraries
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np

In [6]:
# Load the matches and order them by their match date in one step
matches = pd.read_csv(
    'dataset/intersection/statsbomb_matches_shots/matches_1. Bundesliga_27.csv'
).sort_values(by='match_date')
# Load the shots file that accompanies the matches file
shots = pd.read_csv(
    'dataset/intersection/statsbomb_matches_shots/shots_1. Bundesliga_27.csv'
)

In [7]:
# Define a function which calculates the distribution of points for both teams for a match
def get_point_distribution(xgs_1, xgs_2, poi_bin_algorithm):
    """Return the distribution of league points for both teams of a match.

    Parameters
    ----------
    xgs_1, xgs_2 :
        Inputs handed unchanged to ``poi_bin_algorithm`` for team 1 and
        team 2 (presumably the per-shot scoring probabilities of each team).
    poi_bin_algorithm :
        Callable that turns such an input into a sequence whose entry ``k``
        is the probability that the team scores exactly ``k`` goals.

    Returns
    -------
    tuple of two dicts
        ``(points_prob_1, points_prob_2)``; each maps the points a team can
        earn (0 = loss, 1 = draw, 3 = win) to the probability of that outcome.
    """
    # Goal distributions of both teams
    goals_dist_1 = poi_bin_algorithm(xgs_1)
    goals_dist_2 = poi_bin_algorithm(xgs_2)
    points_prob_1 = {0: 0, 1: 0, 3: 0}
    points_prob_2 = {0: 0, 1: 0, 3: 0}
    # Walk over every possible scoreline. The teams are treated as
    # independent, so the probability of a scoreline is the product of the
    # two marginal probabilities. No ordering by length is required, which
    # also avoids comparing the distributions with ``==`` (ambiguous for
    # NumPy arrays).
    for goals_1, prob_1 in enumerate(goals_dist_1):
        for goals_2, prob_2 in enumerate(goals_dist_2):
            joint = prob_1 * prob_2
            if goals_1 > goals_2:
                # Team 1 wins, team 2 loses
                points_prob_1[3] += joint
                points_prob_2[0] += joint
            elif goals_1 == goals_2:
                # Draw: one point each
                points_prob_1[1] += joint
                points_prob_2[1] += joint
            else:
                # Team 2 wins, team 1 loses
                points_prob_1[0] += joint
                points_prob_2[3] += joint
    return points_prob_1, points_prob_2

In [8]:
# Define the algorithms to calculate the Poisson-Binomial distribution

def dp(p_is):
    """Compute the Poisson-Binomial distribution by dynamic programming.

    ``p_is`` holds the success probabilities of the individual trials. The
    returned list has ``len(p_is) + 1`` entries; entry ``k`` is the
    probability of exactly ``k`` successes.
    """
    trials = len(p_is)
    # Before any trial is processed, zero successes is certain
    table = [1] + [0] * trials
    for trial in range(trials):
        prob = p_is[trial]
        # Update from the highest count downwards so each entry is still the
        # value from before this trial when it is read
        for count in range(trial + 1, 0, -1):
            moved = prob * table[count - 1]
            table[count - 1] -= moved
            table[count] += moved
    return table

# Dependency for the FFT algorithm
from poibin.poibin import PoiBin

def fft(p_is):
    """Compute the Poisson-Binomial distribution with the FFT algorithm.

    Builds a ``PoiBin`` object from ``p_is`` and returns the result of its
    ``get_pmf_xi()`` method (presumably the probability of each possible
    number of successes -- confirm against the poibin documentation).
    """
    poisson_binomial = PoiBin(p_is)
    return poisson_binomial.get_pmf_xi()

def simulation(p_is, n=10000):
    """Estimate the Poisson-Binomial distribution by repeated sampling.

    Each of the ``n`` rounds draws one uniform sample per probability in
    ``p_is`` and counts how many probabilities exceed their sample. The
    returned array has ``len(p_is) + 1`` entries; entry ``k`` is the share of
    rounds that produced exactly ``k`` successes.
    """
    trials = len(p_is)
    # One bin per possible number of successes (0 .. trials)
    counts = np.zeros(trials + 1)
    probs = np.array(p_is)
    for _ in range(n):
        # One uniform draw from [0, 1) per trial
        draws = np.random.rand(trials)
        # A trial succeeds when its probability exceeds its draw
        succeeded = probs - draws > 0
        counts[succeeded.sum()] += 1
    # Turn the counts into relative frequencies
    return counts / n