In [1]:
import os
import shutil
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [14]:
for year in range(1956, 2024):

    # Skip covid year 2019/20
    if year == 2019:
        continue

    # Read the match data into a dataframe
    df_matches = pd.read_csv("../processed data/" + str(year) + "/uitslagen" + str(year) + ".csv")

    # Get all the clubs
    clubs = list(df_matches["Thuis"].unique())

    rows = []

    for club in clubs:
        #Before the first game of the season
        rows.append([club, 0, 0, 0, 0, 0, 0, 0, 0])

    # Create the second data frame for the eindstand
    df_eindstand = pd.DataFrame(rows, columns=["Club", "Gs", "W", "G", "V", "Pnt", "Dv", "Dt", "Ds"])

    # Loop over the matches
    for i in range(len(df_matches)):
        game = df_matches.iloc[i].to_dict()

        # Add to played games
        df_eindstand["Gs"][df_eindstand["Club"] == game["Thuis"]] += 1
        df_eindstand["Gs"][df_eindstand["Club"] == game["Uit"]] += 1

        # Add the goals for and against
        df_eindstand["Dv"][df_eindstand["Club"] == game["Thuis"]] += game["Thuis Doelpunten"]
        df_eindstand["Dt"][df_eindstand["Club"] == game["Thuis"]] += game["Uit Doelpunten"]
        df_eindstand["Dv"][df_eindstand["Club"] == game["Uit"]] += game["Uit Doelpunten"]
        df_eindstand["Dt"][df_eindstand["Club"] == game["Uit"]] += game["Thuis Doelpunten"]

        # Add the wins, draws and losses
        if game["Uitslag"] == "Gewonnen":
            df_eindstand["W"][df_eindstand["Club"] == game["Thuis"]] += 1
            df_eindstand["V"][df_eindstand["Club"] == game["Uit"]] += 1
        elif game["Uitslag"] == "Gelijk":
            df_eindstand["G"][df_eindstand["Club"] == game["Thuis"]] += 1
            df_eindstand["G"][df_eindstand["Club"] == game["Uit"]] += 1
        else:
            df_eindstand["V"][df_eindstand["Club"] == game["Thuis"]] += 1
            df_eindstand["W"][df_eindstand["Club"] == game["Uit"]] += 1
    
    # Before 1994 a different scoring system was used
    if year <= 1994:
        df_eindstand["Pnt"] = 2 * df_eindstand["W"] + df_eindstand["G"]
    else:
        df_eindstand["Pnt"] = 3 * df_eindstand["W"] + df_eindstand["G"]

    # Calculate the Goal Difference
    df_eindstand["Ds"] = df_eindstand["Dv"] - df_eindstand["Dt"] 

    # Create a dataframe with the modern scoring system
    df_eindstand_modern = df_eindstand.copy()
    df_eindstand_modern["Pnt"] = 3 * df_eindstand["W"] + df_eindstand["G"]

    # Sort the teams by points, and then by goal difference
    df_eindstand = df_eindstand.sort_values(by=["Pnt", "Ds"], ascending=False)
    df_eindstand_modern = df_eindstand_modern.sort_values(by=["Pnt", "Ds"], ascending=False)

    df_eindstand["Pos"] = list(range(1, len(clubs) + 1))
    df_eindstand_modern["Pos"] = list(range(1, len(clubs) + 1))

    # Define file paths
    eindstand_dir = f"../processed data/{year}"
    file_path_eindstand= f"{eindstand_dir}/eindstand{year}.csv"
    file_path_eindstand_modern = f"{eindstand_dir}/eindstand_modern{year}.csv"

    # Ensure directory exists
    os.makedirs(eindstand_dir, exist_ok=True)

    # Check and resolve conflicts for processed file
    if os.path.isfile(file_path_eindstand):
        os.remove(file_path_eindstand)  # Remove the conflicting file
    elif os.path.isdir(file_path_eindstand):
        import shutil
        shutil.rmtree(file_path_eindstand)  # Remove the conflicting directory

    # Write processed CSV file
    df_eindstand.to_csv(file_path_eindstand, index=False)
    df_eindstand_modern.to_csv(file_path_eindstand_modern, index=False)



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_eindstand["Gs"][df_eindstand["Club"] == game["Thuis"]] += 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_eindstand["Gs"][df_eindstand["Club"] == game["Uit"]] += 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_eindstand["Dv"][df_eindstand["Club"] == game["Thuis"]] += game["Thuis Doelpunten"]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/index

In [12]:
# Show the standings scored with the season's own points rule. `df_eindstand`
# is whatever the loop cell left behind, i.e. the last season it processed.
df_eindstand

Unnamed: 0,Club,Gs,W,G,V,Pnt,Dv,Dt,Ds,Pos
3,Ajax,34,22,5,7,49,64,40,24,1
1,Fortuna '54,34,20,5,9,45,76,48,28,2
8,SC Enschede,34,15,11,8,41,81,47,34,3
9,MVV Maastricht,34,15,10,9,40,53,42,11,4
4,PSV,34,18,3,13,39,93,71,22,5
13,Feyenoord,34,15,9,10,39,79,58,21,6
2,VVV-Venlo,34,16,6,12,38,50,53,-3,7
16,Sparta Rotterdam,34,12,12,10,36,66,59,7,8
15,NAC Breda,34,14,8,12,36,59,61,-2,9
0,DOS,34,17,1,16,35,79,75,4,10


In [11]:
# Show the same season re-scored with 3 points per win. `df_eindstand_modern`
# is left behind by the loop cell for the last season it processed.
df_eindstand_modern

Unnamed: 0,Club,Gs,W,G,V,Pnt,Dv,Dt,Ds,Pos
3,Ajax,34,22,5,7,71,64,40,24,1
1,Fortuna '54,34,20,5,9,65,76,48,28,2
4,PSV,34,18,3,13,57,93,71,22,3
8,SC Enschede,34,15,11,8,56,81,47,34,4
9,MVV Maastricht,34,15,10,9,55,53,42,11,5
13,Feyenoord,34,15,9,10,54,79,58,21,6
2,VVV-Venlo,34,16,6,12,54,50,53,-3,7
0,DOS,34,17,1,16,52,79,75,4,8
15,NAC Breda,34,14,8,12,50,59,61,-2,9
16,Sparta Rotterdam,34,12,12,10,48,66,59,7,10


In [None]:
# Define paths
    eindstand_dir = f"../processed data/{year}"
    file_path_eindstand= f"{eindstand_dir}/eindstand{year}.csv"
    file_path_eindstand_modern = f"{eindstand_dir}/eindstand_modern{year}.csv"

    # Ensure directories exist
    os.makedirs(eindstand_dir, exist_ok=True)

    # Check and resolve conflicts for processed file
    if os.path.isfile(file_path_eindstand):
        os.remove(file_path_eindstand)  # Remove the conflicting file
    elif os.path.isdir(file_path_eindstand):
        import shutil
        shutil.rmtree(file_path_eindstand)  # Remove the conflicting directory

    # Write processed CSV file
    df_eindstand.to_csv(file_path_eindstand, index=False)

    df_eindstand["Pnt"] = 3 * df_eindstand["W"] + df_eindstand["G"]
    df_eindstand = df_eindstand.sort_values(by=["Pnt", "Ds"], ascending=False)
    df_eindstand.to_csv(file_path_eindstand_modern, index=False)