In [2]:
%pip install pandas

Collecting pandas
  Downloading pandas-2.2.1-cp312-cp312-macosx_11_0_arm64.whl.metadata (19 kB)
Collecting numpy<2,>=1.26.0 (from pandas)
  Using cached numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl.metadata (61 kB)
Collecting pytz>=2020.1 (from pandas)
  Downloading pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.2.1-cp312-cp312-macosx_11_0_arm64.whl (11.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.3/11.3 MB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hUsing cached numpy-1.26.4-cp312-cp312-macosx_11_0_arm64.whl (13.7 MB)
Downloading pytz-2024.1-py2.py3-none-any.whl (505 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m505.5/505.5 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hDownloading tzdata-2024.1-py2.py3-none-any.whl (345 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
import pandas as pd

def process_csv(csv_file):
    # Read CSV file into a DataFrame
    df = pd.read_csv(csv_file, delimiter=',', encoding='utf-8')
    
    # Rename columns for clarity
    df.columns = [
        'Year', 'DateTime', 'Stage', 'Stadium', 'City',
        'Home_Team', 'Home_Goals', 'Away_Goals', 'Away_Team',
        'ab', 'aa', 'a', 'b',
        'c', 'd', 'e', 'f',
        'g','h','bb'
    ]
    df = df[['Year', 'Home_Team', 'Home_Goals', 'Away_Goals', 'Away_Team']]
    

    # Group by country and year, summing up goals
    goals_by_country = df.groupby(['Year', 'Home_Team'])['Home_Goals'].sum().reset_index()
    goals_by_country = goals_by_country.rename(columns={'Home_Team': 'Country', 'Home_Goals': 'Goals'})
    away_goals_by_country = df.groupby(['Year', 'Away_Team'])['Away_Goals'].sum().reset_index()
    away_goals_by_country = away_goals_by_country.rename(columns={'Away_Team': 'Country', 'Away_Goals': 'Goals'})
    goals_by_country = pd.concat([goals_by_country, away_goals_by_country], ignore_index=True)
    goals_by_country = goals_by_country.groupby(['Year', 'Country'])['Goals'].sum().reset_index()
    goals_by_country['Cumulative_Goals'] = goals_by_country.sort_values('Year').groupby('Country')['Goals'].cumsum()
    return goals_by_country

def print_results(goals_by_country):
    # Sort countries by total goals scored
    sorted_countries = goals_by_country.sort_values(by='Goals', ascending=False)

    # Print results
    for idx, row in sorted_countries.iterrows():
        print(f"Year: {row['Year']}, Country: {row['Country']}, Goals: {row['Cumulative_Goals']}")

if __name__ == "__main__":
    csv_file = "data/WorldCupMatches.csv"
    goals_by_country = process_csv(csv_file)
    print_results(goals_by_country)


Year: 2014.0, Country: Germany, Goals: 104.0
Year: 1954.0, Country: Hungary, Goals: 47.0
Year: 1954.0, Country: Germany FR, Goals: 25.0
Year: 1958.0, Country: France, Goals: 36.0
Year: 1950.0, Country: Brazil, Goals: 42.0
Year: 2014.0, Country: Netherlands, Goals: 91.0
Year: 1970.0, Country: Brazil, Goals: 103.0
Year: 2002.0, Country: Brazil, Goals: 191.0
Year: 1930.0, Country: Argentina, Goals: 18.0
Year: 1970.0, Country: Germany FR, Goals: 73.0
Year: 1966.0, Country: Portugal, Goals: 17.0
Year: 1954.0, Country: Austria, Goals: 24.0
Year: 1974.0, Country: Poland, Goals: 21.0
Year: 2010.0, Country: Germany, Goals: 75.0
Year: 1958.0, Country: Brazil, Goals: 66.0
Year: 1982.0, Country: France, Goals: 59.0
Year: 1954.0, Country: Uruguay, Goals: 46.0
Year: 1982.0, Country: Brazil, Goals: 134.0
Year: 1994.0, Country: Sweden, Goals: 66.0
Year: 1950.0, Country: Uruguay, Goals: 30.0
Year: 1998.0, Country: France, Goals: 86.0
Year: 1974.0, Country: Netherlands, Goals: 17.0
Year: 1978.0, Country

In [4]:
# Sort the DataFrame by 'Year'
sorted_df = goals_by_country.sort_values(by='Year')

# Write the DataFrame to a CSV file
# Rename the columns
sorted_df = sorted_df.rename(columns={'Year': 'date', 'Country': 'country', 'Cumulative_Goals': 'goals'})

# Write the DataFrame to a CSV file
sorted_df.to_csv('data/race_chart_data.csv', columns=['date', 'country', 'goals'], index=False)
