In [41]:
import pandas as pd
import plotly.express as px
import numpy as np
import scipy 

In [42]:
df_results = pd.read_csv('files/input/kaggle/results.csv')
df_shootouts = pd.read_csv('files/input/kaggle/shootouts.csv')

Data Prep

In [43]:
continents = {
    "Afghanistan": "Asia",
    "Egypt": "Africa",
    "Albania": "Europe",
    "Algeria": "Africa",
    "American Samoa": "Oceania",
    "Andorra": "Europe",
    "Angola": "Africa",
    "Anguilla": "North America",
    "Antigua and Barbuda": "North America",
    "Equatorial Guinea": "Africa",
    "Argentina": "South America",
    "Armenia": "Asia",
    "Aruba": "North America",
    "Azerbaijan": "Asia",
    "Ethiopia": "Africa",
    "Australia": "Oceania",
    "Bahamas": "North America",
    "Bahrain": "Asia",
    "Bangladesh": "Asia",
    "Barbados": "North America",
    "Belarus": "Europe",
    "Belgium": "Europe",
    "Belize": "North America",
    "Benin": "Africa",
    "Bermuda": "North America",
    "Bhutan": "Asia",
    "Bolivia": "South America",
    "Bosnia and Herzegovina": "Europe",
    "Botswana": "Africa",
    "Brazil": "South America",
    "Brunei Darussalam": "Asia",
    "Bulgaria": "Europe",
    "Burkina Faso": "Africa",
    "Burundi": "Africa",
    "Cayman Islands": "North America",
    "Chile": "South America",
    "China": "Asia",
    "Cook Islands": "Oceania",
    "Costa Rica": "North America",
    "Curaçao": "North America",
    "Denmark": "Europe",
    "Germany": "Europe",
    "Dominica": "North America",
    "Dominican Republic": "North America",
    "Djibouti": "Africa",
    "Ecuador": "South America",
    "El Salvador": "North America",
    "Ivory Coast": "Africa",
    "England": "Europe",
    "Eritrea": "Africa",
    "Estonia": "Europe",
    "Eswatini": "Africa",
    "Faroe Islands": "Europe",
    "Fiji": "Oceania",
    "Finland": "Europe",
    "France": "Europe",
    "Gabon": "Africa",
    "Gambia": "Africa",
    "Georgia": "Asia",
    "Ghana": "Africa",
    "Gibraltar": "Europe",
    "Grenada": "North America",
    "Greece": "Europe",
    "Guam": "Oceania",
    "Guatemala": "North America",
    "Guyana": "South America",
    "Guinea": "Africa",
    "Guinea-Bissau": "Africa",
    "Haiti": "North America",
    "Honduras": "North America",
    "Hong Kong": "Asia",
    "India": "Asia",
    "Indonesia": "Asia",
    "Iraq": "Asia",
    "Iran": "Asia",
    "Ireland": "Europe",
    "Iceland": "Europe",
    "Israel": "Asia",
    "Italy": "Europe",
    "Jamaica": "North America",
    "Japan": "Asia",
    "Yemen": "Asia",
    "Jordan": "Asia",
    "U.S. Virgin Islands": "North America",
    "British Virgin Islands": "North America",
    "Cambodia": "Asia",
    "Cameroon": "Africa",
    "Canada": "North America",
    "Cape Verde": "Africa",
    "Kazakhstan": "Asia",
    "Qatar": "Asia",
    "Kenya": "Africa",
    "Kyrgyzstan": "Asia",
    "Colombia": "South America",
    "Comoros": "Africa",
    "Kosovo": "Europe",
    "Democratic Republic of the Congo": "Africa",
    "Republic of the Congo": "Africa",
    "North Korea": "Asia",
    "South Korea": "Asia",
    "Croatia": "Europe",
    "Cuba": "North America",
    "Kuwait": "Asia",
    "Laos": "Asia",
    "Lesotho": "Africa",
    "Latvia": "Europe",
    "Lebanon": "Asia",
    "Liberia": "Africa",
    "Libya": "Africa",
    "Liechtenstein": "Europe",
    "Lithuania": "Europe",
    "Luxembourg": "Europe",
    "Macau": "Asia",
    "Madagascar": "Africa",
    "Malawi": "Africa",
    "Malaysia": "Asia",
    "Maldives": "Asia",
    "Mali": "Africa",
    "Malta": "Europe",
    "Morocco": "Africa",
    "Mauritania": "Africa",
    "Mauritius": "Africa",
    "Mexico": "North America",
    "Moldova": "Europe",
    "Mongolia": "Asia",
    "Montenegro": "Europe",
    "Montserrat": "North America",
    "Mozambique": "Africa",
    "Myanmar": "Asia",
    "Namibia": "Africa",
    "Nepal": "Asia",
    "New Caledonia": "Oceania",
    "New Zealand": "Oceania",
    "Nicaragua": "North America",
    "Netherlands": "Europe",
    "Niger": "Africa",
    "Nigeria": "Africa",
    "Northern Ireland": "Europe",
    "North Macedonia": "Europe",
    "Norway": "Europe",
    "Oman": "Asia",
    "Austria": "Europe",
    "East Timor": "Asia",
    "Pakistan": "Asia",
    "Palestine": "Asia",
    "Panama": "North America",
    "Papua New Guinea": "Oceania",
    "Paraguay": "South America",
    "Peru": "South America",
    "Philippines": "Asia",
    "Poland": "Europe",
    "Portugal": "Europe",
    "Puerto Rico": "North America",
    "Rwanda": "Africa",
    "Romania": "Europe",
    "Russia": "Europe",
    "Saint Kitts and Nevis": "North America",
    "Saint Lucia": "North America",
    "Saint Vincent and the Grenadines": "North America",
    "Solomon Islands": "Oceania",
    "Zambia": "Africa",
    "Samoa": "Oceania",
    "San Marino": "Europe",
    "Sao Tome and Principe": "Africa",
    "Saudi Arabia": "Asia",
    "Scotland": "Europe",
    "Sweden": "Europe",
    "Switzerland": "Europe",
    "Senegal": "Africa",
    "Serbia": "Europe",
    "Seychelles": "Africa",
    "Sierra Leone": "Africa",
    "Zimbabwe": "Africa",
    "Singapore": "Asia",
    "Slovakia": "Europe",
    "Slovenia": "Europe",
    "Somalia": "Africa",
    "Spain": "Europe",
    "Sri Lanka": "Asia",
    "South Africa": "Africa",
    "Sudan": "Africa",
    "South Sudan": "Africa",
    "Suriname": "South America",
    "Syria": "Asia",
    "Tajikistan": "Asia",
    "Tahiti": "Oceania",
    "Taiwan": "Asia",
    "Tanzania": "Africa",
    "Thailand": "Asia",
    "Togo": "Africa",
    "Tonga": "Oceania",
    "Trinidad and Tobago": "North America",
    "Chad": "Africa",
    "Czech Republic": "Europe",
    "Tunisia": "Africa",
    "Turkey": "Asia",
    "Turkmenistan": "Asia",
    "Turks and Caicos Islands": "North America",
    "Uganda": "Africa",
    "Ukraine": "Europe",
    "Hungary": "Europe",
    "Uruguay": "South America",
    "Uzbekistan": "Asia",
    "Vanuatu": "Oceania",
    "Venezuela": "South America",
    "United Arab Emirates": "Asia",
    "United States": "North America",
    "Vietnam": "Asia",
    "Wales": "Europe",
    "Central African Republic": "Africa",
    "Cyprus": "Asia"
}

In [44]:
# Add home_continent and away_continent columns
df_results['home_continent'] = df_results['home_team'].map(continents)
df_results['away_continent'] = df_results['away_team'].map(continents)

In [45]:
#Fill na-values
df_results["home_continent"] = df_results["home_continent"].fillna("Country not in Fifa Ranking")
df_results["away_continent"] = df_results["away_continent"].fillna("Country not in Fifa Ranking")

In [46]:
#drop na-values
no_country_in_fifa_ranking_both = df_results[(df_results["home_continent"] == "Country not in Fifa Ranking") | (df_results["away_continent"] == "Country not in Fifa Ranking")]

df_results = df_results.drop(no_country_in_fifa_ranking_both.index)

In [47]:
#transform date column into date-object
df_results["date"] = pd.to_datetime(df_results["date"])
#collect all games for the coming em 2024, because there aren't any results yet
df_em2024 = df_results[(df_results["tournament"] == "UEFA Euro") & (df_results["date"].dt.year == 2024)]
#drop em 2024 games
df_results = df_results.drop(df_em2024.index)

In [48]:
#add the column "winning_country" by comparing the home and away goals, also registering draws
df_results["winning_country"] = df_results.apply(lambda x: x["home_team"] if x["home_score"] > x["away_score"] else "Unentschieden" if x["home_score"] == x["away_score"] else x["away_team"], axis=1)

In [49]:
#add the column "winning_continent" by comparing the home and away goals, also registering draws
df_results["winning_continent"] = df_results.apply(lambda x: x["home_continent"] if x["home_score"] > x["away_score"] else "Unentschieden" if x["home_score"] == x["away_score"] else x["away_continent"], axis=1)

In [50]:
df_results["winning_country_is_home_team"] = df_results.apply(lambda x: True if x["home_team"] == x["winning_country"] else False, axis=1)

In [51]:
#add the columns home and away continent to the shootouts table
df_shootouts['home_continent'] = df_shootouts['home_team'].map(continents)
df_shootouts['away_continent'] = df_shootouts['away_team'].map(continents)

In [52]:
#fill na-values
df_shootouts["home_continent"] = df_shootouts["home_continent"].fillna("Country not in Fifa Ranking")
df_shootouts["away_continent"] = df_shootouts["away_continent"].fillna("Country not in Fifa Ranking")

In [53]:
#drop na-values
no_country_in_fifa_ranking_both = df_shootouts[(df_shootouts["home_continent"] == "Country not in Fifa Ranking") | (df_shootouts["away_continent"] == "Country not in Fifa Ranking")]

df_shootouts = df_shootouts.drop(no_country_in_fifa_ranking_both.index)

In [54]:
# Erstellen einer Mapping Series für winner zu winning_continent
winner_to_continent = df_shootouts['winner'].map(continents)

# Hinzufügen der Spalte winning_continent zum DataFrame
df_shootouts['winning_continent'] = winner_to_continent

In [55]:
df_results_distinct_continent = df_results[df_results['home_continent'] != df_results['away_continent']]

In [56]:
df_shootouts_distinct_continents = df_shootouts[df_shootouts["home_continent"] != df_shootouts["away_continent"]]

In [57]:
df_results_neutral_continents = df_results_distinct_continent[df_results_distinct_continent["neutral"] == True]

In [58]:
df_results_not_neutral_continents = df_results_distinct_continent[df_results_distinct_continent["neutral"] == False]

In [59]:
df_results_neutral_countries = df_results[df_results["neutral"] == True]

In [60]:
df_results_not_neutral_countries = df_results[df_results["neutral"] == False]

Analysis

Länder

In [61]:


# Anzahl der Heim- und Auswärtsspiele für jedes Team zählen
total_matches_home = df_results['home_team'].value_counts()
total_matches_away = df_results['away_team'].value_counts()

# Gesamtanzahl der Spiele durch Addition der Heim- und Auswärtsspiele für jedes Team zählen
total_matches = total_matches_home.add(total_matches_away, fill_value=0).sort_values(ascending=False)

# Anzahl der Heim- und Auswärtssiege für jedes Team zählen
home_wins = df_results[df_results['home_score'] > df_results['away_score']]['home_team'].value_counts()
away_wins = df_results[df_results['away_score'] > df_results['home_score']]['away_team'].value_counts()

# Gesamtanzahl der Siege durch Addition der Heim- und Auswärtssiege für jedes Team zählen
total_wins = home_wins.add(away_wins, fill_value=0).sort_values(ascending=False)

# Gewinnprozentsätze für Heim-, Auswärts- und Gesamtspiele berechnen
home_win_percentages = (home_wins / total_matches_home * 100).sort_values(ascending=False)
away_win_percentages = (away_wins / total_matches_away * 100).sort_values(ascending=False)
total_win_percentages = (total_wins / total_matches * 100).sort_values(ascending=False)

# Ergebnisse in einem DataFrame zusammenfassen
results = pd.DataFrame({
    'Total Matches': total_matches,
    'Home Matches': total_matches_home,
    'Away Matches': total_matches_away,
    'Total Wins': total_wins,
    'Home Wins': home_wins,
    'Away Wins': away_wins,
    'Total Win %': total_win_percentages,
    'Home Win %': home_win_percentages,
    'Away Win %': away_win_percentages
}).fillna(0)

# DataFrame nach Gesamtgewinnrate sortieren und anzeigen
results_sorted_by_total_win_rate = results.sort_values(by='Total Win %', ascending=False).head(10)
print("DataFrame nach Gesamtgewinnrate sortiert:")
display(results_sorted_by_total_win_rate)

# DataFrame nach Heimgewinnrate sortieren und anzeigen
results_sorted_by_home_win_rate = results.sort_values(by='Home Win %', ascending=False).head(10)
print("\nDataFrame nach Heimgewinnrate sortiert:")
display(results_sorted_by_home_win_rate)

# DataFrame nach Auswärtsgewinnrate sortieren und anzeigen
results_sorted_by_away_win_rate = results.sort_values(by='Away Win %', ascending=False).head(10)
print("\nDataFrame nach Auswärtsgewinnrate sortiert:")
display(results_sorted_by_away_win_rate)

# DataFrame für Länder mit den meisten gespielten Matches, absteigend sortiert nach Gesamtmatches
most_matches_df = results.sort_values(by='Total Matches', ascending=False).reset_index().rename(columns={'index': 'Country'}).head(10)
print("\nLänder mit den meisten gespielten Matches:")
display(most_matches_df[['Country', 'Total Matches']])


DataFrame nach Gesamtgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Brazil,983,564,419,631.0,407.0,224.0,64.191251,72.163121,53.460621
Spain,687,355,332,404.0,244.0,160.0,58.806405,68.732394,48.192771
Germany,934,496,438,544.0,307.0,237.0,58.244111,61.895161,54.109589
England,1012,506,506,582.0,316.0,266.0,57.509881,62.450593,52.56917
Iran,557,317,240,319.0,202.0,117.0,57.271095,63.722397,48.75
Argentina,1016,573,443,558.0,385.0,173.0,54.92126,67.190227,39.051919
Italy,796,437,359,430.0,278.0,152.0,54.020101,63.615561,42.339833
Tahiti,214,104,110,115.0,63.0,52.0,53.738318,60.576923,47.272727
Croatia,363,178,185,193.0,106.0,87.0,53.168044,59.550562,47.027027
South Korea,930,511,419,493.0,306.0,187.0,53.010753,59.882583,44.630072



DataFrame nach Heimgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Brazil,983,564,419,631.0,407.0,224.0,64.191251,72.163121,53.460621
Spain,687,355,332,404.0,244.0,160.0,58.806405,68.732394,48.192771
Argentina,1016,573,443,558.0,385.0,173.0,54.92126,67.190227,39.051919
Egypt,684,401,283,343.0,265.0,78.0,50.146199,66.084788,27.561837
Ivory Coast,563,287,276,285.0,187.0,98.0,50.62167,65.156794,35.507246
Czech Republic,335,162,173,177.0,105.0,72.0,52.835821,64.814815,41.618497
Iran,557,317,240,319.0,202.0,117.0,57.271095,63.722397,48.75
Italy,796,437,359,430.0,278.0,152.0,54.020101,63.615561,42.339833
New Caledonia,238,131,107,121.0,83.0,38.0,50.840336,63.358779,35.514019
Ghana,610,291,319,280.0,183.0,97.0,45.901639,62.886598,30.407524



DataFrame nach Auswärtsgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Germany,934,496,438,544.0,307.0,237.0,58.244111,61.895161,54.109589
Brazil,983,564,419,631.0,407.0,224.0,64.191251,72.163121,53.460621
England,1012,506,506,582.0,316.0,266.0,57.509881,62.450593,52.56917
Iran,557,317,240,319.0,202.0,117.0,57.271095,63.722397,48.75
Spain,687,355,332,404.0,244.0,160.0,58.806405,68.732394,48.192771
Tahiti,214,104,110,115.0,63.0,52.0,53.738318,60.576923,47.272727
Croatia,363,178,185,193.0,106.0,87.0,53.168044,59.550562,47.027027
South Korea,930,511,419,493.0,306.0,187.0,53.010753,59.882583,44.630072
Netherlands,791,429,362,405.0,247.0,158.0,51.201011,57.575758,43.646409
Russia,666,292,374,338.0,177.0,161.0,50.750751,60.616438,43.048128



Länder mit den meisten gespielten Matches:


Unnamed: 0,Country,Total Matches
0,Sweden,1027
1,Argentina,1016
2,England,1012
3,Brazil,983
4,Germany,934
5,Mexico,932
6,South Korea,930
7,Uruguay,912
8,Hungary,877
9,France,825


In [62]:


# Anzahl der Heim- und Auswärtsspiele für jedes Team zählen
total_matches_home = df_results_neutral_countries['home_team'].value_counts()
total_matches_away = df_results_neutral_countries['away_team'].value_counts()

# Gesamtanzahl der Spiele durch Addition der Heim- und Auswärtsspiele für jedes Team zählen
total_matches = total_matches_home.add(total_matches_away, fill_value=0).sort_values(ascending=False)

# Anzahl der Heim- und Auswärtssiege für jedes Team zählen
home_wins = df_results_neutral_countries[df_results_neutral_countries['home_score'] > df_results_neutral_countries['away_score']]['home_team'].value_counts()
away_wins = df_results_neutral_countries[df_results_neutral_countries['away_score'] > df_results_neutral_countries['home_score']]['away_team'].value_counts()

# Gesamtanzahl der Siege durch Addition der Heim- und Auswärtssiege für jedes Team zählen
total_wins = home_wins.add(away_wins, fill_value=0).sort_values(ascending=False)

# Gewinnprozentsätze für Heim-, Auswärts- und Gesamtspiele berechnen
home_win_percentages = (home_wins / total_matches_home * 100).sort_values(ascending=False)
away_win_percentages = (away_wins / total_matches_away * 100).sort_values(ascending=False)
total_win_percentages = (total_wins / total_matches * 100).sort_values(ascending=False)

# Ergebnisse in einem DataFrame zusammenfassen
results = pd.DataFrame({
    'Total Matches': total_matches,
    'Home Matches': total_matches_home,
    'Away Matches': total_matches_away,
    'Total Wins': total_wins,
    'Home Wins': home_wins,
    'Away Wins': away_wins,
    'Total Win %': total_win_percentages,
    'Home Win %': home_win_percentages,
    'Away Win %': away_win_percentages
}).fillna(0)

# DataFrame nach Gesamtgewinnrate sortieren und anzeigen
results_sorted_by_total_win_rate = results.sort_values(by='Total Win %', ascending=False).head(10)
print("DataFrame nach Gesamtgewinnrate sortiert:")
display(results_sorted_by_total_win_rate)

# DataFrame nach Heimgewinnrate sortieren und anzeigen
results_sorted_by_home_win_rate = results.sort_values(by='Home Win %', ascending=False).head(10)
print("\nDataFrame nach Heimgewinnrate sortiert:")
display(results_sorted_by_home_win_rate)

# DataFrame nach Auswärtsgewinnrate sortieren und anzeigen
results_sorted_by_away_win_rate = results.sort_values(by='Away Win %', ascending=False).head(10)
print("\nDataFrame nach Auswärtsgewinnrate sortiert:")
display(results_sorted_by_away_win_rate)

# DataFrame für Länder mit den meisten gespielten Matches, absteigend sortiert nach Gesamtmatches
most_matches_df = results.sort_values(by='Total Matches', ascending=False).reset_index().rename(columns={'index': 'Country'}).head(10)
print("\nLänder mit den meisten gespielten Matches:")
display(most_matches_df[['Country', 'Total Matches']])


DataFrame nach Gesamtgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Brazil,328.0,232.0,96.0,218.0,162.0,56.0,66.463415,69.827586,58.333333
Tahiti,112.0,66.0,46.0,71.0,44.0,27.0,63.392857,66.666667,58.695652
Argentina,292.0,234.0,58.0,183.0,160.0,23.0,62.671233,68.376068,39.655172
France,107.0,56.0,51.0,59.0,35.0,24.0,55.140187,62.5,47.058824
Germany,147.0,87.0,60.0,81.0,51.0,30.0,55.102041,58.62069,50.0
New Caledonia,109.0,58.0,51.0,60.0,38.0,22.0,55.045872,65.517241,43.137255
Israel,40.0,27.0,13.0,22.0,15.0,7.0,55.0,55.555556,53.846154
Spain,122.0,56.0,66.0,67.0,32.0,35.0,54.918033,57.142857,53.030303
Australia,132.0,85.0,47.0,72.0,51.0,21.0,54.545455,60.0,44.680851
Portugal,101.0,44.0,57.0,55.0,27.0,28.0,54.455446,61.363636,49.122807



DataFrame nach Heimgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Brazil,328.0,232.0,96.0,218.0,162.0,56.0,66.463415,69.827586,58.333333
Argentina,292.0,234.0,58.0,183.0,160.0,23.0,62.671233,68.376068,39.655172
Montenegro,5.0,3.0,2.0,2.0,2.0,0.0,40.0,66.666667,0.0
Samoa,20.0,9.0,11.0,7.0,6.0,1.0,35.0,66.666667,9.090909
Tahiti,112.0,66.0,46.0,71.0,44.0,27.0,63.392857,66.666667,58.695652
New Caledonia,109.0,58.0,51.0,60.0,38.0,22.0,55.045872,65.517241,43.137255
Russia,121.0,54.0,67.0,65.0,35.0,30.0,53.719008,64.814815,44.776119
Saint Vincent and the Grenadines,46.0,14.0,32.0,18.0,9.0,9.0,39.130435,64.285714,28.125
France,107.0,56.0,51.0,59.0,35.0,24.0,55.140187,62.5,47.058824
Egypt,200.0,137.0,63.0,100.0,85.0,15.0,50.0,62.043796,23.809524



DataFrame nach Auswärtsgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Netherlands,92.0,53.0,39.0,49.0,26.0,23.0,53.26087,49.056604,58.974359
Tahiti,112.0,66.0,46.0,71.0,44.0,27.0,63.392857,66.666667,58.695652
Brazil,328.0,232.0,96.0,218.0,162.0,56.0,66.463415,69.827586,58.333333
Israel,40.0,27.0,13.0,22.0,15.0,7.0,55.0,55.555556,53.846154
Uruguay,232.0,62.0,170.0,116.0,25.0,91.0,50.0,40.322581,53.529412
Uzbekistan,94.0,36.0,58.0,51.0,20.0,31.0,54.255319,55.555556,53.448276
Spain,122.0,56.0,66.0,67.0,32.0,35.0,54.918033,57.142857,53.030303
Germany,147.0,87.0,60.0,81.0,51.0,30.0,55.102041,58.62069,50.0
Tajikistan,58.0,30.0,28.0,30.0,16.0,14.0,51.724138,53.333333,50.0
Mexico,416.0,278.0,138.0,209.0,141.0,68.0,50.240385,50.719424,49.275362



Länder mit den meisten gespielten Matches:


Unnamed: 0,Country,Total Matches
0,Mexico,416.0
1,South Korea,404.0
2,Brazil,328.0
3,Iraq,308.0
4,Thailand,301.0
5,Argentina,292.0
6,Saudi Arabia,289.0
7,Indonesia,285.0
8,Japan,280.0
9,Myanmar,263.0


In [63]:


# Anzahl der Heim- und Auswärtsspiele für jedes Team zählen
total_matches_home = df_results_not_neutral_countries['home_team'].value_counts()
total_matches_away = df_results_not_neutral_countries['away_team'].value_counts()

# Gesamtanzahl der Spiele durch Addition der Heim- und Auswärtsspiele für jedes Team zählen
total_matches = total_matches_home.add(total_matches_away, fill_value=0).sort_values(ascending=False)

# Anzahl der Heim- und Auswärtssiege für jedes Team zählen
home_wins = df_results_not_neutral_countries[df_results_not_neutral_countries['home_score'] > df_results_not_neutral_countries['away_score']]['home_team'].value_counts()
away_wins = df_results_not_neutral_countries[df_results_not_neutral_countries['away_score'] > df_results_not_neutral_countries['home_score']]['away_team'].value_counts()

# Gesamtanzahl der Siege durch Addition der Heim- und Auswärtssiege für jedes Team zählen
total_wins = home_wins.add(away_wins, fill_value=0).sort_values(ascending=False)

# Gewinnprozentsätze für Heim-, Auswärts- und Gesamtspiele berechnen
home_win_percentages = (home_wins / total_matches_home * 100).sort_values(ascending=False)
away_win_percentages = (away_wins / total_matches_away * 100).sort_values(ascending=False)
total_win_percentages = (total_wins / total_matches * 100).sort_values(ascending=False)

# Ergebnisse in einem DataFrame zusammenfassen
results = pd.DataFrame({
    'Total Matches': total_matches,
    'Home Matches': total_matches_home,
    'Away Matches': total_matches_away,
    'Total Wins': total_wins,
    'Home Wins': home_wins,
    'Away Wins': away_wins,
    'Total Win %': total_win_percentages,
    'Home Win %': home_win_percentages,
    'Away Win %': away_win_percentages
}).fillna(0)

# DataFrame nach Gesamtgewinnrate sortieren und anzeigen
results_sorted_by_total_win_rate = results.sort_values(by='Total Win %', ascending=False).head(10)
print("DataFrame nach Gesamtgewinnrate sortiert:")
display(results_sorted_by_total_win_rate)

# DataFrame nach Heimgewinnrate sortieren und anzeigen
results_sorted_by_home_win_rate = results.sort_values(by='Home Win %', ascending=False).head(10)
print("\nDataFrame nach Heimgewinnrate sortiert:")
display(results_sorted_by_home_win_rate)

# DataFrame nach Auswärtsgewinnrate sortieren und anzeigen
results_sorted_by_away_win_rate = results.sort_values(by='Away Win %', ascending=False).head(10)
print("\nDataFrame nach Auswärtsgewinnrate sortiert:")
display(results_sorted_by_away_win_rate)

# DataFrame für Länder mit den meisten gespielten Matches, absteigend sortiert nach Gesamtmatches
most_matches_df = results.sort_values(by='Total Matches', ascending=False).reset_index().rename(columns={'index': 'Country'}).head(10)
print("\nLänder mit den meisten gespielten Matches:")
display(most_matches_df[['Country', 'Total Matches']])


DataFrame nach Gesamtgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Brazil,655.0,332.0,323,413.0,245.0,168.0,63.053435,73.795181,52.012384
Iran,353.0,199.0,154,218.0,141.0,77.0,61.756374,70.854271,50.0
England,903.0,454.0,449,539.0,293.0,246.0,59.689922,64.537445,54.788419
Spain,565.0,299.0,266,337.0,212.0,125.0,59.646018,70.90301,46.992481
Germany,787.0,409.0,378,463.0,256.0,207.0,58.831004,62.591687,54.761905
Italy,648.0,356.0,292,364.0,240.0,124.0,56.17284,67.41573,42.465753
Croatia,295.0,145.0,150,165.0,95.0,70.0,55.932203,65.517241,46.666667
South Korea,526.0,319.0,207,289.0,199.0,90.0,54.942966,62.382445,43.478261
Czech Republic,281.0,135.0,146,151.0,90.0,61.0,53.736655,66.666667,41.780822
Mexico,516.0,272.0,244,271.0,186.0,85.0,52.51938,68.382353,34.836066



DataFrame nach Heimgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Brazil,655.0,332.0,323,413.0,245.0,168.0,63.053435,73.795181,52.012384
Spain,565.0,299.0,266,337.0,212.0,125.0,59.646018,70.90301,46.992481
Iran,353.0,199.0,154,218.0,141.0,77.0,61.756374,70.854271,50.0
Guinea,327.0,125.0,202,123.0,87.0,36.0,37.614679,69.6,17.821782
Ivory Coast,378.0,189.0,189,190.0,130.0,60.0,50.26455,68.783069,31.746032
Mexico,516.0,272.0,244,271.0,186.0,85.0,52.51938,68.382353,34.836066
Iraq,285.0,101.0,184,126.0,69.0,57.0,44.210526,68.316832,30.978261
Egypt,484.0,264.0,220,243.0,180.0,63.0,50.206612,68.181818,28.636364
Ghana,410.0,182.0,228,191.0,124.0,67.0,46.585366,68.131868,29.385965
Nigeria,415.0,193.0,222,197.0,131.0,66.0,47.46988,67.875648,29.72973



DataFrame nach Auswärtsgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
England,903.0,454.0,449,539.0,293.0,246.0,59.689922,64.537445,54.788419
Germany,787.0,409.0,378,463.0,256.0,207.0,58.831004,62.591687,54.761905
Brazil,655.0,332.0,323,413.0,245.0,168.0,63.053435,73.795181,52.012384
Iran,353.0,199.0,154,218.0,141.0,77.0,61.756374,70.854271,50.0
Spain,565.0,299.0,266,337.0,212.0,125.0,59.646018,70.90301,46.992481
Croatia,295.0,145.0,150,165.0,95.0,70.0,55.932203,65.517241,46.666667
South Korea,526.0,319.0,207,289.0,199.0,90.0,54.942966,62.382445,43.478261
Russia,545.0,238.0,307,273.0,142.0,131.0,50.091743,59.663866,42.67101
Italy,648.0,356.0,292,364.0,240.0,124.0,56.17284,67.41573,42.465753
Netherlands,699.0,376.0,323,356.0,221.0,135.0,50.9299,58.776596,41.795666



Länder mit den meisten gespielten Matches:


Unnamed: 0,Country,Total Matches
0,Sweden,912.0
1,England,903.0
2,Hungary,815.0
3,Germany,787.0
4,Norway,755.0
5,Scotland,750.0
6,Denmark,728.0
7,Argentina,724.0
8,Switzerland,721.0
9,France,718.0


In [64]:
# # # Filter matches from 2000 to 2023
# matches_2000_to_2023 = df_results[(df_results['date'] >= '2000-01-01') & (df_results['date'] <= '2023-12-31')]

# # # Group by team and calculate overall performance metrics
# team_performance_2000_to_2023 = matches_2000_to_2023.groupby('home_team').agg({
#      'home_score': 'mean',
#      'away_score': 'mean',
#      'tournament': 'count',
#      'neutral': 'sum'
#  }).reset_index()

# team_performance_2000_to_2023.columns = ['Team', 'Average_Home_Score', 'Average_Away_Score', 'Matches_Played', 'Neutral_Ground']

#  # Calculate overall score by giving more weight to tournament matches
# team_performance_2000_to_2023['Overall_Score'] = team_performance_2000_to_2023['Matches_Played'] + 2 * team_performance_2000_to_2023['Neutral_Ground']

#  # Sort teams by overall score in descending order
# best_teams_2000_to_2023 = team_performance_2000_to_2023.sort_values(by='Overall_Score', ascending=False).head(10)
# # # Bar Chart for the Best Teams from 2000 to 2023
# fig_best_teams_2000_to_2023 = px.bar(best_teams_2000_to_2023, x='Team', y='Overall_Score',
#                                       title='Best Teams from 2000 to 2023',
#                                       labels={'Team': 'Football Team', 'Overall_Score': 'Overall Score'})
# fig_best_teams_2000_to_2023.show()

In [65]:


# Anzahl der Heimspiele und Heimtore pro Land
home_stats = df_results.groupby('home_team').agg(
    home_goals=('home_score', 'sum'),
    home_games=('home_score', 'count')
).reset_index()
home_stats.columns = ['team', 'home_goals', 'home_games']

# Anzahl der Auswärtsspiele und Auswärtstore pro Land
away_stats = df_results.groupby('away_team').agg(
    away_goals=('away_score', 'sum'),
    away_games=('away_score', 'count')
).reset_index()
away_stats.columns = ['team', 'away_goals', 'away_games']

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='team', how='outer').fillna(0)

# Berechnung der Gesamtzahl der Tore und Spiele
total_stats['total_goals'] = total_stats['home_goals'] + total_stats['away_goals']
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']

# Berechnung der durchschnittlichen Tore pro Spiel
total_stats['avg_goals_per_game'] = total_stats['total_goals'] / total_stats['total_games']

# Berechnung der durchschnittlichen Heim- und Auswärtstore pro Spiel
total_stats['avg_home_goals_per_game'] = total_stats['home_goals'] / total_stats['home_games']
total_stats['avg_away_goals_per_game'] = total_stats['away_goals'] / total_stats['away_games']

# Sortieren nach durchschnittlichen Toren pro Spiel in absteigender Reihenfolge
total_stats = total_stats.sort_values(by='avg_goals_per_game', ascending=False)

# Runden der durchschnittlichen Tore pro Spiel auf 2 Dezimalstellen
total_stats['avg_goals_per_game'] = total_stats['avg_goals_per_game'].round(2)
total_stats['avg_home_goals_per_game'] = total_stats['avg_home_goals_per_game'].round(2)
total_stats['avg_away_goals_per_game'] = total_stats['avg_away_goals_per_game'].round(2)

# Ausgabe der Top 10 Teams nach durchschnittlichen Toren pro Spiel
top_avg_goals = total_stats.head(10)
print("Top 10 Teams nach durchschnittlichen Toren pro Spiel:")
display(top_avg_goals)

# Sortieren nach durchschnittlichen Heimtoren pro Spiel
sorted_by_avg_home_goals = total_stats.sort_values(by='avg_home_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:")
display(sorted_by_avg_home_goals)

# Sortieren nach durchschnittlichen Auswärtstoren pro Spiel
sorted_by_avg_away_goals = total_stats.sort_values(by='avg_away_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:")
display(sorted_by_avg_away_goals)

# Erstellen eines DataFrames für Länder mit den meisten erzielten Toren
most_goals_df = total_stats.sort_values(by='total_goals', ascending=False).reset_index(drop=True).head(10)
print("Länder mit den meisten erzielten Toren:")
display(most_goals_df[['team', 'total_goals']])


Top 10 Teams nach durchschnittlichen Toren pro Spiel:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
129,New Caledonia,363.0,131,222.0,107,585.0,238,2.46,2.77,2.07
179,Tahiti,282.0,104,239.0,110,521.0,214,2.43,2.71,2.17
70,Germany,1249.0,496,861.0,438,2110.0,934,2.26,2.52,1.97
57,England,1172.0,506,1066.0,506,2238.0,1012,2.21,2.32,2.11
27,Brazil,1400.0,564,749.0,419,2149.0,983,2.19,2.48,1.79
128,Netherlands,1029.0,429,635.0,362,1664.0,791,2.1,2.4,1.75
11,Australia,720.0,300,382.0,234,1102.0,534,2.06,2.4,1.63
172,Spain,865.0,355,549.0,332,1414.0,687,2.06,2.44,1.65
83,Hungary,997.0,434,766.0,443,1763.0,877,2.01,2.3,1.73
167,Solomon Islands,223.0,88,159.0,103,382.0,191,2.0,2.53,1.54



DataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
129,New Caledonia,363.0,131,222.0,107,585.0,238,2.46,2.77,2.07
179,Tahiti,282.0,104,239.0,110,521.0,214,2.43,2.71,2.17
167,Solomon Islands,223.0,88,159.0,103,382.0,191,2.0,2.53,1.54
70,Germany,1249.0,496,861.0,438,2110.0,934,2.26,2.52,1.97
27,Brazil,1400.0,564,749.0,419,2149.0,983,2.19,2.48,1.79
172,Spain,865.0,355,549.0,332,1414.0,687,2.06,2.44,1.65
128,Netherlands,1029.0,429,635.0,362,1664.0,791,2.1,2.4,1.75
11,Australia,720.0,300,382.0,234,1102.0,534,2.06,2.4,1.63
130,New Zealand,353.0,149,296.0,223,649.0,372,1.74,2.37,1.33
176,Sweden,1169.0,497,875.0,530,2044.0,1027,1.99,2.35,1.65



DataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
179,Tahiti,282.0,104,239.0,110,521.0,214,2.43,2.71,2.17
57,England,1172.0,506,1066.0,506,2238.0,1012,2.21,2.32,2.11
129,New Caledonia,363.0,131,222.0,107,585.0,238,2.46,2.77,2.07
70,Germany,1249.0,496,861.0,438,2110.0,934,2.26,2.52,1.97
142,Papua New Guinea,86.0,55,146.0,77,232.0,132,1.76,1.56,1.9
27,Brazil,1400.0,564,749.0,419,2149.0,983,2.19,2.48,1.79
128,Netherlands,1029.0,429,635.0,362,1664.0,791,2.1,2.4,1.75
83,Hungary,997.0,434,766.0,443,1763.0,877,2.01,2.3,1.73
197,Vanuatu,110.0,72,203.0,118,313.0,190,1.65,1.53,1.72
172,Spain,865.0,355,549.0,332,1414.0,687,2.06,2.44,1.65


Länder mit den meisten erzielten Toren:


Unnamed: 0,team,total_goals
0,England,2238.0
1,Brazil,2149.0
2,Germany,2110.0
3,Sweden,2044.0
4,Argentina,1933.0
5,Hungary,1763.0
6,Netherlands,1664.0
7,South Korea,1657.0
8,Mexico,1646.0
9,France,1530.0


In [66]:


# Anzahl der Heimspiele und Heimtore pro Land
home_stats = df_results_neutral_countries.groupby('home_team').agg(
    home_goals=('home_score', 'sum'),
    home_games=('home_score', 'count')
).reset_index()
home_stats.columns = ['team', 'home_goals', 'home_games']

# Anzahl der Auswärtsspiele und Auswärtstore pro Land
away_stats = df_results_neutral_countries.groupby('away_team').agg(
    away_goals=('away_score', 'sum'),
    away_games=('away_score', 'count')
).reset_index()
away_stats.columns = ['team', 'away_goals', 'away_games']

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='team', how='outer').fillna(0)

# Berechnung der Gesamtzahl der Tore und Spiele
total_stats['total_goals'] = total_stats['home_goals'] + total_stats['away_goals']
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']

# Berechnung der durchschnittlichen Tore pro Spiel
total_stats['avg_goals_per_game'] = total_stats['total_goals'] / total_stats['total_games']

# Berechnung der durchschnittlichen Heim- und Auswärtstore pro Spiel
total_stats['avg_home_goals_per_game'] = total_stats['home_goals'] / total_stats['home_games']
total_stats['avg_away_goals_per_game'] = total_stats['away_goals'] / total_stats['away_games']

# Sortieren nach durchschnittlichen Toren pro Spiel in absteigender Reihenfolge
total_stats = total_stats.sort_values(by='avg_goals_per_game', ascending=False)

# Runden der durchschnittlichen Tore pro Spiel auf 2 Dezimalstellen
total_stats['avg_goals_per_game'] = total_stats['avg_goals_per_game'].round(2)
total_stats['avg_home_goals_per_game'] = total_stats['avg_home_goals_per_game'].round(2)
total_stats['avg_away_goals_per_game'] = total_stats['avg_away_goals_per_game'].round(2)

# Ausgabe der Top 10 Teams nach durchschnittlichen Toren pro Spiel
top_avg_goals = total_stats.head(10)
print("Top 10 Teams nach durchschnittlichen Toren pro Spiel:")
display(top_avg_goals)

# Sortieren nach durchschnittlichen Heimtoren pro Spiel
sorted_by_avg_home_goals = total_stats.sort_values(by='avg_home_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:")
display(sorted_by_avg_home_goals)

# Sortieren nach durchschnittlichen Auswärtstoren pro Spiel
sorted_by_avg_away_goals = total_stats.sort_values(by='avg_away_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:")
display(sorted_by_avg_away_goals)

# Erstellen eines DataFrames für Länder mit den meisten erzielten Toren
most_goals_df = total_stats.sort_values(by='total_goals', ascending=False).reset_index(drop=True).head(10)
print("Länder mit den meisten erzielten Toren:")
display(most_goals_df[['team', 'total_goals']])


Top 10 Teams nach durchschnittlichen Toren pro Spiel:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
128,New Caledonia,172.0,58.0,136.0,51.0,308.0,109.0,2.83,2.97,2.67
178,Tahiti,173.0,66.0,142.0,46.0,315.0,112.0,2.81,2.62,3.09
63,Fiji,200.0,68.0,77.0,35.0,277.0,103.0,2.69,2.94,2.2
166,Solomon Islands,126.0,38.0,94.0,55.0,220.0,93.0,2.37,3.32,1.71
27,Brazil,556.0,232.0,192.0,96.0,748.0,328.0,2.28,2.4,2.0
8,Argentina,562.0,234.0,89.0,58.0,651.0,292.0,2.23,2.4,1.53
88,Israel,62.0,27.0,26.0,13.0,88.0,40.0,2.2,2.3,2.0
82,Hungary,83.0,34.0,52.0,28.0,135.0,62.0,2.18,2.44,1.86
11,Australia,190.0,85.0,90.0,47.0,280.0,132.0,2.12,2.24,1.91
141,Papua New Guinea,53.0,35.0,109.0,42.0,162.0,77.0,2.1,1.51,2.6



DataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
166,Solomon Islands,126.0,38.0,94.0,55.0,220.0,93.0,2.37,3.32,1.71
128,New Caledonia,172.0,58.0,136.0,51.0,308.0,109.0,2.83,2.97,2.67
63,Fiji,200.0,68.0,77.0,35.0,277.0,103.0,2.69,2.94,2.2
155,Samoa,26.0,9.0,12.0,11.0,38.0,20.0,1.9,2.89,1.09
154,Saint Vincent and the Grenadines,37.0,14.0,39.0,32.0,76.0,46.0,1.65,2.64,1.22
178,Tahiti,173.0,66.0,142.0,46.0,315.0,112.0,2.81,2.62,3.09
82,Hungary,83.0,34.0,52.0,28.0,135.0,62.0,2.18,2.44,1.86
8,Argentina,562.0,234.0,89.0,58.0,651.0,292.0,2.23,2.4,1.53
27,Brazil,556.0,232.0,192.0,96.0,748.0,328.0,2.28,2.4,2.0
73,Grenada,72.0,31.0,41.0,32.0,113.0,63.0,1.79,2.32,1.28



DataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
178,Tahiti,173.0,66.0,142.0,46.0,315.0,112.0,2.81,2.62,3.09
128,New Caledonia,172.0,58.0,136.0,51.0,308.0,109.0,2.83,2.97,2.67
141,Papua New Guinea,53.0,35.0,109.0,42.0,162.0,77.0,2.1,1.51,2.6
127,Netherlands,91.0,53.0,88.0,39.0,179.0,92.0,1.95,1.72,2.26
63,Fiji,200.0,68.0,77.0,35.0,277.0,103.0,2.69,2.94,2.2
195,Uzbekistan,61.0,36.0,122.0,58.0,183.0,94.0,1.95,1.69,2.1
188,Turkmenistan,50.0,25.0,73.0,35.0,123.0,60.0,2.05,2.0,2.09
194,Uruguay,93.0,62.0,347.0,170.0,440.0,232.0,1.9,1.5,2.04
196,Vanuatu,54.0,27.0,157.0,78.0,211.0,105.0,2.01,2.0,2.01
27,Brazil,556.0,232.0,192.0,96.0,748.0,328.0,2.28,2.4,2.0


Länder mit den meisten erzielten Toren:


Unnamed: 0,team,total_goals
0,Brazil,748.0
1,South Korea,710.0
2,Mexico,693.0
3,Argentina,651.0
4,Japan,514.0
5,Iraq,511.0
6,Indonesia,510.0
7,Myanmar,499.0
8,Kuwait,467.0
9,Thailand,467.0


In [67]:


# Anzahl der Heimspiele und Heimtore pro Land
home_stats = df_results_not_neutral_countries.groupby('home_team').agg(
    home_goals=('home_score', 'sum'),
    home_games=('home_score', 'count')
).reset_index()
home_stats.columns = ['team', 'home_goals', 'home_games']

# Anzahl der Auswärtsspiele und Auswärtstore pro Land
away_stats = df_results_not_neutral_countries.groupby('away_team').agg(
    away_goals=('away_score', 'sum'),
    away_games=('away_score', 'count')
).reset_index()
away_stats.columns = ['team', 'away_goals', 'away_games']

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='team', how='outer').fillna(0)

# Berechnung der Gesamtzahl der Tore und Spiele
total_stats['total_goals'] = total_stats['home_goals'] + total_stats['away_goals']
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']

# Berechnung der durchschnittlichen Tore pro Spiel
total_stats['avg_goals_per_game'] = total_stats['total_goals'] / total_stats['total_games']

# Berechnung der durchschnittlichen Heim- und Auswärtstore pro Spiel
total_stats['avg_home_goals_per_game'] = total_stats['home_goals'] / total_stats['home_games']
total_stats['avg_away_goals_per_game'] = total_stats['away_goals'] / total_stats['away_games']

# Sortieren nach durchschnittlichen Toren pro Spiel in absteigender Reihenfolge
total_stats = total_stats.sort_values(by='avg_goals_per_game', ascending=False)

# Runden der durchschnittlichen Tore pro Spiel auf 2 Dezimalstellen
total_stats['avg_goals_per_game'] = total_stats['avg_goals_per_game'].round(2)
total_stats['avg_home_goals_per_game'] = total_stats['avg_home_goals_per_game'].round(2)
total_stats['avg_away_goals_per_game'] = total_stats['avg_away_goals_per_game'].round(2)

# Ausgabe der Top 10 Teams nach durchschnittlichen Toren pro Spiel
top_avg_goals = total_stats.head(10)
print("Top 10 Teams nach durchschnittlichen Toren pro Spiel:")
display(top_avg_goals)

# Sortieren nach durchschnittlichen Heimtoren pro Spiel
sorted_by_avg_home_goals = total_stats.sort_values(by='avg_home_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:")
display(sorted_by_avg_home_goals)

# Sortieren nach durchschnittlichen Auswärtstoren pro Spiel
sorted_by_avg_away_goals = total_stats.sort_values(by='avg_away_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:")
display(sorted_by_avg_away_goals)

# Erstellen eines DataFrames für Länder mit den meisten erzielten Toren
most_goals_df = total_stats.sort_values(by='total_goals', ascending=False).reset_index(drop=True).head(10)
print("Länder mit den meisten erzielten Toren:")
display(most_goals_df[['team', 'total_goals']])


Top 10 Teams nach durchschnittlichen Toren pro Spiel:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
68,Germany,1075.0,409.0,751.0,378,1826.0,787.0,2.32,2.63,1.99
55,England,1095.0,454.0,982.0,449,2077.0,903.0,2.3,2.41,2.19
127,New Caledonia,191.0,73.0,86.0,56,277.0,129.0,2.15,2.62,1.54
26,Brazil,844.0,332.0,557.0,323,1401.0,655.0,2.14,2.54,1.72
126,Netherlands,938.0,376.0,547.0,323,1485.0,699.0,2.12,2.49,1.69
170,Spain,742.0,299.0,444.0,266,1186.0,565.0,2.1,2.48,1.67
174,Sweden,1088.0,450.0,780.0,462,1868.0,912.0,2.05,2.42,1.69
10,Australia,530.0,215.0,292.0,187,822.0,402.0,2.04,2.47,1.56
177,Tahiti,109.0,38.0,97.0,64,206.0,102.0,2.02,2.87,1.52
81,Hungary,914.0,400.0,714.0,415,1628.0,815.0,2.0,2.28,1.72



DataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
177,Tahiti,109.0,38.0,97.0,64,206.0,102.0,2.02,2.87,1.52
68,Germany,1075.0,409.0,751.0,378,1826.0,787.0,2.32,2.63,1.99
127,New Caledonia,191.0,73.0,86.0,56,277.0,129.0,2.15,2.62,1.54
26,Brazil,844.0,332.0,557.0,323,1401.0,655.0,2.14,2.54,1.72
126,Netherlands,938.0,376.0,547.0,323,1485.0,699.0,2.12,2.49,1.69
170,Spain,742.0,299.0,444.0,266,1186.0,565.0,2.1,2.48,1.67
10,Australia,530.0,215.0,292.0,187,822.0,402.0,2.04,2.47,1.56
128,New Zealand,275.0,112.0,229.0,162,504.0,274.0,1.84,2.46,1.41
174,Sweden,1088.0,450.0,780.0,462,1868.0,912.0,2.05,2.42,1.69
55,England,1095.0,454.0,982.0,449,2077.0,903.0,2.3,2.41,2.19



DataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:


Unnamed: 0,team,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
55,England,1095.0,454.0,982.0,449,2077.0,903.0,2.3,2.41,2.19
68,Germany,1075.0,409.0,751.0,378,1826.0,787.0,2.32,2.63,1.99
26,Brazil,844.0,332.0,557.0,323,1401.0,655.0,2.14,2.54,1.72
81,Hungary,914.0,400.0,714.0,415,1628.0,815.0,2.0,2.28,1.72
126,Netherlands,938.0,376.0,547.0,323,1485.0,699.0,2.12,2.49,1.69
174,Sweden,1088.0,450.0,780.0,462,1868.0,912.0,2.05,2.42,1.69
170,Spain,742.0,299.0,444.0,266,1186.0,565.0,2.1,2.48,1.67
157,Scotland,717.0,371.0,594.0,379,1311.0,750.0,1.75,1.93,1.57
10,Australia,530.0,215.0,292.0,187,822.0,402.0,2.04,2.47,1.56
11,Austria,769.0,379.0,521.0,333,1290.0,712.0,1.81,2.03,1.56


Länder mit den meisten erzielten Toren:


Unnamed: 0,team,total_goals
0,England,2077.0
1,Sweden,1868.0
2,Germany,1826.0
3,Hungary,1628.0
4,Netherlands,1485.0
5,Brazil,1401.0
6,Denmark,1369.0
7,France,1334.0
8,Scotland,1311.0
9,Belgium,1292.0


In [68]:


# Funktion zur Berechnung der Gewinnrate mit Behandlung von Null-Division
def calculate_win_rate(wins, games):
    if games == 0:
        return 0
    else:
        return wins / games

# Statistiken für Heimspiele berechnen
home_stats = df_shootouts.groupby('home_team').agg(
    home_games=('home_team', 'size'),
    home_wins=('winner', lambda x: (x == df_shootouts.loc[x.index, 'home_team']).sum()),
    home_losses=('winner', lambda x: (x != df_shootouts.loc[x.index, 'home_team']).sum())
).reset_index()
home_stats['home_win_rate'] = home_stats.apply(lambda row: calculate_win_rate(row['home_wins'], row['home_games']), axis=1)
home_stats.rename(columns={'home_team': 'team'}, inplace=True)

# Statistiken für Auswärtsspiele berechnen
away_stats = df_shootouts.groupby('away_team').agg(
    away_games=('away_team', 'size'),
    away_wins=('winner', lambda x: (x == df_shootouts.loc[x.index, 'away_team']).sum()),
    away_losses=('winner', lambda x: (x != df_shootouts.loc[x.index, 'away_team']).sum())
).reset_index()
away_stats['away_win_rate'] = away_stats.apply(lambda row: calculate_win_rate(row['away_wins'], row['away_games']), axis=1)
away_stats.rename(columns={'away_team': 'team'}, inplace=True)

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='team', how='outer').fillna(0)

# Berechnung der Gesamtstatistiken
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']
total_stats['total_wins'] = total_stats['home_wins'] + total_stats['away_wins']
total_stats['total_losses'] = total_stats['home_losses'] + total_stats['away_losses']
total_stats['total_win_rate'] = total_stats.apply(lambda row: calculate_win_rate(row['total_wins'], row['total_games']), axis=1)

# Sortieren nach Gesamtgewinnrate und Gesamtspielen in absteigender Reihenfolge
sorted_total_stats = total_stats.sort_values(by=['total_win_rate', 'total_games'], ascending=[False, False]).reset_index(drop=True)
sorted_total_stats = sorted_total_stats.head(10)

# DataFrame sortiert nach Heimgewinnrate und dann nach den meisten Heimspielen
sorted_by_home_win_rate = total_stats.sort_values(by=['home_win_rate', 'home_games'], ascending=[False, False]).reset_index(drop=True)
sorted_by_home_win_rate = sorted_by_home_win_rate.head(10)

# DataFrame sortiert nach Auswärtsgewinnrate und dann nach den meisten Auswärtsspielen
sorted_by_away_win_rate = total_stats.sort_values(by=['away_win_rate', 'away_games'], ascending=[False, False]).reset_index(drop=True)
sorted_by_away_win_rate = sorted_by_away_win_rate.head(10)

# Ausgabe der sortierten DataFrames
print("Sorted by Total Win Rate and Total Games:")
display(sorted_total_stats[['team', 'home_games', 'home_wins', 'home_losses', 'home_win_rate', 
                            'away_games', 'away_wins', 'away_losses', 'away_win_rate', 
                            'total_games', 'total_wins', 'total_losses', 'total_win_rate']])

print("\nDataFrame sorted by Home Win Rate and then by Home Games:")
display(sorted_by_home_win_rate[['team', 'home_games', 'home_wins', 'home_losses', 'home_win_rate', 
                                 'away_games', 'away_wins', 'away_losses', 'away_win_rate', 
                                 'total_games', 'total_wins', 'total_losses', 'total_win_rate']])

print("\nDataFrame sorted by Away Win Rate and then by Away Games:")
display(sorted_by_away_win_rate[['team', 'home_games', 'home_wins', 'home_losses', 'home_win_rate', 
                                 'away_games', 'away_wins', 'away_losses', 'away_win_rate', 
                                 'total_games', 'total_wins', 'total_losses', 'total_win_rate']])

# DataFrame mit den meisten Gesamtspielen
most_games_stats = total_stats.sort_values(by='total_games', ascending=False).reset_index(drop=True)
most_games_stats = most_games_stats.head(10)
print("\nContinents with Most Total Games:")
display(most_games_stats[['team', 'total_games']])


Sorted by Total Win Rate and Total Games:


Unnamed: 0,team,home_games,home_wins,home_losses,home_win_rate,away_games,away_wins,away_losses,away_win_rate,total_games,total_wins,total_losses,total_win_rate
0,Ethiopia,5.0,5.0,0.0,1.0,2.0,2.0,0.0,1.0,7.0,7.0,0.0,1.0
1,Seychelles,2.0,2.0,0.0,1.0,2.0,2.0,0.0,1.0,4.0,4.0,0.0,1.0
2,Bahrain,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
3,Benin,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
4,Guatemala,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
5,Kazakhstan,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
6,Antigua and Barbuda,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,2.0,2.0,0.0,1.0
7,Belgium,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,2.0,2.0,0.0,1.0
8,Cyprus,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,1.0
9,Scotland,1.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,2.0,2.0,0.0,1.0



DataFrame sorted by Home Win Rate and then by Home Games:


Unnamed: 0,team,home_games,home_wins,home_losses,home_win_rate,away_games,away_wins,away_losses,away_win_rate,total_games,total_wins,total_losses,total_win_rate
0,Australia,5.0,5.0,0.0,1.0,3.0,0.0,3.0,0.0,8.0,5.0,3.0,0.625
1,Ethiopia,5.0,5.0,0.0,1.0,2.0,2.0,0.0,1.0,7.0,7.0,0.0,1.0
2,Bahrain,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
3,Benin,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
4,Cyprus,2.0,2.0,0.0,1.0,0.0,0.0,0.0,0.0,2.0,2.0,0.0,1.0
5,Guatemala,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
6,Guinea,2.0,2.0,0.0,1.0,8.0,6.0,2.0,0.75,10.0,8.0,2.0,0.8
7,Honduras,2.0,2.0,0.0,1.0,6.0,2.0,4.0,0.333333,8.0,4.0,4.0,0.5
8,Kazakhstan,2.0,2.0,0.0,1.0,1.0,1.0,0.0,1.0,3.0,3.0,0.0,1.0
9,Seychelles,2.0,2.0,0.0,1.0,2.0,2.0,0.0,1.0,4.0,4.0,0.0,1.0



DataFrame sorted by Away Win Rate and then by Away Games:


Unnamed: 0,team,home_games,home_wins,home_losses,home_win_rate,away_games,away_wins,away_losses,away_win_rate,total_games,total_wins,total_losses,total_win_rate
0,Indonesia,8.0,7.0,1.0,0.875,3.0,3.0,0.0,1.0,11.0,10.0,1.0,0.909091
1,Poland,1.0,0.0,1.0,0.0,3.0,3.0,0.0,1.0,4.0,3.0,1.0,0.75
2,Saudi Arabia,6.0,4.0,2.0,0.666667,3.0,3.0,0.0,1.0,9.0,7.0,2.0,0.777778
3,Sweden,3.0,1.0,2.0,0.333333,3.0,3.0,0.0,1.0,6.0,4.0,2.0,0.666667
4,Ukraine,1.0,0.0,1.0,0.0,3.0,3.0,0.0,1.0,4.0,3.0,1.0,0.75
5,Ecuador,1.0,0.0,1.0,0.0,2.0,2.0,0.0,1.0,3.0,2.0,1.0,0.666667
6,Ethiopia,5.0,5.0,0.0,1.0,2.0,2.0,0.0,1.0,7.0,7.0,0.0,1.0
7,Germany,5.0,4.0,1.0,0.8,2.0,2.0,0.0,1.0,7.0,6.0,1.0,0.857143
8,Portugal,3.0,1.0,2.0,0.333333,2.0,2.0,0.0,1.0,5.0,3.0,2.0,0.6
9,Seychelles,2.0,2.0,0.0,1.0,2.0,2.0,0.0,1.0,4.0,4.0,0.0,1.0



Continents with Most Total Games:


Unnamed: 0,team,total_games
0,South Africa,28.0
1,Zambia,25.0
2,South Korea,23.0
3,Thailand,22.0
4,Egypt,21.0
5,Argentina,21.0
6,Senegal,20.0
7,Iran,19.0
8,Malawi,18.0
9,Ivory Coast,18.0


In [69]:
from scipy.stats import pearsonr
# Kodierung der Teams
df_corr = df_results
df_corr['home_team_code'] = df_results['home_team'].astype('category').cat.codes
df_corr['away_team_code'] = df_results['away_team'].astype('category').cat.codes

# Korrelation und p-Wert berechnen
correlation, p_value = pearsonr(df_corr['home_score'], df_corr['away_team_code'])

print(f'Korrelation zwischen der Anzahl der geschossenen Tore des Heimteams und dem Auswärtsteam: {correlation}')
print(f'p-Wert: {p_value}')

Korrelation zwischen der Anzahl der geschossenen Tore des Heimteams und dem Auswärtsteam: -0.023424813724462
p-Wert: 2.2407503635057763e-06


In [70]:
# Boolean-Spalten in numerische Werte umwandeln
df_results['neutral'] = df_results['neutral'].astype(int)
df_results['winning_country_is_home_team'] = df_results['winning_country_is_home_team'].astype(int)

# Korrelation berechnen
correlation = df_results['neutral'].corr(df_results['winning_country_is_home_team'])

print(f'Korrelation zwischen neutral und winning_country_is_home_team: {correlation}')

Korrelation zwischen neutral und winning_country_is_home_team: -0.06106875329996535


Continents

In [71]:


# Anzahl der Heim- und Auswärtsspiele für jedes continent zählen
total_matches_home = df_results_distinct_continent['home_continent'].value_counts()
total_matches_away = df_results_distinct_continent['away_continent'].value_counts()

# Gesamtanzahl der Spiele durch Addition der Heim- und Auswärtsspiele für jedes continent zählen
total_matches = total_matches_home.add(total_matches_away, fill_value=0).sort_values(ascending=False)

# Anzahl der Heim- und Auswärtssiege für jedes continent zählen
home_wins = df_results_distinct_continent[df_results_distinct_continent['home_score'] > df_results_distinct_continent['away_score']]['home_continent'].value_counts()
away_wins = df_results_distinct_continent[df_results_distinct_continent['away_score'] > df_results_distinct_continent['home_score']]['away_continent'].value_counts()

# Gesamtanzahl der Siege durch Addition der Heim- und Auswärtssiege für jedes continent zählen
total_wins = home_wins.add(away_wins, fill_value=0).sort_values(ascending=False)

# Gewinnprozentsätze für Heim-, Auswärts- und Gesamtspiele berechnen
home_win_percentages = (home_wins / total_matches_home * 100).sort_values(ascending=False)
away_win_percentages = (away_wins / total_matches_away * 100).sort_values(ascending=False)
total_win_percentages = (total_wins / total_matches * 100).sort_values(ascending=False)

# Ergebnisse in einem DataFrame zusammenfassen
results = pd.DataFrame({
    'Total Matches': total_matches,
    'Home Matches': total_matches_home,
    'Away Matches': total_matches_away,
    'Total Wins': total_wins,
    'Home Wins': home_wins,
    'Away Wins': away_wins,
    'Total Win %': total_win_percentages,
    'Home Win %': home_win_percentages,
    'Away Win %': away_win_percentages
}).fillna(0)

# DataFrame nach Gesamtgewinnrate sortieren und anzeigen
results_sorted_by_total_win_rate = results.sort_values(by='Total Win %', ascending=False).head(10)
print("DataFrame nach Gesamtgewinnrate sortiert:")
display(results_sorted_by_total_win_rate)

# DataFrame nach Heimgewinnrate sortieren und anzeigen
results_sorted_by_home_win_rate = results.sort_values(by='Home Win %', ascending=False).head(10)
print("\nDataFrame nach Heimgewinnrate sortiert:")
display(results_sorted_by_home_win_rate)

# DataFrame nach Auswärtsgewinnrate sortieren und anzeigen
results_sorted_by_away_win_rate = results.sort_values(by='Away Win %', ascending=False).head(10)
print("\nDataFrame nach Auswärtsgewinnrate sortiert:")
display(results_sorted_by_away_win_rate)

# DataFrame für Länder mit den meisten gespielten Matches, absteigend sortiert nach Gesamtmatches
most_matches_df = results.sort_values(by='Total Matches', ascending=False).reset_index().rename(columns={'index': 'Country'}).head(10)
print("\nLänder mit den meisten gespielten Matches:")
display(most_matches_df[['Country', 'Total Matches']])


DataFrame nach Gesamtgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Europe,4707,2313,2394,2237,1285,952,47.524963,55.555556,39.766082
South America,2659,1173,1486,1253,693,560,47.122979,59.079284,37.685061
Oceania,730,340,390,261,154,107,35.753425,45.294118,27.435897
Africa,1665,630,1035,523,274,249,31.411411,43.492063,24.057971
North America,2288,1225,1063,696,472,224,30.41958,38.530612,21.072437
Asia,4273,2480,1793,1234,868,366,28.879008,35.0,20.412716



DataFrame nach Heimgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
South America,2659,1173,1486,1253,693,560,47.122979,59.079284,37.685061
Europe,4707,2313,2394,2237,1285,952,47.524963,55.555556,39.766082
Oceania,730,340,390,261,154,107,35.753425,45.294118,27.435897
Africa,1665,630,1035,523,274,249,31.411411,43.492063,24.057971
North America,2288,1225,1063,696,472,224,30.41958,38.530612,21.072437
Asia,4273,2480,1793,1234,868,366,28.879008,35.0,20.412716



DataFrame nach Auswärtsgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Europe,4707,2313,2394,2237,1285,952,47.524963,55.555556,39.766082
South America,2659,1173,1486,1253,693,560,47.122979,59.079284,37.685061
Oceania,730,340,390,261,154,107,35.753425,45.294118,27.435897
Africa,1665,630,1035,523,274,249,31.411411,43.492063,24.057971
North America,2288,1225,1063,696,472,224,30.41958,38.530612,21.072437
Asia,4273,2480,1793,1234,868,366,28.879008,35.0,20.412716



Länder mit den meisten gespielten Matches:


Unnamed: 0,Country,Total Matches
0,Europe,4707
1,Asia,4273
2,South America,2659
3,North America,2288
4,Africa,1665
5,Oceania,730


In [72]:


# Anzahl der Heim- und Auswärtsspiele für jedes continent zählen
total_matches_home = df_results_neutral_continents['home_continent'].value_counts()
total_matches_away = df_results_neutral_continents['away_continent'].value_counts()

# Gesamtanzahl der Spiele durch Addition der Heim- und Auswärtsspiele für jedes continent zählen
total_matches = total_matches_home.add(total_matches_away, fill_value=0).sort_values(ascending=False)

# Anzahl der Heim- und Auswärtssiege für jedes continent zählen
home_wins = df_results_neutral_continents[df_results_neutral_continents['home_score'] > df_results_neutral_continents['away_score']]['home_continent'].value_counts()
away_wins = df_results_neutral_continents[df_results_neutral_continents['away_score'] > df_results_neutral_continents['home_score']]['away_continent'].value_counts()

# Gesamtanzahl der Siege durch Addition der Heim- und Auswärtssiege für jedes continent zählen
total_wins = home_wins.add(away_wins, fill_value=0).sort_values(ascending=False)

# Gewinnprozentsätze für Heim-, Auswärts- und Gesamtspiele berechnen
home_win_percentages = (home_wins / total_matches_home * 100).sort_values(ascending=False)
away_win_percentages = (away_wins / total_matches_away * 100).sort_values(ascending=False)
total_win_percentages = (total_wins / total_matches * 100).sort_values(ascending=False)

# Ergebnisse in einem DataFrame zusammenfassen
results = pd.DataFrame({
    'Total Matches': total_matches,
    'Home Matches': total_matches_home,
    'Away Matches': total_matches_away,
    'Total Wins': total_wins,
    'Home Wins': home_wins,
    'Away Wins': away_wins,
    'Total Win %': total_win_percentages,
    'Home Win %': home_win_percentages,
    'Away Win %': away_win_percentages
}).fillna(0)

# DataFrame nach Gesamtgewinnrate sortieren und anzeigen
results_sorted_by_total_win_rate = results.sort_values(by='Total Win %', ascending=False).head(10)
print("DataFrame nach Gesamtgewinnrate sortiert:")
display(results_sorted_by_total_win_rate)

# DataFrame nach Heimgewinnrate sortieren und anzeigen
results_sorted_by_home_win_rate = results.sort_values(by='Home Win %', ascending=False).head(10)
print("\nDataFrame nach Heimgewinnrate sortiert:")
display(results_sorted_by_home_win_rate)

# DataFrame nach Auswärtsgewinnrate sortieren und anzeigen
results_sorted_by_away_win_rate = results.sort_values(by='Away Win %', ascending=False).head(10)
print("\nDataFrame nach Auswärtsgewinnrate sortiert:")
display(results_sorted_by_away_win_rate)

# DataFrame für Länder mit den meisten gespielten Matches, absteigend sortiert nach Gesamtmatches
most_matches_df = results.sort_values(by='Total Matches', ascending=False).reset_index().rename(columns={'index': 'Country'}).head(10)
print("\nLänder mit den meisten gespielten Matches:")
display(most_matches_df[['Country', 'Total Matches']])


DataFrame nach Gesamtgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
South America,1017,499,518,499,282,217,49.06588,56.513026,41.891892
Europe,1071,516,555,476,258,218,44.444444,50.0,39.279279
Africa,661,289,372,240,131,109,36.308623,45.32872,29.301075
Oceania,224,105,119,73,42,31,32.589286,40.0,26.05042
North America,807,493,314,249,173,76,30.855019,35.091278,24.203822
Asia,950,463,487,258,138,120,27.157895,29.805616,24.640657



DataFrame nach Heimgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
South America,1017,499,518,499,282,217,49.06588,56.513026,41.891892
Europe,1071,516,555,476,258,218,44.444444,50.0,39.279279
Africa,661,289,372,240,131,109,36.308623,45.32872,29.301075
Oceania,224,105,119,73,42,31,32.589286,40.0,26.05042
North America,807,493,314,249,173,76,30.855019,35.091278,24.203822
Asia,950,463,487,258,138,120,27.157895,29.805616,24.640657



DataFrame nach Auswärtsgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
South America,1017,499,518,499,282,217,49.06588,56.513026,41.891892
Europe,1071,516,555,476,258,218,44.444444,50.0,39.279279
Africa,661,289,372,240,131,109,36.308623,45.32872,29.301075
Oceania,224,105,119,73,42,31,32.589286,40.0,26.05042
Asia,950,463,487,258,138,120,27.157895,29.805616,24.640657
North America,807,493,314,249,173,76,30.855019,35.091278,24.203822



Länder mit den meisten gespielten Matches:


Unnamed: 0,Country,Total Matches
0,Europe,1071
1,South America,1017
2,Asia,950
3,North America,807
4,Africa,661
5,Oceania,224


In [73]:


# Anzahl der Heim- und Auswärtsspiele für jedes continent zählen
total_matches_home = df_results_not_neutral_continents['home_continent'].value_counts()
total_matches_away = df_results_not_neutral_continents['away_continent'].value_counts()

# Gesamtanzahl der Spiele durch Addition der Heim- und Auswärtsspiele für jedes continent zählen
total_matches = total_matches_home.add(total_matches_away, fill_value=0).sort_values(ascending=False)

# Anzahl der Heim- und Auswärtssiege für jedes continent zählen
home_wins = df_results_not_neutral_continents[df_results_not_neutral_continents['home_score'] > df_results_not_neutral_continents['away_score']]['home_continent'].value_counts()
away_wins = df_results_not_neutral_continents[df_results_not_neutral_continents['away_score'] > df_results_not_neutral_continents['home_score']]['away_continent'].value_counts()

# Gesamtanzahl der Siege durch Addition der Heim- und Auswärtssiege für jedes continent zählen
total_wins = home_wins.add(away_wins, fill_value=0).sort_values(ascending=False)

# Gewinnprozentsätze für Heim-, Auswärts- und Gesamtspiele berechnen
home_win_percentages = (home_wins / total_matches_home * 100).sort_values(ascending=False)
away_win_percentages = (away_wins / total_matches_away * 100).sort_values(ascending=False)
total_win_percentages = (total_wins / total_matches * 100).sort_values(ascending=False)

# Ergebnisse in einem DataFrame zusammenfassen
results = pd.DataFrame({
    'Total Matches': total_matches,
    'Home Matches': total_matches_home,
    'Away Matches': total_matches_away,
    'Total Wins': total_wins,
    'Home Wins': home_wins,
    'Away Wins': away_wins,
    'Total Win %': total_win_percentages,
    'Home Win %': home_win_percentages,
    'Away Win %': away_win_percentages
}).fillna(0)

# DataFrame nach Gesamtgewinnrate sortieren und anzeigen
results_sorted_by_total_win_rate = results.sort_values(by='Total Win %', ascending=False).head(10)
print("DataFrame nach Gesamtgewinnrate sortiert:")
display(results_sorted_by_total_win_rate)

# DataFrame nach Heimgewinnrate sortieren und anzeigen
results_sorted_by_home_win_rate = results.sort_values(by='Home Win %', ascending=False).head(10)
print("\nDataFrame nach Heimgewinnrate sortiert:")
display(results_sorted_by_home_win_rate)

# DataFrame nach Auswärtsgewinnrate sortieren und anzeigen
results_sorted_by_away_win_rate = results.sort_values(by='Away Win %', ascending=False).head(10)
print("\nDataFrame nach Auswärtsgewinnrate sortiert:")
display(results_sorted_by_away_win_rate)

# DataFrame für Länder mit den meisten gespielten Matches, absteigend sortiert nach Gesamtmatches
most_matches_df = results.sort_values(by='Total Matches', ascending=False).reset_index().rename(columns={'index': 'Country'}).head(10)
print("\nLänder mit den meisten gespielten Matches:")
display(most_matches_df[['Country', 'Total Matches']])


DataFrame nach Gesamtgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Europe,3636,1797,1839,1761,1027,734,48.432343,57.150807,39.912996
South America,1642,674,968,754,411,343,45.91961,60.979228,35.433884
Oceania,506,235,271,188,112,76,37.15415,47.659574,28.04428
North America,1481,732,749,447,299,148,30.182309,40.846995,19.75968
Asia,3323,2017,1306,976,730,246,29.37105,36.192365,18.836141
Africa,1004,341,663,283,143,140,28.187251,41.935484,21.116139



DataFrame nach Heimgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
South America,1642,674,968,754,411,343,45.91961,60.979228,35.433884
Europe,3636,1797,1839,1761,1027,734,48.432343,57.150807,39.912996
Oceania,506,235,271,188,112,76,37.15415,47.659574,28.04428
Africa,1004,341,663,283,143,140,28.187251,41.935484,21.116139
North America,1481,732,749,447,299,148,30.182309,40.846995,19.75968
Asia,3323,2017,1306,976,730,246,29.37105,36.192365,18.836141



DataFrame nach Auswärtsgewinnrate sortiert:


Unnamed: 0,Total Matches,Home Matches,Away Matches,Total Wins,Home Wins,Away Wins,Total Win %,Home Win %,Away Win %
Europe,3636,1797,1839,1761,1027,734,48.432343,57.150807,39.912996
South America,1642,674,968,754,411,343,45.91961,60.979228,35.433884
Oceania,506,235,271,188,112,76,37.15415,47.659574,28.04428
Africa,1004,341,663,283,143,140,28.187251,41.935484,21.116139
North America,1481,732,749,447,299,148,30.182309,40.846995,19.75968
Asia,3323,2017,1306,976,730,246,29.37105,36.192365,18.836141



Länder mit den meisten gespielten Matches:


Unnamed: 0,Country,Total Matches
0,Europe,3636
1,Asia,3323
2,South America,1642
3,North America,1481
4,Africa,1004
5,Oceania,506


In [74]:


# Anzahl der Heimspiele und Heimtore pro Land
home_stats = df_results_distinct_continent.groupby('home_continent').agg(
    home_goals=('home_score', 'sum'),
    home_games=('home_score', 'count')
).reset_index()
home_stats.columns = ['continent', 'home_goals', 'home_games']

# Anzahl der Auswärtsspiele und Auswärtstore pro Land
away_stats = df_results_distinct_continent.groupby('away_continent').agg(
    away_goals=('away_score', 'sum'),
    away_games=('away_score', 'count')
).reset_index()
away_stats.columns = ['continent', 'away_goals', 'away_games']

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='continent', how='outer').fillna(0)

# Berechnung der Gesamtzahl der Tore und Spiele
total_stats['total_goals'] = total_stats['home_goals'] + total_stats['away_goals']
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']

# Berechnung der durchschnittlichen Tore pro Spiel
total_stats['avg_goals_per_game'] = total_stats['total_goals'] / total_stats['total_games']

# Berechnung der durchschnittlichen Heim- und Auswärtstore pro Spiel
total_stats['avg_home_goals_per_game'] = total_stats['home_goals'] / total_stats['home_games']
total_stats['avg_away_goals_per_game'] = total_stats['away_goals'] / total_stats['away_games']

# Sortieren nach durchschnittlichen Toren pro Spiel in absteigender Reihenfolge
total_stats = total_stats.sort_values(by='avg_goals_per_game', ascending=False)

# Runden der durchschnittlichen Tore pro Spiel auf 2 Dezimalstellen
total_stats['avg_goals_per_game'] = total_stats['avg_goals_per_game'].round(2)
total_stats['avg_home_goals_per_game'] = total_stats['avg_home_goals_per_game'].round(2)
total_stats['avg_away_goals_per_game'] = total_stats['avg_away_goals_per_game'].round(2)

# Ausgabe der Top 10 continents nach durchschnittlichen Toren pro Spiel
top_avg_goals = total_stats.head(10)
print("Top 10 continents nach durchschnittlichen Toren pro Spiel:")
display(top_avg_goals)

# Sortieren nach durchschnittlichen Heimtoren pro Spiel
sorted_by_avg_home_goals = total_stats.sort_values(by='avg_home_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:")
display(sorted_by_avg_home_goals)

# Sortieren nach durchschnittlichen Auswärtstoren pro Spiel
sorted_by_avg_away_goals = total_stats.sort_values(by='avg_away_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:")
display(sorted_by_avg_away_goals)

# Erstellen eines DataFrames für Länder mit den meisten erzielten Toren
most_goals_df = total_stats.sort_values(by='total_goals', ascending=False).reset_index(drop=True).head(10)
print("Länder mit den meisten erzielten Toren:")
display(most_goals_df[['continent', 'total_goals']])


Top 10 continents nach durchschnittlichen Toren pro Spiel:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
5,South America,2350.0,1173,1984.0,1486,4334.0,2659,1.63,2.0,1.34
2,Europe,4369.0,2313,3269.0,2394,7638.0,4707,1.62,1.89,1.37
4,Oceania,542.0,340,435.0,390,977.0,730,1.34,1.59,1.12
0,Africa,953.0,630,1119.0,1035,2072.0,1665,1.24,1.51,1.08
3,North America,1629.0,1225,1066.0,1063,2695.0,2288,1.18,1.33,1.0
1,Asia,3265.0,2480,1703.0,1793,4968.0,4273,1.16,1.32,0.95



DataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
5,South America,2350.0,1173,1984.0,1486,4334.0,2659,1.63,2.0,1.34
2,Europe,4369.0,2313,3269.0,2394,7638.0,4707,1.62,1.89,1.37
4,Oceania,542.0,340,435.0,390,977.0,730,1.34,1.59,1.12
0,Africa,953.0,630,1119.0,1035,2072.0,1665,1.24,1.51,1.08
3,North America,1629.0,1225,1066.0,1063,2695.0,2288,1.18,1.33,1.0
1,Asia,3265.0,2480,1703.0,1793,4968.0,4273,1.16,1.32,0.95



DataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
2,Europe,4369.0,2313,3269.0,2394,7638.0,4707,1.62,1.89,1.37
5,South America,2350.0,1173,1984.0,1486,4334.0,2659,1.63,2.0,1.34
4,Oceania,542.0,340,435.0,390,977.0,730,1.34,1.59,1.12
0,Africa,953.0,630,1119.0,1035,2072.0,1665,1.24,1.51,1.08
3,North America,1629.0,1225,1066.0,1063,2695.0,2288,1.18,1.33,1.0
1,Asia,3265.0,2480,1703.0,1793,4968.0,4273,1.16,1.32,0.95


Länder mit den meisten erzielten Toren:


Unnamed: 0,continent,total_goals
0,Europe,7638.0
1,Asia,4968.0
2,South America,4334.0
3,North America,2695.0
4,Africa,2072.0
5,Oceania,977.0


In [75]:


# Anzahl der Heimspiele und Heimtore pro Land
home_stats = df_results_neutral_continents.groupby('home_continent').agg(
    home_goals=('home_score', 'sum'),
    home_games=('home_score', 'count')
).reset_index()
home_stats.columns = ['continent', 'home_goals', 'home_games']

# Anzahl der Auswärtsspiele und Auswärtstore pro Land
away_stats = df_results_neutral_continents.groupby('away_continent').agg(
    away_goals=('away_score', 'sum'),
    away_games=('away_score', 'count')
).reset_index()
away_stats.columns = ['continent', 'away_goals', 'away_games']

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='continent', how='outer').fillna(0)

# Berechnung der Gesamtzahl der Tore und Spiele
total_stats['total_goals'] = total_stats['home_goals'] + total_stats['away_goals']
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']

# Berechnung der durchschnittlichen Tore pro Spiel
total_stats['avg_goals_per_game'] = total_stats['total_goals'] / total_stats['total_games']

# Berechnung der durchschnittlichen Heim- und Auswärtstore pro Spiel
total_stats['avg_home_goals_per_game'] = total_stats['home_goals'] / total_stats['home_games']
total_stats['avg_away_goals_per_game'] = total_stats['away_goals'] / total_stats['away_games']

# Sortieren nach durchschnittlichen Toren pro Spiel in absteigender Reihenfolge
total_stats = total_stats.sort_values(by='avg_goals_per_game', ascending=False)

# Runden der durchschnittlichen Tore pro Spiel auf 2 Dezimalstellen
total_stats['avg_goals_per_game'] = total_stats['avg_goals_per_game'].round(2)
total_stats['avg_home_goals_per_game'] = total_stats['avg_home_goals_per_game'].round(2)
total_stats['avg_away_goals_per_game'] = total_stats['avg_away_goals_per_game'].round(2)

# Ausgabe der Top 10 continents nach durchschnittlichen Toren pro Spiel
top_avg_goals = total_stats.head(10)
print("Top 10 continents nach durchschnittlichen Toren pro Spiel:")
display(top_avg_goals)

# Sortieren nach durchschnittlichen Heimtoren pro Spiel
sorted_by_avg_home_goals = total_stats.sort_values(by='avg_home_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:")
display(sorted_by_avg_home_goals)

# Sortieren nach durchschnittlichen Auswärtstoren pro Spiel
sorted_by_avg_away_goals = total_stats.sort_values(by='avg_away_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:")
display(sorted_by_avg_away_goals)

# Erstellen eines DataFrames für Länder mit den meisten erzielten Toren
most_goals_df = total_stats.sort_values(by='total_goals', ascending=False).reset_index(drop=True).head(10)
print("Länder mit den meisten erzielten Toren:")
display(most_goals_df[['continent', 'total_goals']])


Top 10 continents nach durchschnittlichen Toren pro Spiel:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
5,South America,928.0,499,766.0,518,1694.0,1017,1.67,1.86,1.48
0,Africa,506.0,289,476.0,372,982.0,661,1.49,1.75,1.28
2,Europe,868.0,516,712.0,555,1580.0,1071,1.48,1.68,1.28
1,Asia,616.0,463,564.0,487,1180.0,950,1.24,1.33,1.16
3,North America,617.0,493,346.0,314,963.0,807,1.19,1.25,1.1
4,Oceania,137.0,105,130.0,119,267.0,224,1.19,1.3,1.09



DataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
5,South America,928.0,499,766.0,518,1694.0,1017,1.67,1.86,1.48
0,Africa,506.0,289,476.0,372,982.0,661,1.49,1.75,1.28
2,Europe,868.0,516,712.0,555,1580.0,1071,1.48,1.68,1.28
1,Asia,616.0,463,564.0,487,1180.0,950,1.24,1.33,1.16
4,Oceania,137.0,105,130.0,119,267.0,224,1.19,1.3,1.09
3,North America,617.0,493,346.0,314,963.0,807,1.19,1.25,1.1



DataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
5,South America,928.0,499,766.0,518,1694.0,1017,1.67,1.86,1.48
0,Africa,506.0,289,476.0,372,982.0,661,1.49,1.75,1.28
2,Europe,868.0,516,712.0,555,1580.0,1071,1.48,1.68,1.28
1,Asia,616.0,463,564.0,487,1180.0,950,1.24,1.33,1.16
3,North America,617.0,493,346.0,314,963.0,807,1.19,1.25,1.1
4,Oceania,137.0,105,130.0,119,267.0,224,1.19,1.3,1.09


Länder mit den meisten erzielten Toren:


Unnamed: 0,continent,total_goals
0,South America,1694.0
1,Europe,1580.0
2,Asia,1180.0
3,Africa,982.0
4,North America,963.0
5,Oceania,267.0


In [76]:


# Anzahl der Heimspiele und Heimtore pro Land
home_stats = df_results_not_neutral_continents.groupby('home_continent').agg(
    home_goals=('home_score', 'sum'),
    home_games=('home_score', 'count')
).reset_index()
home_stats.columns = ['continent', 'home_goals', 'home_games']

# Anzahl der Auswärtsspiele und Auswärtstore pro Land
away_stats = df_results_not_neutral_continents.groupby('away_continent').agg(
    away_goals=('away_score', 'sum'),
    away_games=('away_score', 'count')
).reset_index()
away_stats.columns = ['continent', 'away_goals', 'away_games']

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='continent', how='outer').fillna(0)

# Berechnung der Gesamtzahl der Tore und Spiele
total_stats['total_goals'] = total_stats['home_goals'] + total_stats['away_goals']
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']

# Berechnung der durchschnittlichen Tore pro Spiel
total_stats['avg_goals_per_game'] = total_stats['total_goals'] / total_stats['total_games']

# Berechnung der durchschnittlichen Heim- und Auswärtstore pro Spiel
total_stats['avg_home_goals_per_game'] = total_stats['home_goals'] / total_stats['home_games']
total_stats['avg_away_goals_per_game'] = total_stats['away_goals'] / total_stats['away_games']

# Sortieren nach durchschnittlichen Toren pro Spiel in absteigender Reihenfolge
total_stats = total_stats.sort_values(by='avg_goals_per_game', ascending=False)

# Runden der durchschnittlichen Tore pro Spiel auf 2 Dezimalstellen
total_stats['avg_goals_per_game'] = total_stats['avg_goals_per_game'].round(2)
total_stats['avg_home_goals_per_game'] = total_stats['avg_home_goals_per_game'].round(2)
total_stats['avg_away_goals_per_game'] = total_stats['avg_away_goals_per_game'].round(2)

# Ausgabe der Top 10 continents nach durchschnittlichen Toren pro Spiel
top_avg_goals = total_stats.head(10)
print("Top 10 continents nach durchschnittlichen Toren pro Spiel:")
display(top_avg_goals)

# Sortieren nach durchschnittlichen Heimtoren pro Spiel
sorted_by_avg_home_goals = total_stats.sort_values(by='avg_home_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:")
display(sorted_by_avg_home_goals)

# Sortieren nach durchschnittlichen Auswärtstoren pro Spiel
sorted_by_avg_away_goals = total_stats.sort_values(by='avg_away_goals_per_game', ascending=False).head(10)
print("\nDataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:")
display(sorted_by_avg_away_goals)

# Erstellen eines DataFrames für Länder mit den meisten erzielten Toren
most_goals_df = total_stats.sort_values(by='total_goals', ascending=False).reset_index(drop=True).head(10)
print("Länder mit den meisten erzielten Toren:")
display(most_goals_df[['continent', 'total_goals']])


Top 10 continents nach durchschnittlichen Toren pro Spiel:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
2,Europe,3501.0,1797,2557.0,1839,6058.0,3636,1.67,1.95,1.39
5,South America,1422.0,674,1218.0,968,2640.0,1642,1.61,2.11,1.26
4,Oceania,405.0,235,305.0,271,710.0,506,1.4,1.72,1.13
3,North America,1012.0,732,720.0,749,1732.0,1481,1.17,1.38,0.96
1,Asia,2649.0,2017,1139.0,1306,3788.0,3323,1.14,1.31,0.87
0,Africa,447.0,341,643.0,663,1090.0,1004,1.09,1.31,0.97



DataFrame nach durchschnittlichen Heimtoren pro Spiel sortiert:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
5,South America,1422.0,674,1218.0,968,2640.0,1642,1.61,2.11,1.26
2,Europe,3501.0,1797,2557.0,1839,6058.0,3636,1.67,1.95,1.39
4,Oceania,405.0,235,305.0,271,710.0,506,1.4,1.72,1.13
3,North America,1012.0,732,720.0,749,1732.0,1481,1.17,1.38,0.96
1,Asia,2649.0,2017,1139.0,1306,3788.0,3323,1.14,1.31,0.87
0,Africa,447.0,341,643.0,663,1090.0,1004,1.09,1.31,0.97



DataFrame nach durchschnittlichen Auswärtstoren pro Spiel sortiert:


Unnamed: 0,continent,home_goals,home_games,away_goals,away_games,total_goals,total_games,avg_goals_per_game,avg_home_goals_per_game,avg_away_goals_per_game
2,Europe,3501.0,1797,2557.0,1839,6058.0,3636,1.67,1.95,1.39
5,South America,1422.0,674,1218.0,968,2640.0,1642,1.61,2.11,1.26
4,Oceania,405.0,235,305.0,271,710.0,506,1.4,1.72,1.13
0,Africa,447.0,341,643.0,663,1090.0,1004,1.09,1.31,0.97
3,North America,1012.0,732,720.0,749,1732.0,1481,1.17,1.38,0.96
1,Asia,2649.0,2017,1139.0,1306,3788.0,3323,1.14,1.31,0.87


Länder mit den meisten erzielten Toren:


Unnamed: 0,continent,total_goals
0,Europe,6058.0
1,Asia,3788.0
2,South America,2640.0
3,North America,1732.0
4,Africa,1090.0
5,Oceania,710.0


In [77]:


# Funktion zur Berechnung der Gewinnrate mit Behandlung von Null-Division
def calculate_win_rate(wins, games):
    if games == 0:
        return 0
    else:
        return wins / games

# Statistiken für Heimspiele berechnen
home_stats = df_shootouts_distinct_continents.groupby('home_continent').agg(
    home_games=('home_continent', 'size'),
    home_wins=('winning_continent', lambda x: (x == df_shootouts_distinct_continents.loc[x.index, 'home_continent']).sum()),
    home_losses=('winning_continent', lambda x: (x != df_shootouts_distinct_continents.loc[x.index, 'home_continent']).sum())
).reset_index()
home_stats['home_win_rate'] = home_stats.apply(lambda row: calculate_win_rate(row['home_wins'], row['home_games']), axis=1)
home_stats.rename(columns={'home_continent': 'continent'}, inplace=True)

# Statistiken für Auswärtsspiele berechnen
away_stats = df_shootouts_distinct_continents.groupby('away_continent').agg(
    away_games=('away_continent', 'size'),
    away_wins=('winning_continent', lambda x: (x == df_shootouts_distinct_continents.loc[x.index, 'away_continent']).sum()),
    away_losses=('winning_continent', lambda x: (x != df_shootouts_distinct_continents.loc[x.index, 'away_continent']).sum())
).reset_index()
away_stats['away_win_rate'] = away_stats.apply(lambda row: calculate_win_rate(row['away_wins'], row['away_games']), axis=1)
away_stats.rename(columns={'away_continent': 'continent'}, inplace=True)

# Zusammenführen der Heim- und Auswärtsstatistiken
total_stats = pd.merge(home_stats, away_stats, on='continent', how='outer').fillna(0)

# Berechnung der Gesamtstatistiken
total_stats['total_games'] = total_stats['home_games'] + total_stats['away_games']
total_stats['total_wins'] = total_stats['home_wins'] + total_stats['away_wins']
total_stats['total_losses'] = total_stats['home_losses'] + total_stats['away_losses']
total_stats['total_win_rate'] = total_stats.apply(lambda row: calculate_win_rate(row['total_wins'], row['total_games']), axis=1)

# Sortieren nach Gesamtgewinnrate und Gesamtspielen in absteigender Reihenfolge
sorted_total_stats = total_stats.sort_values(by=['total_win_rate', 'total_games'], ascending=[False, False]).reset_index(drop=True)
sorted_total_stats = sorted_total_stats.head(10)

# DataFrame sortiert nach Heimgewinnrate und dann nach den meisten Heimspielen
sorted_by_home_win_rate = total_stats.sort_values(by=['home_win_rate', 'home_games'], ascending=[False, False]).reset_index(drop=True)
sorted_by_home_win_rate = sorted_by_home_win_rate.head(10)

# DataFrame sortiert nach Auswärtsgewinnrate und dann nach den meisten Auswärtsspielen
sorted_by_away_win_rate = total_stats.sort_values(by=['away_win_rate', 'away_games'], ascending=[False, False]).reset_index(drop=True)
sorted_by_away_win_rate = sorted_by_away_win_rate.head(10)

# Ausgabe der sortierten DataFrames
print("Sorted by Total Win Rate and Total Games:")
display(sorted_total_stats[['continent', 'home_games', 'home_wins', 'home_losses', 'home_win_rate', 
                            'away_games', 'away_wins', 'away_losses', 'away_win_rate', 
                            'total_games', 'total_wins', 'total_losses', 'total_win_rate']])

print("\nDataFrame sorted by Home Win Rate and then by Home Games:")
display(sorted_by_home_win_rate[['continent', 'home_games', 'home_wins', 'home_losses', 'home_win_rate', 
                                 'away_games', 'away_wins', 'away_losses', 'away_win_rate', 
                                 'total_games', 'total_wins', 'total_losses', 'total_win_rate']])

print("\nDataFrame sorted by Away Win Rate and then by Away Games:")
display(sorted_by_away_win_rate[['continent', 'home_games', 'home_wins', 'home_losses', 'home_win_rate', 
                                 'away_games', 'away_wins', 'away_losses', 'away_win_rate', 
                                 'total_games', 'total_wins', 'total_losses', 'total_win_rate']])

# DataFrame mit den meisten Gesamtspielen
most_games_stats = total_stats.sort_values(by='total_games', ascending=False).reset_index(drop=True)
most_games_stats = most_games_stats.head(10)
print("\nContinents with Most Total Games:")
display(most_games_stats[['continent', 'total_games']])


Sorted by Total Win Rate and Total Games:


Unnamed: 0,continent,home_games,home_wins,home_losses,home_win_rate,away_games,away_wins,away_losses,away_win_rate,total_games,total_wins,total_losses,total_win_rate
0,Asia,33,23,10,0.69697,19,9,10,0.473684,52,32,20,0.615385
1,South America,22,15,7,0.681818,16,8,8,0.5,38,23,15,0.605263
2,Oceania,7,5,2,0.714286,4,0,4,0.0,11,5,6,0.454545
3,North America,11,5,6,0.454545,10,4,6,0.4,21,9,12,0.428571
4,Africa,12,4,8,0.333333,20,9,11,0.45,32,13,19,0.40625
5,Europe,17,8,9,0.470588,33,12,21,0.363636,50,20,30,0.4



DataFrame sorted by Home Win Rate and then by Home Games:


Unnamed: 0,continent,home_games,home_wins,home_losses,home_win_rate,away_games,away_wins,away_losses,away_win_rate,total_games,total_wins,total_losses,total_win_rate
0,Oceania,7,5,2,0.714286,4,0,4,0.0,11,5,6,0.454545
1,Asia,33,23,10,0.69697,19,9,10,0.473684,52,32,20,0.615385
2,South America,22,15,7,0.681818,16,8,8,0.5,38,23,15,0.605263
3,Europe,17,8,9,0.470588,33,12,21,0.363636,50,20,30,0.4
4,North America,11,5,6,0.454545,10,4,6,0.4,21,9,12,0.428571
5,Africa,12,4,8,0.333333,20,9,11,0.45,32,13,19,0.40625



DataFrame sorted by Away Win Rate and then by Away Games:


Unnamed: 0,continent,home_games,home_wins,home_losses,home_win_rate,away_games,away_wins,away_losses,away_win_rate,total_games,total_wins,total_losses,total_win_rate
0,South America,22,15,7,0.681818,16,8,8,0.5,38,23,15,0.605263
1,Asia,33,23,10,0.69697,19,9,10,0.473684,52,32,20,0.615385
2,Africa,12,4,8,0.333333,20,9,11,0.45,32,13,19,0.40625
3,North America,11,5,6,0.454545,10,4,6,0.4,21,9,12,0.428571
4,Europe,17,8,9,0.470588,33,12,21,0.363636,50,20,30,0.4
5,Oceania,7,5,2,0.714286,4,0,4,0.0,11,5,6,0.454545



Continents with Most Total Games:


Unnamed: 0,continent,total_games
0,Asia,52
1,Europe,50
2,South America,38
3,Africa,32
4,North America,21
5,Oceania,11


In [78]:
df_results_distinct_continent.head()

Unnamed: 0,date,home_team,away_team,home_score,away_score,tournament,city,country,neutral,home_continent,away_continent,winning_country,winning_continent,winning_country_is_home_team
61,1888-09-19,Scotland,Canada,4.0,0.0,Friendly,Glasgow,Scotland,False,Europe,North America,Scotland,Europe,True
474,1916-08-20,Sweden,United States,2.0,3.0,Friendly,Stockholm,Sweden,False,Europe,North America,United States,North America,False
475,1916-09-03,Norway,United States,1.0,1.0,Friendly,Kristiania,Norway,False,Europe,North America,Unentschieden,Unentschieden,False
635,1921-07-09,Canada,Scotland,0.0,1.0,Friendly,Montreal,Canada,False,North America,Europe,Scotland,Europe,False
803,1923-10-26,Turkey,Romania,2.0,2.0,Friendly,Istanbul,Turkey,False,Asia,Europe,Unentschieden,Unentschieden,False
