In [2]:
import pandas as pd
import plotly.express as px

# Load the raw export. Per the preview rendered below this cell, each row
# pairs match-level fields (teams, date, MatchID, round, score) with one
# in-game event (Event, Time).
df_match_info = pd.read_csv(filepath_or_buffer='project_data.csv')

# A bare last expression lets the notebook render the frame as a rich table.
df_match_info

Unnamed: 0,HomeTeamName,AwayTeamName,DateandTimeCET,MatchID,RoundName,ScoreHome,ScoreAway,Event,Time
0,Turkey,Italy,2021-06-11T21:00:00,2024447,final tournament,0,3,StartFirstHalf,2021-06-11T19:00:26.877
1,Turkey,Italy,2021-06-11T21:00:00,2024447,final tournament,0,3,GoalAttemptOffTarget,2021-06-11T19:02:35.427
2,Turkey,Italy,2021-06-11T21:00:00,2024447,final tournament,0,3,Foul,2021-06-11T19:11:43.757
3,Turkey,Italy,2021-06-11T21:00:00,2024447,final tournament,0,3,FreeKick,2021-06-11T19:11:58.677
4,Turkey,Italy,2021-06-11T21:00:00,2024447,final tournament,0,3,Foul,2021-06-11T19:12:25.197
...,...,...,...,...,...,...,...,...,...
6131,Italy,England,2021-07-11T21:00:00,2024491,final,1,1,PenaltySaved,2021-07-11T21:52:17.4
6132,Italy,England,2021-07-11T21:00:00,2024491,final,1,1,GoalAttemptSaved,2021-07-11T21:52:18.4
6133,Italy,England,2021-07-11T21:00:00,2024491,final,1,1,PenaltySaved,2021-07-11T21:53:31.727
6134,Italy,England,2021-07-11T21:00:00,2024491,final,1,1,GoalAttemptSaved,2021-07-11T21:53:32.727


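Data Processing:
1. Remove the columns that are not needed for match-level statistics (Event, Time)
2. Remove the duplicate rows left behind once the per-event columns are gone, so that each match appears only once
3. Export the result to a CSV file so the data can be inspected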

In [3]:
# Build the match-level table without mutating the frame loaded earlier.
# The previous in-place drop made this cell non-idempotent: running it a
# second time raised KeyError because 'Event' and 'Time' were already gone,
# and it silently changed the frame an earlier cell had displayed.
df_games = (
    df_match_info
    .drop(columns=['Event', 'Time'])  # per-event fields, not needed per match
    .drop_duplicates()                # rows that become identical once the event fields are removed
)

# Persist the deduplicated table for inspection; index=False keeps the
# DataFrame index out of the written file.
df_games.to_csv('games_info.csv', index=False)

Goal Statistics Formatting:
1. Iterate over rows and retrieve HomeTeamName, AwayTeamName, ScoreHome, ScoreAway
2. Add values in country Stats --> Key (Country Name : string ) Values ('Goal_Scored': int, 'Goal_Conceded': int)
3. home_goals is added to HomeTeamName Goal_Scored value and AwayTeamName Goal_Conceded value
4. away_goals is added to AwayTeamName Goal_Scored value and HomeTeamName Goal_Conceded value
5. Transform the dictionary into a dataframe and generate a column Goal_Difference based on (Goal_Scored - Goal_Conceded)
6. Sort the rows by Goal_Difference in ascending order

In [4]:
# Running goal totals keyed by country name:
#   {country: {'Goal_Scored': int, 'Goal_Conceded': int}}
country_stats = {}

# Read the four needed columns in parallel, one tuple per row of df_games.
match_rows = zip(
    df_games['HomeTeamName'],
    df_games['AwayTeamName'],
    df_games['ScoreHome'],
    df_games['ScoreAway'],
)

for host, visitor, host_goals, visitor_goals in match_rows:
    # setdefault inserts the zeroed entry the first time a team is seen
    # (home side first, then away side, so insertion order is unchanged).
    host_totals = country_stats.setdefault(host, {'Goal_Scored': 0, 'Goal_Conceded': 0})
    host_totals['Goal_Scored'] += host_goals
    host_totals['Goal_Conceded'] += visitor_goals

    visitor_totals = country_stats.setdefault(visitor, {'Goal_Scored': 0, 'Goal_Conceded': 0})
    visitor_totals['Goal_Scored'] += visitor_goals
    visitor_totals['Goal_Conceded'] += host_goals

# One row per country; the dict keys become the first column after reset_index.
stats_frame = pd.DataFrame.from_dict(country_stats, orient='index')
result_df = stats_frame.reset_index()
result_df.columns = ['Country', 'Goal_Scored', 'Goal_Conceded']

# Goal difference = goals scored minus goals conceded.
result_df['Goal_Difference'] = result_df['Goal_Scored'].sub(result_df['Goal_Conceded'])

# Ascending order (the default): the most negative goal difference comes first.
result_df = result_df.sort_values('Goal_Difference')

result_df

Unnamed: 0,Country,Goal_Scored,Goal_Conceded,Goal_Difference
0,Turkey,1,8,-7
11,North Macedonia,2,8,-6
17,Slovakia,2,7,-5
7,Russia,2,7,-5
14,Scotland,1,5,-4
13,Ukraine,6,10,-4
2,Wales,3,6,-3
20,Hungary,3,6,-3
5,Finland,1,3,-2
16,Poland,4,6,-2


Bar Chart Creation with Hover effect 
1. Bar Chart initialization (x --> countries | y --> Goal difference), Color gradient based on Goal_Difference
2. Update the figure layout: use a y-axis tick step of one goal, center the title, apply the white background template and rotate the x-axis tick labels by -45° for better readability
3. Update the traces: draw a black border around every bar and customize the hover text with a template showing the country, goal difference, goals scored and goals conceded
4. Show the figure  

In [5]:
# Hover text for every bar. customdata[0] / customdata[1] follow the order of
# the hover_data list passed to px.bar below.
hover_text = (
    "<b>%{x}</b><br>"
    "Goal Difference: %{y}<br>"
    "Goals Scored: %{customdata[0]}<br>"
    "Goals Conceded: %{customdata[1]}"
)

# One bar per country, colored by its goal difference.
fig = px.bar(
    result_df,
    x='Country',
    y='Goal_Difference',
    color='Goal_Difference',
    hover_data=['Goal_Scored', 'Goal_Conceded'],
    labels={'Goal_Difference': 'Goal Difference', 'Country': 'Country'},
    title="Goal Difference by Country",
)

# Layout: a y tick every goal starting at 0, centered title, slanted x tick
# labels, white template.
fig.update_layout(
    template='plotly_white',
    xaxis_tickangle=-45,
    title={'x': 0.5, 'xanchor': 'center'},
    yaxis={'tickmode': 'linear', 'tick0': 0, 'dtick': 1},
)

# Traces: black outline on each bar plus the custom hover text.
fig.update_traces(
    marker={'line': {'color': 'black', 'width': 1.5}},
    hovertemplate=hover_text,
)

# NOTE(review): the output recorded for this cell is a ValueError saying mime
# type rendering requires nbformat>=4.2.0, which was not installed. That is an
# environment issue; install nbformat in the kernel's environment to render.
fig.show()

ValueError: Mime type rendering requires nbformat>=4.2.0 but it is not installed