# Football Data Exploration and Visualization

## Part 1: Basic Data Exploration with Pandas

### 1. Inspecting the Data

In [None]:
# pandas has to be imported before `pd.read_csv` can be called; without this
# line the cell fails with a NameError on a freshly started kernel.
import pandas as pd

# Load the dataset and display the first 5 rows
football_data = pd.read_csv('football_stats_2021_2022_cleaned.csv')
football_data.head()

**Question**: What are the columns in the dataset, and what information do they represent?

### 2. Data Summary

In [None]:
# Build the summary table with DataFrame.describe(), then show it as the
# cell's output.
summary_stats = football_data.describe()
summary_stats

**Task**: Find the total number of teams in the dataset.

In [None]:
# Count the distinct teams. `nunique()` ignores missing values, so a blank
# 'Squad' entry is not counted as an extra team (which `len(...unique())`
# would do).
football_data['Squad'].nunique()

### 3. Filtering and Sorting

In [None]:
# Keep only the rows whose 'Pts' value exceeds 85, then list them from the
# highest points total downwards.
over_85_points = football_data['Pts'] > 85
football_data[over_85_points].sort_values(by='Pts', ascending=False)

**Question**: How many teams had more than 85 points?

### 4. Grouping and Aggregation

In [None]:
# Average the 'GF' (goals for) column within each 'Country' group.
goals_for_by_country = football_data.groupby('Country')['GF']
goals_for_by_country.mean()

**Question**: Which country had the highest average number of goals scored (`GF`, "goals for")?

### 5. Finding Top Performers

In [None]:
# Derive the 'Top Scorer' column: split each 'Top Team Scorer' string on
# ' - ' and keep the first piece.
# NOTE(review): presumably the text before ' - ' is the player's name; confirm
# against the CSV.
scorer_parts = football_data['Top Team Scorer'].str.split(' - ')
football_data['Top Scorer'] = scorer_parts.str[0]

### 6. Advanced Pandas Task

In [None]:
# Show the three rows with the smallest 'GA' values, restricted to the squad,
# goalkeeper and GA columns.
# NOTE(review): 'GA' presumably means goals against; confirm against the data
# dictionary.
keeper_columns = ['Squad', 'Goalkeeper', 'GA']
keepers_by_ga = football_data[keeper_columns].sort_values(by='GA')
keepers_by_ga.head(3)

## Part 2: Data Visualization with Plotly

### 1. Bar Plot: Top 10 Teams by Points

In [None]:
import plotly.express as px

# Select the ten rows with the largest 'Pts' values and draw their points as
# a bar chart, one bar per squad.
top_teams = football_data.nlargest(n=10, columns='Pts')
fig = px.bar(
    data_frame=top_teams,
    x='Squad',
    y='Pts',
    title='Top 10 Teams by Points',
)
fig.show()

### 2. Pie Chart: Distribution of Wins

In [None]:
# Sum the 'W' (wins) column per country, turn the grouped result back into a
# regular two-column frame, and plot each country's share as a pie slice.
wins_per_country = football_data.groupby('Country')['W'].sum()
total_wins = wins_per_country.reset_index()
fig = px.pie(
    data_frame=total_wins,
    names='Country',
    values='W',
    title='Total Wins by Country',
)
fig.show()

### 3. Scatter Plot: Points vs Goals Scored

In [None]:
# Plot every row as a point: 'Pts' on the x-axis, 'GF' on the y-axis, with the
# squad name added to the hover tooltip.
fig = px.scatter(
    data_frame=football_data,
    x='Pts',
    y='GF',
    hover_data=['Squad'],
    title='Points vs Goals Scored',
)
fig.show()

### 4. Line Chart: Expected Goals vs Actual Goals

In [None]:
# For the five rows with the largest 'Pts' values, draw one line for expected
# goals ('xG') and one for actual goals ('GF') across the squads.
top_5_teams = football_data.nlargest(n=5, columns='Pts')
fig = px.line(
    data_frame=top_5_teams,
    x='Squad',
    y=['xG', 'GF'],
    title='Expected Goals vs Actual Goals for Top 5 Teams',
)
fig.show()

### 5. Radar Chart: Comparing Key Stats for Top 2 Teams

In [None]:
# `go` is the conventional alias for the plotly.graph_objects module, not a
# name defined inside it, so it must be imported with `import ... as go`
# (`from plotly.graph_objects import go` raises ImportError).
import plotly.graph_objects as go

# Radar chart for top 2 teams comparison
top_2_teams = football_data.nlargest(2, 'Pts')
# One list drives both the axis labels (theta) and the plotted values (r), so
# the two can never get out of sync.
categories = ['GF', 'GA', 'Pts', 'xG']

fig = go.Figure()

# Add one filled polygon per team, named after its squad.
for _, row in top_2_teams.iterrows():
    fig.add_trace(go.Scatterpolar(
        r=[row[stat] for stat in categories],
        theta=categories,
        fill='toself',
        name=row['Squad']
    ))

fig.update_layout(title='Top 2 Teams Comparison', polar=dict(radialaxis=dict(visible=True)))
fig.show()