In [None]:
import pandas as pd # DataFrame loading, cleaning, and aggregation
import matplotlib.pyplot as plt # Figure creation, titles, axis labels, and plt.show()
import seaborn as sns   # Used for the correlation heatmap
pd.set_option('display.max_columns', None)  # None removes the column limit, so displayed DataFrames show every column

In [None]:
# Load the raw FIFA teams data into a DataFrame.
# The path is written as a raw string so every backslash of the Windows path
# is taken literally; in a normal string literal a sequence such as "\O" is an
# invalid escape (DeprecationWarning / SyntaxWarning, to become an error).
df = pd.read_csv(r'C:\Users\figue\OneDrive\Desktop\DataAnalytics\Projects\Python_Projects\FIFA\FIFA_Teams_15-23.csv')

# Last expression of the cell: display the loaded frame
df

In [None]:
# Inspect the column labels of the raw data before deciding which ones to keep
print(df.columns)

In [None]:
# Columns required by the analyses in this notebook; all others are dropped
columns_to_keep = [
    'fifa_version',
    'team_name',
    'league_name',
    'nationality_name',
    'overall',
    'attack',
    'midfield',
    'defence',
    'home_stadium',
]

# Keep only the selected columns. The explicit .copy() makes the result an
# independent DataFrame, so the rename and the 'year' assignment performed
# later operate on this frame itself rather than on a slice of the raw data
# (which would raise pandas' SettingWithCopyWarning).
df = df[columns_to_keep].copy()

In [None]:
# Confirm that only the retained columns remain after the selection
print(df.columns)

In [None]:
# Rename "fifa_version" to "year" and turn the two-digit edition number into
# a four-digit year (15 -> 2015, 16 -> 2016, and so on).
#
# The guard keeps the cell idempotent: once the column has been renamed, a
# second execution must not add 2000 again. rename() returns a new frame, so
# no in-place mutation of a previously displayed DataFrame takes place.
if 'fifa_version' in df.columns:
    df = df.rename(columns={'fifa_version': 'year'})
    df['year'] = df['year'] + 2000
elif 'year' not in df.columns:
    # Neither the original nor the converted column exists: fail loudly
    raise KeyError("expected a 'fifa_version' or 'year' column in df")

# Last expression of the cell: display the converted frame
df

In [None]:
# Persist the cleaned DataFrame as CSV; index=False keeps the row index out of the file
df.to_csv(
    path_or_buf="C:\\Users\\figue\\OneDrive\\Desktop\\DataAnalytics\\Projects\\Python_Projects\\FIFA\\Clean_Data_FIFA_Teams_15-23.csv",
    index=False,
)

In [None]:
# Load the CLEAN FIFA teams data (the CSV written by the cleaning step) into a DataFrame.
# The path is written as a raw string so every backslash of the Windows path
# is taken literally; in a normal string literal a sequence such as "\O" is an
# invalid escape (DeprecationWarning / SyntaxWarning, to become an error).
clean_df = pd.read_csv(r'C:\Users\figue\OneDrive\Desktop\DataAnalytics\Projects\Python_Projects\FIFA\Clean_Data_FIFA_Teams_15-23.csv')

# Last expression of the cell: display the cleaned frame
clean_df

In [None]:
# Task 1: Team Performance Analysis
# Averages the 'overall' rating over all rows of each year and draws the
# yearly means as a line with a marker at every data point, so the trend
# across years can be read directly from the chart.

# Mean overall rating per year (Series indexed by year)
team_performance = clean_df.groupby('year')['overall'].mean()

# Line chart of the yearly means; the returned Axes is used for labelling
ax = team_performance.plot.line(marker='o')

# Title and axis labels
ax.set_title('Average Team Performance Over The Years')
ax.set_xlabel('Years')
ax.set_ylabel('Average Overall Rating')

# Render the figure
plt.show()

In [None]:
# Task 2: League Comparison
# Compares leagues by the mean 'overall' rating of their rows in each year.
# The result is drawn as one marked line per league on a shared chart, with a
# legend identifying the leagues.

# Mean overall rating per (year, league); unstack() moves 'league_name' into
# the columns, giving one row per year and one column per league
league_comparison = (
    clean_df
    .groupby(['year', 'league_name'])['overall']
    .mean()
    .unstack()
)

# One line per league column, 10x6 inch figure
ax = league_comparison.plot.line(marker='o', figsize=(10, 6))

# Title and axis labels
ax.set_title('Average Team Ratings Comparison by League Over The Years')
ax.set_xlabel('Years')
ax.set_ylabel('Average Overall Rating')

# Legend titled 'League' to tell the lines apart
ax.legend(title='League')

# Render the figure
plt.show()

In [None]:
# Task 3: National Team Analysis
# Restricts the data to rows whose 'nationality_name' is 'England', averages
# their 'overall' rating per year, and draws the yearly means as a marked line.
# NOTE(review): the filter keeps every row with nationality 'England'; if the
# data also contains club sides carrying that nationality they are averaged in
# as well -- confirm that this matches the intended "national team" scope.

# Rows whose nationality is 'England'
is_england = clean_df['nationality_name'].eq('England')
national_team_analysis = clean_df.loc[is_england]

# Mean overall rating of the selected rows per year
national_team_performance = national_team_analysis.groupby('year')['overall'].mean()

# Line chart of the yearly means; the returned Axes is used for labelling
ax = national_team_performance.plot.line(marker='o')

# Title and axis labels
ax.set_title('Average National Team Performance Over The Years')
ax.set_xlabel('Years')
ax.set_ylabel('Average Overall Rating')

# Render the figure
plt.show()

In [None]:
# Task 4: Stadium Influence
# Ranks home stadiums by the mean 'overall' rating of the rows that list them
# and shows the ten highest-ranked stadiums as a bar chart. The x tick labels
# are rotated so the stadium names stay readable.

# Mean overall rating per home stadium, highest first
stadium_influence = (
    clean_df
    .groupby('home_stadium')['overall']
    .mean()
    .sort_values(ascending=False)
)

# Create a bar plot of the first ten entries, i.e. the ten highest averages
ax = stadium_influence.head(10).plot.bar(figsize=(10, 6))

# Title and axis labels
ax.set_title('Top 10 Home Stadiums with Highest Team Ratings')
ax.set_xlabel('Home Stadium')
ax.set_ylabel('Average Overall Rating')

# Rotate x-axis labels for better readability
plt.xticks(rotation=45)

# Render the figure
plt.show()

In [None]:
# Task 5: Time Series Analysis
# Tracks how the 'attack', 'midfield' and 'defence' ratings develop over time:
# a pivot table holds the mean of each attribute per year, and the three
# resulting columns are drawn as marked lines on one chart.

# Mean of each attribute per year (rows: year, columns: attributes)
time_series_analysis = clean_df.pivot_table(
    index='year',
    values=['attack', 'midfield', 'defence'],
    aggfunc='mean',
)

# One line per attribute, 10x6 inch figure
ax = time_series_analysis.plot.line(marker='o', figsize=(10, 6))

# Title and axis labels
ax.set_title('Team Attributes Evolution Over Years')
ax.set_xlabel('Year')
ax.set_ylabel('Average Rating')

# Render the figure
plt.show()

In [None]:
# Task 6: Correlations
# The code below computes the pairwise correlations between the rating
# attributes ('overall', 'attack', 'midfield', 'defence') and shows them as an
# annotated heatmap, where the 'coolwarm' colours encode the sign and size of
# each coefficient.

# Correlation matrix of the four rating columns
rating_columns = ['overall', 'attack', 'midfield', 'defence']
correlation_matrix = clean_df[rating_columns].corr()

# Heatmap with the coefficient written into every cell
ax = sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')

# Plot title
ax.set_title('Correlation Matrix')

# Render the figure
plt.show()

In [None]:
# Task 7: Top Teams
# Identifies the ten teams with the highest overall rating and shows them as
# a bar chart (team names on the x-axis, rating on the y-axis, labels rotated
# for readability).
#
# clean_df carries a 'year' column, so the same team can appear in several
# rows. Ranking the raw rows would therefore let one team occupy several of
# the ten slots; each team is first reduced to its best (maximum) overall
# rating so that the chart lists ten distinct teams.

# Best overall rating per team, then the ten highest of those.
# as_index=False keeps 'team_name' as a regular column, so top_teams is still
# a DataFrame with the columns ['team_name', 'overall'].
top_teams = (
    clean_df
    .groupby('team_name', as_index=False)['overall']
    .max()
    .nlargest(10, 'overall')
)

# Bar chart of the ten teams by their overall rating
ax = top_teams.plot(kind='bar', x='team_name', y='overall', figsize=(10, 6))

# Title and axis labels
ax.set_title('Top 10 Teams by Overall Rating')
ax.set_xlabel('Team')
ax.set_ylabel('Overall Rating')

# Rotate x-axis labels for better readability
plt.xticks(rotation=45)

# Render the figure
plt.show()

In [None]:
# Task 8: Data Distribution and Outliers
# Draws one box per rating attribute ('overall', 'attack', 'midfield',
# 'defence') so that the median, the quartiles and any points plotted as
# outliers can be compared side by side.

# Open a 12x6 inch figure; the boxplot below draws into its current axes
plt.figure(figsize=(12, 6))

# Boxplot of the four rating columns; the returned Axes is used for labelling
ax = clean_df.boxplot(column=['overall', 'attack', 'midfield', 'defence'])

# Title and y-axis label
ax.set_title('Distribution and Outliers of Team Ratings')
ax.set_ylabel('Rating')

# Render the figure
plt.show()