In [None]:
# importing python libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

## Reading csv files

In [None]:
# Lift pandas' display limits so frames are rendered without row/column truncation
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

# Load the season statistics table used by every cell below
stats_df = pd.read_csv('../input/premier-league/stats.csv')

In [None]:
# Preview the first five rows to check that the file loaded as expected
stats_df.head(n=5)

## Finding each team's total wins, losses and draws out of the total matches played

In [None]:
# Draws are whatever remains of the 38 matches once wins and losses are removed
stats_df['draws'] = 38 - stats_df['wins'] - stats_df['losses']
# Re-insert the new column at position 3 (pop first so the move also works on a re-run)
stats_df.insert(3, 'draws', stats_df.pop('draws'))

# Matches played = wins + losses + draws
stats_df['total_matches'] = stats_df['wins'] + stats_df['losses'] + stats_df['draws']
# Place it directly after 'draws', at position 4
stats_df.insert(4, 'total_matches', stats_df.pop('total_matches'))

stats_df.head()

In [None]:
# Sum wins, losses, draws and matches played per team across all rows,
# then order the teams from most to fewest wins
result_columns = ['wins', 'losses', 'draws', 'total_matches']
df = (
    stats_df
    .groupby('team')[result_columns]
    .sum()
    .sort_values(by='wins', ascending=False)
)
# Show the ten teams with the most wins
df.head(10)

We can see that Manchester United played every possible match in the top division and also won the most matches.

## Plotting the top 10 teams with most wins

In [None]:
# Top 10 teams by total wins (df is sorted by wins, descending)
teams = df.index[:10]
wins = df.wins[:10]
matches = df.total_matches[:10]

plt.style.use('fivethirtyeight')

# Numeric bar positions: both series are offset around each position so the
# bars sit side by side instead of overlapping, and the axis stays purely
# numeric (no mixing of numeric and categorical x values).
x_index = np.arange(len(teams))
bar_width = 0.4
plt.bar(x_index - bar_width / 2, wins, width = bar_width, label = 'Total Wins')
plt.bar(x_index + bar_width / 2, matches, width = bar_width, label = 'Total Matches')

plt.ylabel('Total Wins per total matches', size = 14)
plt.xlabel('Teams', size = 15)
# Label each pair of bars with its team name
plt.xticks(x_index, teams, rotation = 'vertical', size = 12)
plt.legend(loc = 4, prop = {'size': 10})
plt.show()

## Finding the teams with most losses

In [None]:
# Creating a new dataframe 
df2 = df.sort_values(by = 'losses', ascending = False)
df2.head(10)

We can observe that West Ham United and Sunderland lost the most matches, but WHU played more matches in the top division, so it is at the top of the table. Also, <b>Everton appears in both the most-wins and the most-losses dataframes</b>.

## Plotting the top 10 teams with most losses

In [None]:
# Top 10 teams by total losses. All three series come from df2 (sorted by
# losses) so the bar heights belong to the teams named on the x axis.
lost_teams = df2.index[:10]
losses = df2.losses[:10]
total_matches = df2.total_matches[:10]

plt.style.use('ggplot')

# Numeric bar positions; the two series are offset around each position so
# the bars are drawn side by side rather than on top of each other.
x_index2 = np.arange(len(lost_teams))
bar_width = 0.4
plt.bar(x_index2 - bar_width / 2, losses, width = bar_width, label = 'Total losses')
plt.bar(x_index2 + bar_width / 2, total_matches, width = bar_width, label = 'Total Matches')

plt.ylabel('Total losses per total matches', size = 14, color = 'black')
plt.xlabel('Teams', size = 15, color = 'black')
# Label each pair of bars with its team name
plt.xticks(x_index2, lost_teams, rotation = 'vertical', size = 12, color = 'black')
plt.yticks(color = 'black')
plt.legend(loc = 4, prop = {'size': 10})
plt.show()

## Finding the top 10 teams with most scoring chances and on-target chances

In [None]:
# Sum total and on-target scoring attempts per team,
# then rank the teams by total scoring attempts (highest first)
attempt_columns = ['total_scoring_att', 'ontarget_scoring_att']
df3 = (
    stats_df
    .groupby('team')[attempt_columns]
    .sum()
    .sort_values(by='total_scoring_att', ascending=False)
)
# Show the ten teams with the most scoring attempts
df3.head(10)

## Plotting the chances created vs on-target chances for top 10 teams

In [None]:
# Top 10 teams by total scoring attempts (df3 is sorted by that column)
teams = df3.index[:10]
scoring_chances = df3.total_scoring_att[:10]
ontarget = df3.ontarget_scoring_att[:10]

# Using seaborn style plot. The style was renamed to 'seaborn-v0_8' in newer
# matplotlib releases and the old 'seaborn' name no longer resolves there,
# so pick whichever name this installation provides.
seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(seaborn_style)

# Both series share the same x positions on purpose: the on-target bar is
# drawn over the total-attempts bar for the same team.
plt.bar(teams, scoring_chances, label = 'Total_Scoring_Chances')
plt.bar(teams, ontarget, label = 'On-target')
plt.xlabel('Teams', size = 15, color = 'black')
plt.ylabel('Scoring chances', size = 15, color = 'black')
plt.xticks(rotation = 'vertical', size = 12)
plt.yticks(size = 12)
plt.legend()
plt.show()

Clearly, Chelsea created the most chances and also had the most on-target attempts.

## Finding the champion for every season from 2006/07 to 2017/18

In [None]:
# Take the first entry of every season.
# NOTE(review): this treats the first row of each season as the champion, which
# assumes rows are ordered by final league position within a season - confirm.
# GroupBy.first() picks the first non-null value per column, so a missing value
# in that row would be filled from a later row of the same season.
by_season = stats_df.groupby('season')
df4 = by_season.first()
df4

#### Counting the number of times each team has been champion

In [None]:
# Count how many seasons each team appears in df4 (one row per season),
# and expose the count as a single named column indexed by team
titles_per_team = df4.groupby('team')['team'].count()
winner_df = titles_per_team.to_frame(name='no_of_times_being_champ')
winner_df

## Plotting the above data using pie chart

In [None]:
# Pie chart of how many times each team finished as champion
team = winner_df.index
champ = winner_df.no_of_times_being_champ

# Pull out the slice of the team with the most titles. The offsets are built
# from the data so the list always has exactly one entry per slice, whatever
# the number of distinct champions.
most_titles_team = champ.idxmax()
explode = [0.075 if name == most_titles_team else 0 for name in team]

plt.style.use('fivethirtyeight')
plt.pie(champ, labels = team , explode = explode, wedgeprops = {'edgecolor':'black'}, autopct='%1.1f%%')
plt.title('Teams that are crowned champions')
plt.show()

Clearly Manchester United won the championship the most, thanks to Sir Alex Ferguson😀.