In [1]:
import pandas as pd
import warnings
import plotly.express as px  
import plotly.io as pio

In [2]:
# Load the Forbes athlete workbook; the path is relative to the working directory.
forbes_workbook = 'Forbes Athlete List 2012-2019.xlsx'
df = pd.read_excel(forbes_workbook)

In [3]:
warnings.filterwarnings("ignore")

In [4]:
# Peek at the first five rows to see the column layout.
df.head(5)

Unnamed: 0,Rank,Name,Pay,Salary/Winnings,Endorsements,Sport,Year
0,#1,Lionel Messi,$127 M,$92 M,$35 M,Soccer,2019
1,#2,Cristiano Ronaldo,$109 M,$65 M,$44 M,Soccer,2019
2,#3,Neymar,$105 M,$75 M,$30 M,Soccer,2019
3,#4,Canelo Alvarez,$94 M,$92 M,$2 M,Boxing,2019
4,#5,Roger Federer,$93.4 M,$7.4 M,$86 M,Tennis,2019


In [5]:
# Print each column's dtype and non-null count.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 795 entries, 0 to 794
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   Rank             795 non-null    object
 1   Name             795 non-null    object
 2   Pay              795 non-null    object
 3   Salary/Winnings  795 non-null    object
 4   Endorsements     795 non-null    object
 5   Sport            795 non-null    object
 6   Year             795 non-null    int64 
dtypes: int64(1), object(6)
memory usage: 43.6+ KB


In [6]:
# Count the missing values in each column.
df.isna().sum()

Rank               0
Name               0
Pay                0
Salary/Winnings    0
Endorsements       0
Sport              0
Year               0
dtype: int64

**There are no null values, so no missing-data handling is needed**

### <div class='alert alert-info'> Data Preprocessing </div>

In [7]:
# Look at the rows again before cleaning the money columns.
df.head(n=5)

Unnamed: 0,Rank,Name,Pay,Salary/Winnings,Endorsements,Sport,Year
0,#1,Lionel Messi,$127 M,$92 M,$35 M,Soccer,2019
1,#2,Cristiano Ronaldo,$109 M,$65 M,$44 M,Soccer,2019
2,#3,Neymar,$105 M,$75 M,$30 M,Soccer,2019
3,#4,Canelo Alvarez,$94 M,$92 M,$2 M,Boxing,2019
4,#5,Roger Federer,$93.4 M,$7.4 M,$86 M,Tennis,2019


 **<font color='green'>What we are doing next:</font>**

- The values of Pay, Salary/Winnings and Endorsements are stored as text rather than integer or float values,<br>
  so we cannot do further numerical analysis on them yet
- So we do data preprocessing to remove the `$` (dollars), `M` (million) and `K` (thousand) markers<br>
  by replacing them with empty strings
- Finally we use the `astype` function to convert the columns from object to float

In [8]:
# Order the rows by Year, earliest first.
df = df.sort_values('Year', ascending=True)

In [9]:
# Convert Endorsements from text such as "$35 M" into a float expressed in
# millions of dollars. astype(str) keeps the cell re-runnable once the column
# has already been converted to float.
endorsements_text = df['Endorsements'].astype(str).str.strip()

# Remember which amounts are quoted in thousands ("K") before the suffix is
# removed; dropping the letter alone would make "$500 K" read as 500 million.
endorsements_in_thousands = endorsements_text.str.endswith('K')

endorsements_value = (
    endorsements_text
    .str.replace('$', '', regex=False)  # literal '$', not the regex end-of-string anchor
    .str.replace('M', '', regex=False)
    .str.replace('K', '', regex=False)
    .str.strip()
    .astype('float')
)

# Rescale the thousands entries so the whole column is in millions.
df['Endorsements'] = endorsements_value.where(
    ~endorsements_in_thousands, endorsements_value / 1000
)


In [10]:
# Convert Pay from text such as "$127 M" into a float expressed in millions of
# dollars. astype(str) keeps the cell re-runnable once the column has already
# been converted to float.
pay_text = df['Pay'].astype(str).str.strip()

# Remember which amounts are quoted in thousands ("K") before the suffix is
# removed; dropping the letter alone would make "$500 K" read as 500 million.
pay_in_thousands = pay_text.str.endswith('K')

pay_value = (
    pay_text
    .str.replace('$', '', regex=False)  # literal '$', not the regex end-of-string anchor
    .str.replace('M', '', regex=False)
    .str.replace('K', '', regex=False)
    .str.strip()
    .astype('float')
)

# Rescale the thousands entries so the whole column is in millions.
df['Pay'] = pay_value.where(~pay_in_thousands, pay_value / 1000)

In [11]:
# Convert Salary/Winnings from text such as "$92 M" into a float expressed in
# millions of dollars. astype(str) keeps the cell re-runnable once the column
# has already been converted to float.
salary_text = df['Salary/Winnings'].astype(str).str.strip()

# Remember which amounts are quoted in thousands ("K") before the suffix is
# removed; dropping the letter alone would make "$500 K" read as 500 million.
salary_in_thousands = salary_text.str.endswith('K')

salary_digits = (
    salary_text
    .str.replace('$', '', regex=False)  # literal '$', not the regex end-of-string anchor
    .str.replace('M', '', regex=False)
    .str.replace('K', '', regex=False)
    .str.strip()
)

# Rows that hold only the placeholder '-' instead of a number cannot be
# converted, so they are removed from the frame (as a new copy, not in place).
has_salary = salary_digits != '-'
df = df.loc[has_salary].copy()

salary_value = salary_digits[has_salary].astype('float')

# Rescale the thousands entries so the whole column is in millions.
df['Salary/Winnings'] = salary_value.where(
    ~salary_in_thousands[has_salary], salary_value / 1000
)

In [12]:
# Check the first five rows after the money columns were converted.
df.head(5)

Unnamed: 0,Rank,Name,Pay,Salary/Winnings,Endorsements,Sport,Year
794,100,Jake Peavy,16.6,16.5,0.1,Baseball,2012
721,27,Sam Bradford,27.8,26.8,1.0,Football,2012
722,28,Tom Brady,27.1,23.1,4.0,Football,2012
723,29,Joe Mauer,27.0,23.0,4.0,Baseball,2012
724,30,Eli Manning,26.6,18.6,8.0,Football,2012


**So now our data is ready for further analysis**

In [13]:
# First five rows of the cleaned frame.
df.head(n=5)

Unnamed: 0,Rank,Name,Pay,Salary/Winnings,Endorsements,Sport,Year
794,100,Jake Peavy,16.6,16.5,0.1,Baseball,2012
721,27,Sam Bradford,27.8,26.8,1.0,Football,2012
722,28,Tom Brady,27.1,23.1,4.0,Football,2012
723,29,Joe Mauer,27.0,23.0,4.0,Baseball,2012
724,30,Eli Manning,26.6,18.6,8.0,Football,2012


In [14]:
# (rows, columns) of the frame after cleaning.
df.shape

(794, 7)

In [15]:
# Keep only the rows belonging to the three athletes examined below.
featured_athletes = ['Lionel Messi', 'Cristiano Ronaldo', 'Roger Federer']
is_featured = df['Name'].isin(featured_athletes)
de = df[is_featured]

In [16]:
# Preview the first five rows of the three-athlete subset.
de.head(5)

Unnamed: 0,Rank,Name,Pay,Salary/Winnings,Endorsements,Sport,Year
699,5,Roger Federer,52.7,7.7,45.0,Tennis,2012
703,9,Cristiano Ronaldo,42.5,20.5,22.0,Soccer,2012
705,11,Lionel Messi,39.0,20.0,19.0,Soccer,2012
596,2,Roger Federer,71.5,6.5,65.0,Tennis,2013
603,9,Cristiano Ronaldo,44.0,23.0,21.0,Soccer,2013


**We will work with the data of only three athletes for better understanding**

In [17]:
# (rows, columns) of the three-athlete subset.
de.shape

(24, 7)

## <font color='green'> Scatter plot</font>

In [25]:
# 3-D scatter of the selected athletes: Year (x) vs. Pay (y) vs. Endorsements (z),
# one colour per athlete.
fig = px.scatter_3d(
    de,
    x='Year', y='Pay', z='Endorsements',
    color='Name',
    color_discrete_sequence=['magenta', 'green', 'blue'],
    log_y=True,   # Pay is drawn on a log scale; log_x / log_z do the same for x and z
    title='Highest Earnings of Athletes over the years',
    template='ggplot2',
    height=700,   # figure height in pixels
)

fig.show()

## <font color='green'>Points to Note</font>

- We can rotate the graph however we want to check the correlation along all the axes
- We can hover the cursor over the points to get the exact values of our data
- We can see that over the years the Endorsements are also increasing for all three athletes
- We can see that over the years the Pay is also increasing
- We have 3 different colours for the three different players, and the colours are marked in the legend.
- We can see that in 2019 Lionel Messi has the highest Pay (127 M), while Roger Federer has the highest Endorsements (86 M)
- In this way we can draw many other inferences just by analysing the plot

## <div class='alert alert-info'> Using our own custom colours, and symbols for different sports</div>

In [20]:
# Same scatter, now with an explicit colour per athlete and a marker shape per sport.
athlete_colours = {
    'Cristiano Ronaldo': 'black',
    'Lionel Messi': 'yellow',
    'Roger Federer': 'pink',
}

fig = px.scatter_3d(
    de,
    x='Year', y='Pay', z='Endorsements',
    color='Name',
    color_discrete_map=athlete_colours,
    # palette passed alongside the explicit per-athlete map above
    color_discrete_sequence=['magenta', 'green', 'blue'],
    symbol='Sport',   # marker shape varies with the Sport column
    log_x=True,       # Year axis on a log scale
    title='Highest Earnings of Athletes over the years',
    template='ggplot2',
    height=700,       # figure height in pixels
)

fig.show()

## <font color='green'>Points to Note</font>

- Here we have the same data, but we have given it our own colours instead of the colours used in the previous plot
- Here we are differentiating Messi and Ronaldo from Roger: Roger is a tennis player whereas Messi and Ronaldo<br>
  are soccer players, so different marker shapes are used for soccer and tennis players.
  For soccer we have a diamond and for tennis a normal circle

## <div class='alert alert-info'> Bubble plots having sizes depending on the Salary/Winnings</div>

In [21]:
# Recall the subset's columns before using Salary/Winnings as the bubble size.
de.head(n=5)

Unnamed: 0,Rank,Name,Pay,Salary/Winnings,Endorsements,Sport,Year
699,5,Roger Federer,52.7,7.7,45.0,Tennis,2012
703,9,Cristiano Ronaldo,42.5,20.5,22.0,Soccer,2012
705,11,Lionel Messi,39.0,20.0,19.0,Soccer,2012
596,2,Roger Federer,71.5,6.5,65.0,Tennis,2013
603,9,Cristiano Ronaldo,44.0,23.0,21.0,Soccer,2013


In [22]:
# Bubble version of the scatter: marker size follows Salary/Winnings.
fig = px.scatter_3d(
    de,
    x='Year', y='Pay', z='Endorsements',
    color='Name',
    color_discrete_sequence=['magenta', 'green', 'blue'],
    size='Salary/Winnings',
    size_max=50,          # upper bound for the marker size
    log_x=True,           # Year axis on a log scale
    title='Highest Earnings of Athletes over the years',
    template='seaborn',   # a different template from the earlier plots
    height=700,           # figure height in pixels
)

fig.show()

## <font color='green'>Points to Note</font>

- Here we have turned the plot into a bubble plot, where the size of each bubble shows the Salary/Winnings value
- We can get the exact value by hovering the mouse cursor over a bubble.

## <div class='alert alert-info'>Let's add animation </div>


In [23]:
# Animated scatter: one frame per Year, stepped through with the play button.
#
# The axis ranges are fixed up front. Each frame holds a single year and the
# axes are not re-scaled between frames, so without explicit ranges the points
# of later frames can fall outside the view chosen for the first frame.
year_range = [int(de['Year'].min()) - 1, int(de['Year'].max()) + 1]
# Pay is on a log axis, so its range must stay above zero
# (assumes every Pay value is positive - TODO confirm).
pay_range = [float(de['Pay'].min()) * 0.9, float(de['Pay'].max()) * 1.1]
endorsement_range = [0, float(de['Endorsements'].max()) * 1.1]

fig = px.scatter_3d(
    data_frame=de,
    x='Year',
    y='Pay',
    z='Endorsements',
    color='Name',
    color_discrete_sequence=['magenta', 'green', 'blue'],
    log_y=True,                 # Pay on a log scale; log_x / log_z do the same for x and z
    range_x=year_range,
    range_y=pay_range,
    range_z=endorsement_range,
    template='ggplot2',
    title='Highest Earnings of Athletes over the years',
    height=700,                 # figure height in pixels
    animation_frame='Year',     # one animation frame per year
)

pio.show(fig)

## <font color='green'>Points to Note</font>

- Here we have just passed the animation parameter (`animation_frame='Year'`), which gives us a play button that shows how the Pay and Endorsements change over the years for all the players

# How to save it in html and share it with others

In [24]:
# Uncomment the next line to write the most recent figure to an HTML file:
#fig.write_html('Atheletes.html')