In [1]:
import numpy as np
import pandas as pd

In [2]:
pd.__version__

'1.3.5'

#### Introducing A New Dataset

In [3]:

# NEW DATA!
# - English Premier League soccer players
# - 400+ players x 17 attributes


In [4]:
data_url = 'https://andybek.com/pandas-soccer' # as always, this will be attached too!

In [5]:
players = pd.read_csv(data_url)

In [6]:
players.info(verbose=False, memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 465 entries, 0 to 464
Columns: 17 entries, name to new_signing
dtypes: float64(2), int64(10), object(5)
memory usage: 190.7 KB


In [7]:
players.dtypes.value_counts()

int64      10
object      5
float64     2
dtype: int64

In [8]:
#players.memory_usage()

In [9]:
players.head(3)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0


# 104: Quick Review: Indexing With Boolean Masks

In [10]:
# boolean indexing:
#   step 1: generate sequence of booleans
#   step 2: use boolean sequence in [] or .loc[]

In [11]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [12]:
# Q: what are the players that have a market value exceeding 40M?

In [13]:
players.market_value > 40

0       True
1       True
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Name: market_value, Length: 465, dtype: bool

In [14]:
players[players.market_value > 40].head(3)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
96,Eden Hazard,Chelsea,26,LW,1,75.0,4220,10.5,2.30%,224,2,Belgium,0,3,5,1,0


In [15]:
players[players.market_value > 40].shape

(13, 17)

## #105: More Approaches To Boolean Masking

In [16]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [17]:
players.position.unique()

array(['LW', 'AM', 'GK', 'RW', 'CB', 'RB', 'CF', 'LB', 'DM', 'RM', 'CM',
       nan, 'SS', 'LM'], dtype=object)

In [18]:
# number of distinct position codes, counting the missing value (NaN) as one of them
players.position.nunique(dropna=False)

14

In [19]:
# defender codes: LB, CB, RB

In [20]:
players.position.isin(['LB', 'CB', 'RB'])

0      False
1      False
2      False
3      False
4       True
       ...  
460    False
461     True
462     True
463    False
464    False
Name: position, Length: 465, dtype: bool

In [21]:
players.loc[players.position.isin(['LB', 'CB', 'RB'])]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
17,Gabriel Paulista,Arsenal,26,CB,3,13.0,552,5.0,0.10%,45,3,Brazil,0,3,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
458,Angelo Ogbonna,West+Ham,29,CB,3,9.0,247,4.5,1.10%,45,2,Italy,0,4,20,0,0
459,Pablo Zabaleta,West+Ham,32,RB,3,7.0,698,5.0,2.70%,45,3,Argentina,0,5,20,0,0
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1


In [22]:
# strictly between 40 and 50 (both endpoints excluded);
# boolean values for `inclusive` are deprecated since pandas 1.3 -> use 'neither'
players.market_value.between(40, 50, inclusive='neither')

  """Entry point for launching an IPython kernel.


0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Name: market_value, Length: 465, dtype: bool

In [23]:
# market value from 40 to 50 with both endpoints included;
# boolean values for `inclusive` are deprecated since pandas 1.3 -> use 'both'
players[players.market_value.between(40, 50, inclusive='both')]

  """Entry point for launching an IPython kernel.


Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
31,Alexandre Lacazette,Arsenal,26,CF,1,40.0,1183,10.5,26.50%,0,2,France,1,3,1,1,0
97,Diego Costa,Chelsea,28,CF,1,50.0,4454,10.0,3.00%,196,2,Spain,0,4,5,1,0
102,Thibaut Courtois,Chelsea,25,GK,4,40.0,1260,5.5,18.50%,141,2,Belgium,0,3,5,1,0
108,N%27Golo Kante,Chelsea,26,DM,2,50.0,4042,5.0,13.80%,83,2,France,0,3,5,1,1
218,Philippe Coutinho,Liverpool,25,AM,1,45.0,2958,9.0,30.80%,171,3,Brazil,0,3,10,1,0
219,Sadio Mane,Liverpool,25,LW,1,40.0,3219,9.5,5.30%,156,4,Senegal,0,3,10,1,1
246,Raheem Sterling,Manchester+City,22,LW,1,45.0,2074,8.0,3.80%,149,1,England,0,2,11,1,0
263,Bernardo Silva,Manchester+City,22,RW,1,40.0,1098,8.0,4.60%,0,2,Portugal,1,2,11,1,0
264,Romelu Lukaku,Manchester+United,24,CF,1,50.0,3727,11.5,45.00%,221,2,Belgium,0,2,12,1,0


In [24]:
players.age <= 25

0      False
1      False
2      False
3      False
4      False
       ...  
460     True
461     True
462     True
463     True
464    False
Name: age, Length: 465, dtype: bool

In [25]:
players.loc[players.age <= 25]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
9,Alex Iwobi,Arsenal,21,LW,1,10.0,1812,5.5,1.00%,89,4,Nigeria,0,1,1,1,0
10,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
11,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,Pedro Obiang,West+Ham,25,CM,2,9.0,286,4.5,0.30%,55,2,Spain,0,3,20,0,0
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.5,198,4.5,0.30%,29,1,England,0,2,20,0,0


In [26]:
players.loc[players.age.le(25)]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
9,Alex Iwobi,Arsenal,21,LW,1,10.0,1812,5.5,1.00%,89,4,Nigeria,0,1,1,1,0
10,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
11,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
456,Pedro Obiang,West+Ham,25,CM,2,9.0,286,4.5,0.30%,55,2,Spain,0,3,20,0,0
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.5,198,4.5,0.30%,29,1,England,0,2,20,0,0


In [27]:
players.age.le(25).equals(players.age <= 25)

True

# 106 Binary Operators With Booleans

In [28]:
# Binary OR -> |

In [29]:
True | False

True

In [30]:
False | False

False

In [31]:
False | True

True

In [32]:
True | True

True

In [33]:
# Binary AND -> &

In [34]:
True & False

False

In [35]:
False & True

False

In [36]:
False & False

False

In [37]:
True & True

True

In [38]:
# a single false is enough!

In [39]:
True & True & False & True & True

False

In [40]:
f = pd.Series(False)

In [41]:
f

0    False
dtype: bool

In [42]:
t = pd.Series(True)

In [43]:
t


0    True
dtype: bool

In [44]:
t & f

0    False
dtype: bool

In [45]:
t | f

0    True
dtype: bool

In [46]:
# alternating True/False: True at even positions, False at odd ones
# (the comparison already yields a bool, so no `True if ... else False` is needed)
t = pd.Series([i % 2 == 0 for i in range(10)])

In [47]:
t

0     True
1    False
2     True
3    False
4     True
5    False
6     True
7    False
8     True
9    False
dtype: bool

In [48]:
# ten False values, same length as the alternating series
f = pd.Series([False] * 10)

In [49]:
f

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

In [50]:
t & f

0    False
1    False
2    False
3    False
4    False
5    False
6    False
7    False
8    False
9    False
dtype: bool

In [51]:
t | f

0     True
1    False
2     True
3    False
4     True
5    False
6     True
7    False
8     True
9    False
dtype: bool

In [52]:
# same labels in opposite orders, to show that & aligns on index labels rather than position
f = pd.Series([False, True, True], index=list('cba'))
t = pd.Series([True, False, False], index=list('abc'))

In [53]:
f

c    False
b     True
a     True
dtype: bool

In [54]:
t

a     True
b    False
c    False
dtype: bool

In [55]:
f & t

a     True
b    False
c    False
dtype: bool

# 107: BONUS - XOR And Complement Binary Ops

In [56]:
# Binary XOR -> ^

In [57]:
True ^ False

True

In [58]:
False ^ False

False

In [59]:
True ^ True

False

In [60]:
True ^ (False | False & True) | False

True

In [61]:
# (Two's) Complement -> ~

In [62]:
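# tilde (~) is bitwise NOT: on plain Python bools/ints it returns -(x + 1), e.g. ~True == -2;
# on a boolean pandas Series it negates element-wise (True <-> False)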

In [63]:
~False

-1

In [64]:
~0

-1

In [65]:
~True

-2

In [66]:
~1

-2

In [67]:
t = pd.Series([True, True, False])

In [68]:
t

0     True
1     True
2    False
dtype: bool

In [69]:
~t

0    False
1    False
2     True
dtype: bool

## 108: Combining Conditions

In [70]:
# select all the left backs -> LB

In [71]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [72]:
# all left backs who are 25 or younger

In [73]:
# left backs aged 25 or younger, using the method forms of == and <=
players[players.position.eq('LB') & players.age.le(25)]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
54,Brad Smith,Bournemouth,23,LB,3,2.0,297,4.0,3.30%,4,4,Australia,0,2,2,0,0
112,Kenedy,Chelsea,21,LB,3,7.0,566,5.0,0.10%,3,3,Brazil,0,1,5,1,0
128,Jeffrey Schlupp,Crystal+Palace,24,LB,3,8.0,385,5.0,0.30%,47,4,Ghana,0,2,6,0,0
212,Ben Chilwell,Leicester+City,20,LB,3,2.5,288,4.5,0.80%,19,1,England,0,1,9,0,0
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0
294,Paul Dummett,Newcastle+United,25,LB,3,3.5,177,4.5,1.00%,0,2,Wales,0,3,13,0,0
298,Massadio Haidara,Newcastle+United,24,LB,3,1.5,114,4.0,0.50%,0,2,France,0,2,13,0,0
328,Matt Targett,Southampton,21,LB,3,3.0,110,4.5,0.20%,12,1,England,0,1,14,0,0


In [74]:
# same filter, but keep only 3 columns and drop the rest;
# one .loc call selects rows and columns together instead of chaining [] twice
players.loc[
    (players.position == 'LB') & (players.age <= 25),
    ['name', 'age', 'position'],
]

Unnamed: 0,name,age,position
29,Sead Kolasinac,24,LB
54,Brad Smith,23,LB
112,Kenedy,21,LB
128,Jeffrey Schlupp,24,LB
212,Ben Chilwell,20,LB
236,Alberto Moreno,25,LB
281,Luke Shaw,22,LB
294,Paul Dummett,25,LB
298,Massadio Haidara,24,LB
328,Matt Targett,21,LB


In [75]:
# 3 conditions: left backs, 25 or younger, market value 10+

In [76]:
players[
        (players.position == 'LB') & 
        (players.age <= 25) & 
        (players.market_value >= 10)
        ]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0
389,Ben Davies,Tottenham,24,LB,3,12.0,396,5.5,1.80%,90,2,Wales,0,2,17,1,0


In [77]:
# 4 conditions: left backs, 25 or younger, market value 10+, and NOT from Arsenal or Tottenham clubs

In [78]:
# left backs, 25 or younger, valued at 10+, and not playing for Tottenham or Arsenal
players[
    players.position.eq('LB')
    & players.age.le(25)
    & players.market_value.ge(10)
    & ~players.club.isin(['Tottenham', 'Arsenal'])
]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0


## 109: Conditions As Variables

In [79]:
players[
        (players.position == 'LB') &
        (players.age <= 25) &
        (players.market_value >= 10) &
        ~(players.club.isin(['Tottenham', 'Arsenal']))
        ]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
236,Alberto Moreno,Liverpool,25,LB,3,10.0,397,4.5,0.30%,8,2,Spain,0,3,10,1,0
281,Luke Shaw,Manchester+United,22,LB,3,20.0,947,5.0,0.40%,45,1,England,0,2,12,1,0


In [80]:
# new target: Arsenal right backs and Chelsea goalkeepers

In [81]:
arsenal_player = players.club == 'Arsenal'

In [82]:
arsenal_player

0       True
1       True
2       True
3       True
4       True
       ...  
460    False
461    False
462    False
463    False
464    False
Name: club, Length: 465, dtype: bool

In [83]:
right_back = players.position == 'RB'

In [84]:
right_back

0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461    False
462     True
463    False
464    False
Name: position, Length: 465, dtype: bool

In [85]:
# True only for rows that are both a Chelsea player and a goalkeeper
chelsea_and_GK = players.club.eq('Chelsea') & players.position.eq('GK')

In [86]:
chelsea_and_GK

0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Length: 465, dtype: bool

In [87]:
# & binds tighter than |, so this is (Arsenal AND right back) OR (Chelsea goalkeeper);
# the parentheses only make that grouping explicit
players.loc[(arsenal_player & right_back) | chelsea_and_GK]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
27,Carl Jenkinson,Arsenal,25,RB,3,5.0,561,4.5,0.40%,2,1,England,0,3,1,1,0
102,Thibaut Courtois,Chelsea,25,GK,4,40.0,1260,5.5,18.50%,141,2,Belgium,0,3,5,1,0
109,Willy Caballero,Chelsea,35,GK,4,1.5,542,5.0,0.20%,64,3,Argentina,0,6,5,1,0


# **110: Skill Challenge**

###### Find the players in our dataset that meet these criteria:

1.   they're English (*nationality*), and
2.   their market value is more than twice the average market value in the league (*market_value*), and
3. they either have more than 4,000 views (*page_views*) or are a new signing (*new_signing*) but not both

#### Solution

In [88]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [89]:
# first condition

In [90]:
english = players.nationality == 'England'

In [91]:
english.head()

0    False
1    False
2    False
3     True
4    False
Name: nationality, dtype: bool

In [92]:
# second condition

In [93]:
players.market_value.mean()

11.125649350649349

In [94]:
# NOTE: despite the name, this flags players valued at more than TWICE the league mean market value
above_average = players.market_value > players.market_value.mean() * 2

In [95]:
above_average.head()

0     True
1     True
2    False
3    False
4    False
Name: market_value, dtype: bool

In [96]:
# third condition

In [97]:
# exclusive OR: more than 4,000 page views or a new signing, but not both
popular_xor_new = players.page_views.gt(4000) ^ players.new_signing.eq(1)

In [98]:
popular_xor_new.head()

0     True
1     True
2    False
3    False
4    False
dtype: bool

## NOTE: all 3 conditions must have the same shape (one boolean per row of `players`); otherwise the selection gives an error.

In [99]:
print(english.shape) # NOTE: shape of the all 3 Condition must be the same otherwise it gives Error.

(465,)


In [100]:
print(above_average.shape)
print(popular_xor_new.shape)
print(players.shape)

(465,)
(465,)
(465, 17)


In [101]:
players.loc[english & above_average & popular_xor_new]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
256,John Stones,Manchester+City,23,CB,3,35.0,1078,5.5,2.30%,59,1,England,0,2,11,1,1
380,Dele Alli,Tottenham,21,CM,2,45.0,4626,9.5,38.60%,225,1,England,0,1,17,1,0
381,Harry Kane,Tottenham,23,CF,1,60.0,4161,12.5,35.10%,224,1,England,0,2,17,1,0


# **112: 2D Indexing**

In [102]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [103]:
# chelsea players and 23 years old or younger

In [104]:
# Chelsea players aged 23 or younger
chelsea_23under = players.club.eq('Chelsea') & players.age.le(23)

In [105]:
chelsea_23under.head()

0    False
1    False
2    False
3    False
4    False
dtype: bool

In [106]:
players.loc[chelsea_23under, ['position', 'market_value']]

Unnamed: 0,position,market_value
110,CF,25.0
111,CB,15.0
112,LB,7.0
115,DM,16.0


In [107]:
# ...select all columns that begin with 'p'

In [108]:
# str.startswith

In [109]:
p_cols = players.columns.str.startswith('p')

In [110]:
players.loc[chelsea_23under, p_cols]

Unnamed: 0,position,position_cat,page_views
110,CF,1,1162
111,CB,3,723
112,LB,3,566
115,DM,2,1011


In [111]:
print(chelsea_23under.shape)

(465,)


In [112]:
print(players.shape)

(465, 17)


In [113]:
print(p_cols.shape)

(17,)


### [ ] Chaining

> **NOTE:** both lines of code below return the same results;
> however, the second version (`.loc`) is faster and better than the first (chained `[]`).
> This is because of the dunder methods on the back end: each `[]` is a separate
> `__getitem__` call, so the chained form runs two lookups where `.loc[rows, cols]` runs one.



In [114]:
players[chelsea_23under]['position']

110    CF
111    CB
112    LB
115    DM
Name: position, dtype: object

In [115]:
players.loc[chelsea_23under, 'position']

110    CF
111    CB
112    LB
115    DM
Name: position, dtype: object

# 113: Fancy Indexing With lookup()

In [116]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [117]:
# fancy indexing: rows 0 and 132 crossed with two columns;
# a list (not a tuple) is used for the columns, since tuples denote a single MultiIndex key
players.loc[[0, 132], ['name', 'market_value']] # fancy

Unnamed: 0,name,market_value
0,Alexis Sanchez,65.0
132,Connor Wickham,6.0


In [118]:
# DataFrame.lookup() is deprecated since pandas 1.2 and removed in 2.0.
# Equivalent: pair each row label with its column label (0 -> 'name', 132 -> 'market_value')
# by translating labels to positions and indexing the underlying NumPy array.
players.to_numpy()[
    [players.index.get_loc(row) for row in [0, 132]],
    [players.columns.get_loc(col) for col in ['name', 'market_value']],
]

  """Entry point for launching an IPython kernel.


array(['Alexis Sanchez', 6.0], dtype=object)

In [119]:
# DataFrame.lookup() is deprecated since pandas 1.2 and removed in 2.0.
# Equivalent: pair each row label with its column label (0 -> 'market_value', 132 -> 'name')
# by translating labels to positions and indexing the underlying NumPy array.
players.to_numpy()[
    [players.index.get_loc(row) for row in [0, 132]],
    [players.columns.get_loc(col) for col in ['market_value', 'name']],
]

  """Entry point for launching an IPython kernel.


array([65.0, 'Connor Wickham'], dtype=object)

In [120]:
names = ['Petr Cech', 'Mesut Ozil', 'Alexis Sanchez']

In [121]:
attributes = ['age', 'market_value', 'page_views']

In [122]:
#players.set_index('name').lookup(names, attributes)

# 114: Sorting By Index Or Column

In [123]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [124]:
players.sort_values(by='market_value', ascending=False)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
96,Eden Hazard,Chelsea,26,LW,1,75.00,4220,10.5,2.30%,224,2,Belgium,0,3,5,1,0
267,Paul Pogba,Manchester+United,24,CM,2,75.00,7435,8.0,19.50%,115,2,France,0,2,12,1,1
0,Alexis Sanchez,Arsenal,28,LW,1,65.00,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
244,Kevin De Bruyne,Manchester+City,26,AM,1,65.00,2252,10.0,17.50%,199,2,Belgium,0,3,11,1,0
245,Sergio Aguero,Manchester+City,29,CF,1,65.00,4046,11.5,9.70%,175,3,Argentina,0,4,11,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
287,Joel Castro Pereira,Manchester+United,21,GK,4,0.10,395,4.0,1.00%,6,2,Portugal,0,1,12,1,0
113,Eduardo Carvalho,Chelsea,34,LW,1,0.05,467,5.0,0.10%,0,2,Portugal,0,6,5,1,1
30,Granit Xhaka,Arsenal,24,,2,,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,,56,6.0,0.60%,0,2,Benin,0,2,8,0,0


In [125]:
players.index

RangeIndex(start=0, stop=465, step=1)

In [126]:
players.set_index('name', inplace=True)

In [127]:
players.head(10)

Unnamed: 0_level_0,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
Olivier Giroud,Arsenal,30,CF,1,22.0,2230,8.5,2.50%,116,2,France,0,4,1,1,0
Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0
Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
Alex Iwobi,Arsenal,21,LW,1,10.0,1812,5.5,1.00%,89,4,Nigeria,0,1,1,1,0


In [128]:
players.index

Index(['Alexis Sanchez', 'Mesut Ozil', 'Petr Cech', 'Theo Walcott',
       'Laurent Koscielny', 'Hector Bellerin', 'Olivier Giroud',
       'Nacho Monreal', 'Shkodran Mustafi', 'Alex Iwobi',
       ...
       'Aaron Cresswell', 'Pedro Obiang', 'Sofiane Feghouli', 'Angelo Ogbonna',
       'Pablo Zabaleta', 'Edimilson Fernandes', 'Arthur Masuaku', 'Sam Byram',
       'Ashley Fletcher', 'Diafra Sakho'],
      dtype='object', name='name', length=465)

In [129]:
players.sort_index(inplace=True)

In [130]:
players.head(10)

Unnamed: 0_level_0,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
Adam Federici,Bournemouth,32,GK,4,1.0,126,4.0,1.50%,8,4,Australia,0,5,2,0,0
Adam Lallana,Liverpool,29,AM,1,25.0,1808,7.5,6.40%,139,1,England,0,4,10,1,0
Adam Smith,Bournemouth,26,RB,3,5.0,200,5.0,0.90%,104,1,England,0,3,2,0,0
Ademola Lookman,Everton,19,LW,1,5.0,1387,5.5,0.30%,16,1,England,0,1,7,0,0
Adrian,West+Ham,30,GK,4,8.0,266,4.5,0.80%,64,2,Spain,0,4,20,0,0


In [131]:
players.sort_index(axis=1)

Unnamed: 0_level_0,age,age_cat,big_club,club,club_id,fpl_points,fpl_sel,fpl_value,market_value,nationality,new_foreign,new_signing,page_views,position,position_cat,region
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Aaron Cresswell,27,3,0,West+Ham,20,60,1.30%,5.0,12.0,England,0,0,380,LB,3,1
Aaron Lennon,30,4,0,Everton,7,22,0.20%,5.5,5.0,England,0,0,504,RW,1,1
Aaron Mooy,26,3,0,Huddersfield,8,0,2.50%,5.5,5.0,Australia,0,0,588,CM,2,4
Aaron Ramsey,26,3,1,Arsenal,1,56,5.10%,7.0,35.0,Wales,0,0,1040,CM,2,1
Abdoulaye Doucoure,24,2,0,Watford,18,38,0.00%,5.0,6.0,France,0,0,124,CM,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Yohan Cabaye,31,4,0,Crystal+Palace,6,91,1.40%,5.5,15.0,France,0,0,456,CM,2,2
YounÃ¨s Kaboul,31,4,0,Watford,18,57,0.10%,4.5,2.5,France,0,1,263,CB,3,2
Ã‰tienne Capoue,29,4,0,Watford,18,131,8.00%,5.5,9.0,France,0,0,412,DM,2,2
Ã€ngel Rangel,34,6,0,Swansea,16,26,18.80%,4.0,1.0,Spain,0,0,137,RB,3,2


In [132]:
players.reset_index()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Yohan Cabaye,Crystal+Palace,31,CM,2,15.0,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,YounÃ¨s Kaboul,Watford,31,CB,3,2.5,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,Ã‰tienne Capoue,Watford,29,DM,2,9.0,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,Ã€ngel Rangel,Swansea,34,RB,3,1.0,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


In [133]:
# Each reset_index() call moves the current index into a new column. After the first
# call the index is a plain integer range, so repeating the call keeps adding
# redundant columns ('index', then 'level_0'), as the output below shows.
players.reset_index().reset_index().reset_index()

Unnamed: 0,level_0,index,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,0,0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,1,1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,2,2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,3,3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,4,4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,460,460,Yohan Cabaye,Crystal+Palace,31,CM,2,15.0,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,461,461,YounÃ¨s Kaboul,Watford,31,CB,3,2.5,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,462,462,Ã‰tienne Capoue,Watford,29,DM,2,9.0,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,463,463,Ã€ngel Rangel,Swansea,34,RB,3,1.0,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


In [134]:
players.reset_index(inplace=True)

In [135]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0


In [136]:
# To sort the columns alphabetically, sort the column index (axis=1):
players.sort_index(axis =1)

Unnamed: 0,age,age_cat,big_club,club,club_id,fpl_points,fpl_sel,fpl_value,market_value,name,nationality,new_foreign,new_signing,page_views,position,position_cat,region
0,27,3,0,West+Ham,20,60,1.30%,5.0,12.0,Aaron Cresswell,England,0,0,380,LB,3,1
1,30,4,0,Everton,7,22,0.20%,5.5,5.0,Aaron Lennon,England,0,0,504,RW,1,1
2,26,3,0,Huddersfield,8,0,2.50%,5.5,5.0,Aaron Mooy,Australia,0,0,588,CM,2,4
3,26,3,1,Arsenal,1,56,5.10%,7.0,35.0,Aaron Ramsey,Wales,0,0,1040,CM,2,1
4,24,2,0,Watford,18,38,0.00%,5.0,6.0,Abdoulaye Doucoure,France,0,0,124,CM,2,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,31,4,0,Crystal+Palace,6,91,1.40%,5.5,15.0,Yohan Cabaye,France,0,0,456,CM,2,2
461,31,4,0,Watford,18,57,0.10%,4.5,2.5,YounÃ¨s Kaboul,France,0,1,263,CB,3,2
462,29,4,0,Watford,18,131,8.00%,5.5,9.0,Ã‰tienne Capoue,France,0,0,412,DM,2,2
463,34,6,0,Swansea,16,26,18.80%,4.0,1.0,Ã€ngel Rangel,Spain,0,0,137,RB,3,2


# **115:Sorting vs. Reordering**

In [137]:
# reindex()

In [138]:
players_lite = players.iloc[:4, :4]

In [139]:
players_lite

Unnamed: 0,name,club,age,position
0,Aaron Cresswell,West+Ham,27,LB
1,Aaron Lennon,Everton,30,RW
2,Aaron Mooy,Huddersfield,26,CM
3,Aaron Ramsey,Arsenal,26,CM


In [140]:
# row order: 2, 1, 3, 0
# column order: age, name, position, club

In [141]:
players_lite.reindex(index=[2,1,3,0], columns=['age', 'name', 'position', 'club'])

Unnamed: 0,age,name,position,club
2,26,Aaron Mooy,CM,Huddersfield
1,30,Aaron Lennon,RW,Everton
3,26,Aaron Ramsey,CM,Arsenal
0,27,Aaron Cresswell,LB,West+Ham


In [142]:
players.reindex(index=[2,1,3,0])

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0


In [143]:
# ...get all the columns, and have them alphabetically ordered

In [144]:
players.reindex(index=[2,1,3,0]).sort_index(axis=1)

Unnamed: 0,age,age_cat,big_club,club,club_id,fpl_points,fpl_sel,fpl_value,market_value,name,nationality,new_foreign,new_signing,page_views,position,position_cat,region
2,26,3,0,Huddersfield,8,0,2.50%,5.5,5.0,Aaron Mooy,Australia,0,0,588,CM,2,4
1,30,4,0,Everton,7,22,0.20%,5.5,5.0,Aaron Lennon,England,0,0,504,RW,1,1
3,26,3,1,Arsenal,1,56,5.10%,7.0,35.0,Aaron Ramsey,Wales,0,0,1040,CM,2,1
0,27,3,0,West+Ham,20,60,1.30%,5.0,12.0,Aaron Cresswell,England,0,0,380,LB,3,1


In [145]:
players.reindex(index=[2,1,3,0], columns=[])

2
1
3
0


In [146]:
# how do we get a sorted list of column labels?

In [147]:
# ===ASIDE===

In [148]:
players.columns

Index(['name', 'club', 'age', 'position', 'position_cat', 'market_value',
       'page_views', 'fpl_value', 'fpl_sel', 'fpl_points', 'region',
       'nationality', 'new_foreign', 'age_cat', 'club_id', 'big_club',
       'new_signing'],
      dtype='object')

In [149]:
iter(players.columns)

<map at 0x7f6cfae10610>

In [150]:
# iter(True)

In [151]:
sorted(players.columns)

['age',
 'age_cat',
 'big_club',
 'club',
 'club_id',
 'fpl_points',
 'fpl_sel',
 'fpl_value',
 'market_value',
 'name',
 'nationality',
 'new_foreign',
 'new_signing',
 'page_views',
 'position',
 'position_cat',
 'region']

In [152]:
# ===END ASIDE===

In [153]:
players.reindex(index=[2,1,3,0], columns=sorted(players.columns)[:6])

Unnamed: 0,age,age_cat,big_club,club,club_id,fpl_points
2,26,3,0,Huddersfield,8,0
1,30,4,0,Everton,7,22
3,26,3,1,Arsenal,1,56
0,27,3,0,West+Ham,20,60


# 116: BONUS - Another Way

In [154]:
players.reindex(index=[2,1,3,0], columns=sorted(players.columns))

Unnamed: 0,age,age_cat,big_club,club,club_id,fpl_points,fpl_sel,fpl_value,market_value,name,nationality,new_foreign,new_signing,page_views,position,position_cat,region
2,26,3,0,Huddersfield,8,0,2.50%,5.5,5.0,Aaron Mooy,Australia,0,0,588,CM,2,4
1,30,4,0,Everton,7,22,0.20%,5.5,5.0,Aaron Lennon,England,0,0,504,RW,1,1
3,26,3,1,Arsenal,1,56,5.10%,7.0,35.0,Aaron Ramsey,Wales,0,0,1040,CM,2,1
0,27,3,0,West+Ham,20,60,1.30%,5.0,12.0,Aaron Cresswell,England,0,0,380,LB,3,1


In [155]:
sorted(players.columns)

['age',
 'age_cat',
 'big_club',
 'club',
 'club_id',
 'fpl_points',
 'fpl_sel',
 'fpl_value',
 'market_value',
 'name',
 'nationality',
 'new_foreign',
 'new_signing',
 'page_views',
 'position',
 'position_cat',
 'region']

In [156]:
players.columns.sort_values()

Index(['age', 'age_cat', 'big_club', 'club', 'club_id', 'fpl_points',
       'fpl_sel', 'fpl_value', 'market_value', 'name', 'nationality',
       'new_foreign', 'new_signing', 'page_views', 'position', 'position_cat',
       'region'],
      dtype='object')

In [157]:
players.reindex(index=[2,1,3,0], columns=players.columns.sort_values())

Unnamed: 0,age,age_cat,big_club,club,club_id,fpl_points,fpl_sel,fpl_value,market_value,name,nationality,new_foreign,new_signing,page_views,position,position_cat,region
2,26,3,0,Huddersfield,8,0,2.50%,5.5,5.0,Aaron Mooy,Australia,0,0,588,CM,2,4
1,30,4,0,Everton,7,22,0.20%,5.5,5.0,Aaron Lennon,England,0,0,504,RW,1,1
3,26,3,1,Arsenal,1,56,5.10%,7.0,35.0,Aaron Ramsey,Wales,0,0,1040,CM,2,1
0,27,3,0,West+Ham,20,60,1.30%,5.0,12.0,Aaron Cresswell,England,0,0,380,LB,3,1


# 117:BONUS: Please Avoid Sorting Like This

In [158]:
# anti-pattern - please do not do this!!!

In [159]:
df = players.iloc[:6, :6]

In [160]:
df

Unnamed: 0,name,club,age,position,position_cat,market_value
0,Aaron Cresswell,West+Ham,27,LB,3,12.0
1,Aaron Lennon,Everton,30,RW,1,5.0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0
5,Adam Federici,Bournemouth,32,GK,4,1.0


In [161]:
df.swapaxes(1, 0)

Unnamed: 0,0,1,2,3,4,5
name,Aaron Cresswell,Aaron Lennon,Aaron Mooy,Aaron Ramsey,Abdoulaye Doucoure,Adam Federici
club,West+Ham,Everton,Huddersfield,Arsenal,Watford,Bournemouth
age,27,30,26,26,24,32
position,LB,RW,CM,CM,CM,GK
position_cat,3,1,2,2,2,4
market_value,12.0,5.0,5.0,35.0,6.0,1.0


In [162]:
df.T.sort_index().T

Unnamed: 0,age,club,market_value,name,position,position_cat
0,27,West+Ham,12.0,Aaron Cresswell,LB,3
1,30,Everton,5.0,Aaron Lennon,RW,1
2,26,Huddersfield,5.0,Aaron Mooy,CM,2
3,26,Arsenal,35.0,Aaron Ramsey,CM,2
4,24,Watford,6.0,Abdoulaye Doucoure,CM,2
5,32,Bournemouth,1.0,Adam Federici,GK,4


In [163]:
df.sort_index(axis=1)

Unnamed: 0,age,club,market_value,name,position,position_cat
0,27,West+Ham,12.0,Aaron Cresswell,LB,3
1,30,Everton,5.0,Aaron Lennon,RW,1
2,26,Huddersfield,5.0,Aaron Mooy,CM,2
3,26,Arsenal,35.0,Aaron Ramsey,CM,2
4,24,Watford,6.0,Abdoulaye Doucoure,CM,2
5,32,Bournemouth,1.0,Adam Federici,GK,4


## 118: Skill Challenge

###### **1.**

Sort the players in the **players** dataframe by age in ascending order. Who is the youngest footballer in the EPL?

###### **2**. 

Set the *club* column as the index of the dataframe. Then sort the dataframe index in alphabetical order. Make sure these changes are applied to the underlying dataframe and carry over to the next question. 

###### **3**.

Sort the dataframe values by *club* and *market_value* where the club is alphabetical (Arsenal first) and the market value is in descending order (within each team, the most valuable players first).

#### Solution

In [164]:
# 1

In [165]:
players.sort_values(by='age', ascending=True)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
53,Ben Woodburn,Liverpool,17,LW,1,1.50,1241,4.5,0.10%,5,1,Wales,0,1,10,1,0
217,Jonathan Leko,West+Brom,18,RW,1,1.50,169,4.5,0.20%,12,1,England,0,1,19,0,0
434,Trent Alexander-Arnold,Liverpool,18,RB,3,1.50,327,4.5,0.30%,15,2,England,0,1,10,1,0
229,Josh Tymon,Stoke+City,18,LB,3,1.00,120,4.5,0.10%,9,1,England,0,1,15,0,0
45,Axel Tuanzebe,Manchester+United,19,CB,3,1.00,279,4.0,1.70%,14,1,England,0,1,12,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
142,Gareth Barry,Everton,36,DM,2,1.50,1331,4.5,1.70%,68,1,England,0,6,7,0,0
90,Damien Delaney,Crystal+Palace,36,CB,3,1.00,195,4.5,0.60%,51,2,Ireland,0,6,6,0,0
38,Artur Boruc,Bournemouth,37,GK,4,1.00,436,4.5,6.90%,120,2,Poland,0,6,2,0,0
143,Gareth McAuley,West+Brom,37,CB,3,1.00,458,5.0,11.80%,131,2,Northern Ireland,0,6,19,0,0


In [166]:
# idxmin()!

In [167]:
# idxmin() returns the index *label* of the smallest age, so it must be paired with
# the label-based .loc accessor. Position-based .iloc would only give the same row
# while the index happens to be the default 0..n-1 range.
players.loc[players.age.idxmin()]

name            Ben Woodburn
club               Liverpool
age                       17
position                  LW
position_cat               1
market_value             1.5
page_views              1241
fpl_value                4.5
fpl_sel                0.10%
fpl_points                 5
region                     1
nationality            Wales
new_foreign                0
age_cat                    1
club_id                   10
big_club                   1
new_signing                0
Name: 53, dtype: object

In [168]:
# 2

In [169]:
# set_index() returns a new frame; calling sort_index(inplace=True) on that temporary
# sorts a throwaway object and discards it (the expression evaluates to None), so
# nothing is kept. Bind the re-indexed, sorted result to a name instead.
# `players` itself is deliberately left untouched: later cells address rows by their
# integer index labels.
players_by_club = players.set_index('club').sort_index()

In [170]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0


In [171]:
# 3

In [172]:
players.sort_values(by=['club', 'market_value'], ascending=[True, False])

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
21,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
306,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
20,Alexandre Lacazette,Arsenal,26,CF,1,40.0,1183,10.5,26.50%,0,2,France,1,3,1,1,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
152,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
384,Sam Byram,West+Ham,23,RB,3,4.5,198,4.5,0.30%,29,1,England,0,2,20,0,0
99,Darren Randolph,West+Ham,30,GK,4,2.5,459,4.5,0.40%,69,2,Ireland,0,4,20,0,0
179,James Collins,West+Ham,33,CB,3,2.0,187,4.5,0.90%,69,2,Wales,0,5,20,0,0


# **120 Identifying Dupes**

In [173]:
players.head(3)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0


In [174]:
players.duplicated()

0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Length: 465, dtype: bool

In [175]:
players[players.duplicated()]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
18,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
19,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
154,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0


In [176]:
# the subset param

In [177]:
# unique -> club, age, position, market_value

In [178]:
players.loc[players.duplicated(subset=['club', 'age', 'position', 'market_value'])]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
18,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
19,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
134,Fernando,Manchester+City,32,DM,2,18.0,338,4.5,0.40%,18,3,Brazil,0,5,11,1,0
154,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
252,Lascelles,Newcastle+United,27,CB,3,5.0,400,4.5,3.60%,0,1,England,0,3,13,0,0
281,Marcos Rojo,Manchester+United,27,CB,3,18.0,1063,5.5,0.10%,77,3,Argentina,0,3,12,1,0
399,Shane Duffy,Brighton+and+Hove,25,CB,3,5.0,243,4.5,0.60%,0,2,Ireland,0,3,3,0,0


In [179]:
players.loc[players.duplicated(subset=['club', 'age', 'position', 'market_value'])].shape

(7, 17)

In [180]:
# A - first -> original
# A
# A - last -> original

In [181]:
players.loc[players.duplicated(subset=['club', 'age', 'position', 'market_value'], keep='last')]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
17,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
18,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
72,Chris Smalling,Manchester+United,27,CB,3,18.0,834,5.5,1.30%,52,1,England,0,3,12,1,0
78,Ciaran Clark,Newcastle+United,27,CB,3,5.0,273,4.5,0.90%,0,2,Ireland,0,3,13,0,0
133,Fernandinho,Manchester+City,32,DM,2,18.0,595,5.0,0.80%,78,3,Brazil,0,5,11,1,0
152,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
263,Lewis Dunk,Brighton+and+Hove,25,CB,3,5.0,140,4.5,4.10%,0,1,England,0,3,3,0,0


In [182]:
players.loc[players.duplicated(subset=['club', 'age', 'position', 'market_value'], keep='last')].shape

(7, 17)

In [183]:
players.loc[players.duplicated(subset=['club', 'age', 'position', 'market_value'], keep=False)]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
17,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
18,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
19,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
72,Chris Smalling,Manchester+United,27,CB,3,18.0,834,5.5,1.30%,52,1,England,0,3,12,1,0
78,Ciaran Clark,Newcastle+United,27,CB,3,5.0,273,4.5,0.90%,0,2,Ireland,0,3,13,0,0
133,Fernandinho,Manchester+City,32,DM,2,18.0,595,5.0,0.80%,78,3,Brazil,0,5,11,1,0
134,Fernando,Manchester+City,32,DM,2,18.0,338,4.5,0.40%,18,3,Brazil,0,5,11,1,0
152,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
154,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
252,Lascelles,Newcastle+United,27,CB,3,5.0,400,4.5,3.60%,0,1,England,0,3,13,0,0


In [184]:
players.loc[players.duplicated(subset=['club', 'age', 'position', 'market_value'], keep=False)].shape

(13, 17)

# 121: Removing Duplicates

In [185]:
players[players.duplicated(subset=None, keep='first')]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
18,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
19,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
154,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0


In [186]:
players.market_value.mean()

11.125649350649349

In [187]:
players.drop_duplicates(keep='first').market_value.mean()

11.026252723311545

In [188]:
# duplicates are not necessarily bad!

# 122: Removing DataFrame Rows

In [189]:
players[players.duplicated()]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
18,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
19,Alex Oxlade-Chamberlain,Arsenal,23,RM,2,22.0,1519,6.0,1.80%,83,1,England,0,2,1,1,0
154,Granit Xhaka,Arsenal,24,DM,2,35.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0


In [190]:
players.drop(labels=19, axis=0)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Yohan Cabaye,Crystal+Palace,31,CM,2,15.0,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,YounÃ¨s Kaboul,Watford,31,CB,3,2.5,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,Ã‰tienne Capoue,Watford,29,DM,2,9.0,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,Ã€ngel Rangel,Swansea,34,RB,3,1.0,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


In [191]:
players.drop(index=19)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Yohan Cabaye,Crystal+Palace,31,CM,2,15.0,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,YounÃ¨s Kaboul,Watford,31,CB,3,2.5,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,Ã‰tienne Capoue,Watford,29,DM,2,9.0,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,Ã€ngel Rangel,Swansea,34,RB,3,1.0,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


In [192]:
# Several rows can be dropped in one go by passing a list of their index labels:
players.drop(index=[19, 20, 21, 231, 10])

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,24,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Yohan Cabaye,Crystal+Palace,31,CM,2,15.0,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,YounÃ¨s Kaboul,Watford,31,CB,3,2.5,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,Ã‰tienne Capoue,Watford,29,DM,2,9.0,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,Ã€ngel Rangel,Swansea,34,RB,3,1.0,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


In [193]:
players.shape

(465, 17)

# 123: BONUS - Removing Columns

In [194]:
# "remove rows" -> df.drop(labels=[19, 20], axis=0) or df.drop(index=[19, 20])

In [195]:
# "remove columns" -> ???

In [196]:
players.drop(labels=['age', 'market_value'], axis='columns')

Unnamed: 0,name,club,position,position_cat,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,LB,3,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,RW,1,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,CM,2,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,CM,2,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,CM,2,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Yohan Cabaye,Crystal+Palace,CM,2,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,YounÃ¨s Kaboul,Watford,CB,3,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,Ã‰tienne Capoue,Watford,DM,2,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,Ã€ngel Rangel,Swansea,RB,3,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


In [197]:
players.columns

Index(['name', 'club', 'age', 'position', 'position_cat', 'market_value',
       'page_views', 'fpl_value', 'fpl_sel', 'fpl_points', 'region',
       'nationality', 'new_foreign', 'age_cat', 'club_id', 'big_club',
       'new_signing'],
      dtype='object')

In [198]:
players.drop(columns=['age', 'market_value', 'name'])

Unnamed: 0,club,position,position_cat,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,West+Ham,LB,3,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Everton,RW,1,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Huddersfield,CM,2,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Arsenal,CM,2,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Watford,CM,2,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Crystal+Palace,CM,2,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,Watford,CB,3,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,Watford,DM,2,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,Swansea,RB,3,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


# **124:BONUS - Another Way: pop()**

In [199]:
players.pop('age')

0      27
1      30
2      26
3      26
4      24
       ..
460    31
461    31
462    29
463    34
464    32
Name: age, Length: 465, dtype: int64

In [200]:
# players.pop(['club', 'age']) 

In [201]:
# players.pop('club') and then players.pop('age')

In [202]:
players.columns

Index(['name', 'club', 'position', 'position_cat', 'market_value',
       'page_views', 'fpl_value', 'fpl_sel', 'fpl_points', 'region',
       'nationality', 'new_foreign', 'age_cat', 'club_id', 'big_club',
       'new_signing'],
      dtype='object')

In [203]:
players.head(4)

Unnamed: 0,name,club,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0


# 125: BONUS - A Sophisticated Alternative

In [204]:
# reindex to remove rows/columns

In [205]:
players.reindex()

Unnamed: 0,name,club,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
1,Aaron Lennon,Everton,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0
2,Aaron Mooy,Huddersfield,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0
3,Aaron Ramsey,Arsenal,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
4,Abdoulaye Doucoure,Watford,CM,2,6.0,124,5.0,0.00%,38,2,France,0,2,18,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Yohan Cabaye,Crystal+Palace,CM,2,15.0,456,5.5,1.40%,91,2,France,0,4,6,0,0
461,YounÃ¨s Kaboul,Watford,CB,3,2.5,263,4.5,0.10%,57,2,France,0,4,18,0,1
462,Ã‰tienne Capoue,Watford,DM,2,9.0,412,5.5,8.00%,131,2,France,0,4,18,0,0
463,Ã€ngel Rangel,Swansea,RB,3,1.0,137,4.0,18.80%,26,2,Spain,0,6,16,0,0


In [206]:
players.reindex(index=[0,3,9])

Unnamed: 0,name,club,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Aaron Cresswell,West+Ham,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
3,Aaron Ramsey,Arsenal,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0
9,Adrian,West+Ham,GK,4,8.0,266,4.5,0.80%,64,2,Spain,0,4,20,0,0


In [207]:
unwanted_rows = [1,2,3,4]
unwanted_columns = ['name', 'position', 'position_cat']

In [208]:
# reindex() keeps only the labels it is handed, so passing "all labels minus
# the unwanted ones" has the effect of dropping those rows and columns.
# NOTE: a set has no defined order, which is why the columns in the output
# below no longer follow the original column order.
players.reindex(
    index=set(players.index).difference(unwanted_rows),
    columns=set(players.columns).difference(unwanted_columns)
    )

Unnamed: 0,new_foreign,club_id,new_signing,age_cat,market_value,page_views,fpl_points,nationality,region,club,fpl_value,fpl_sel,big_club
0,0,20,0,3,12.0,380,60,England,1,West+Ham,5.0,1.30%,0
5,0,2,0,5,1.0,126,8,Australia,4,Bournemouth,4.0,1.50%,0
6,0,10,0,4,25.0,1808,139,England,1,Liverpool,7.5,6.40%,1
7,0,2,0,3,5.0,200,104,England,1,Bournemouth,5.0,0.90%,0
8,0,7,0,1,5.0,1387,16,England,1,Everton,5.5,0.30%,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,0,6,0,4,15.0,456,91,France,2,Crystal+Palace,5.5,1.40%,0
461,0,18,1,4,2.5,263,57,France,2,Watford,4.5,0.10%,0
462,0,18,0,4,9.0,412,131,France,2,Watford,5.5,8.00%,0
463,0,16,0,6,1.0,137,26,Spain,2,Swansea,4.0,18.80%,0


# 126: Null Values In DataFrames

In [209]:
players = pd.read_csv('https://andybek.com/pandas-soccer')

In [210]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [211]:
players.shape

(465, 17)

In [212]:
# series refresher

In [213]:
ages = players.age

In [214]:
type(players.age)

pandas.core.series.Series

In [215]:
ages.isna()

0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Name: age, Length: 465, dtype: bool

In [216]:
ages[ages.isna()]

Series([], Name: age, dtype: int64)

In [217]:
# dataframes

In [218]:
players.isna()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
1,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
2,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
3,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
4,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
461,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
462,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
463,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False


In [219]:
# how many NAs are there in this dataframe?

In [220]:
np.count_nonzero(players.isna())

4

In [221]:
# all the rows that contain a null value (a row is repeated once per null it holds)
players[players.isnull().values]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,,2,,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
30,Granit Xhaka,Arsenal,24,,2,,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [222]:
players["market_value"].isnull()

0      False
1      False
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Name: market_value, Length: 465, dtype: bool

In [223]:
# ONLY one column
players["market_value"].isnull().sum()

3

In [224]:
# which records have NAs in this dataframe?

In [225]:
players[players.isna().values].drop_duplicates()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,,2,,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


# 127: Dropping And Filling DataFrame NAs

In [226]:
players.fillna('some meaningful replacement value').loc[[30, 192, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,some meaningful replacement value,2,some meaningful replacement value,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,some meaningful replacement value,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,some meaningful replacement value,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [227]:
players.fillna({
    'market_value': 100,
    'position': 'RM'
}).loc[[30, 192, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,RM,2,100.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,100.0,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,100.0,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [228]:
players.position.unique()

array(['LW', 'AM', 'GK', 'RW', 'CB', 'RB', 'CF', 'LB', 'DM', 'RM', 'CM',
       nan, 'SS', 'LM'], dtype=object)

In [229]:
# Filling market_value with the mean() of market_value, and position with 'RM'
players.fillna({
    'market_value': players.market_value.mean(),
    'position': 'RM'
}).loc[[30, 192, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,RM,2,11.125649,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,11.125649,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,11.125649,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [230]:
# how about dropping nulls?

In [231]:
players.dropna(axis=1, how='all').loc[[30, 192, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,,2,,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


# **128: BONUS - Method And Axes With fillna()**

In [232]:
players[players.isna().values].drop_duplicates()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,,2,,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [233]:
# players.fillna('some meaningful replacement value').loc[[30, 192, 195]]

# players.fillna({ 'market_value': 100, 'position': 'RM' }).loc[[30, 192, 195]]

In [234]:
players.fillna(method='ffill').loc[[29, 30, 191, 192, 194, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
30,Granit Xhaka,Arsenal,24,LB,2,15.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
191,Laurent Depoitre,Huddersfield,28,CF,1,3.0,212,5.5,0.30%,0,2,Belgium,0,4,8,0,0
192,Steve Mounie,Huddersfield,22,CF,1,3.0,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
194,Riyad Mahrez,Leicester+City,26,RW,1,30.0,1753,8.5,1.70%,120,4,Algeria,0,3,9,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,30.0,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [235]:
players.fillna(method='ffill', axis=0).loc[[29, 30, 191, 192, 194, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
29,Sead Kolasinac,Arsenal,24,LB,3,15.0,618,6.0,6.90%,0,2,Bosnia,1,2,1,1,0
30,Granit Xhaka,Arsenal,24,LB,2,15.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
191,Laurent Depoitre,Huddersfield,28,CF,1,3.0,212,5.5,0.30%,0,2,Belgium,0,4,8,0,0
192,Steve Mounie,Huddersfield,22,CF,1,3.0,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
194,Riyad Mahrez,Leicester+City,26,RW,1,30.0,1753,8.5,1.70%,120,4,Algeria,0,3,9,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,30.0,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [236]:
players.fillna(method='pad', axis=1).loc[[30, 192, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,24,2,2,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,1,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,4,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [237]:
# 'ffill' -> 'pad'
# 'bfill' -> 'backfill'

In [238]:
# word of the lecture: duo -> pair

# **129: Skill Challenge**

###### **1**.

From our *players* dataframe remove the rows labeled 2, 10, 21 and the market_value column. Do not modify the underlying dataframe. Assign the result to _df2_.

###### **2**.

Does the _nationality_ column in _df2_ contain any NA values? How many unique nationalities are there?

###### **3**.

Starting from _df2_, isolate a dataframe slice of players that contains only the unique _age_-_position_ combinations for each _club_. Do not include the _club_ column itself.

#### Solution

In [239]:
players.head(3)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0


In [240]:
# 1

In [241]:
# Drop the three rows first, then the column; each drop() returns a new
# frame, so players itself is left untouched.
df2 = (
    players
    .drop(labels=[2, 10, 21], axis=0)
    .drop(labels='market_value', axis=1)
)

In [242]:
# alternative
players.drop(index=[2, 10, 21], columns='market_value')

Unnamed: 0,name,club,age,position,position_cat,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,912,6.0,0.70%,121,2,France,0,4,1,1,0
5,Hector Bellerin,Arsenal,22,RB,3,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Edimilson Fernandes,West+Ham,21,CM,2,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,198,4.5,0.30%,29,1,England,0,2,20,0,0
463,Ashley Fletcher,West+Ham,21,CF,1,412,4.5,5.90%,16,1,England,0,1,20,0,1


In [243]:
# 2

In [244]:
df2.nationality.isnull().sum()

0

In [245]:
df2.nationality[df2.nationality.isnull()]

Series([], Name: nationality, dtype: object)

In [246]:
# how many uniques

In [247]:
df2.nationality.drop_duplicates().size

61

In [248]:
# an alternative

In [249]:
df2.nationality.nunique()

61

In [250]:
# 3

In [251]:
df2.drop_duplicates(subset=['age', 'club', 'position'], keep='first').loc[:, ['age', 'position']]

Unnamed: 0,age,position
0,28,LW
1,28,AM
3,28,RW
4,31,CB
5,22,RB
...,...,...
460,21,CM
461,23,LB
462,23,RB
463,21,CF


In [252]:
df2.shape

(462, 16)

# **131: Calculating Aggregates With agg()**

In [253]:
players.agg('mean')

  return f(*args, **kwargs)


age              26.776344
position_cat      2.178495
market_value     11.125649
page_views      771.546237
fpl_value         5.450538
fpl_points       57.544086
region            1.989247
new_foreign       0.034409
age_cat           3.195699
club_id          10.253763
big_club          0.309677
new_signing       0.144086
dtype: float64

In [254]:
players.new_signing.mean()

0.14408602150537633

In [255]:
players.agg(np.min)

  return getattr(obj, f)()


name            Aaron Cresswell
club                    Arsenal
age                          17
position_cat                  1
market_value               0.05
page_views                    3
fpl_value                   4.0
fpl_sel                   0.00%
fpl_points                    0
region                        1
nationality             Algeria
new_foreign                   0
age_cat                       1
club_id                       1
big_club                      0
new_signing                   0
dtype: object

In [256]:
# ===ASIDE

In [257]:
# >, <, <=, >= should have some meaning

In [258]:
'a' < 'b'

True

In [259]:
ord('a')

97

In [260]:
ord('b')

98

In [261]:
ls = ['a', 'b', 'c', 'day', 19]

In [262]:
# max(ls)

In [263]:
ord('d')

100

In [264]:
# ===END ASIDE

In [265]:
players.agg('min')

  return f(*args, **kwargs)


name            Aaron Cresswell
club                    Arsenal
age                          17
position_cat                  1
market_value               0.05
page_views                    3
fpl_value                   4.0
fpl_sel                   0.00%
fpl_points                    0
region                        1
nationality             Algeria
new_foreign                   0
age_cat                       1
club_id                       1
big_club                      0
new_signing                   0
dtype: object

In [266]:
players.select_dtypes(np.number).agg('min')

age             17.00
position_cat     1.00
market_value     0.05
page_views       3.00
fpl_value        4.00
fpl_points       0.00
region           1.00
new_foreign      0.00
age_cat          1.00
club_id          1.00
big_club         0.00
new_signing      0.00
dtype: float64

In [267]:
players.select_dtypes(np.number).agg(['min', 'max', 'mean'])

Unnamed: 0,age,position_cat,market_value,page_views,fpl_value,fpl_points,region,new_foreign,age_cat,club_id,big_club,new_signing
min,17.0,1.0,0.05,3.0,4.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0
max,38.0,4.0,75.0,7664.0,12.5,264.0,4.0,1.0,6.0,20.0,1.0,1.0
mean,26.776344,2.178495,11.125649,771.546237,5.450538,57.544086,1.989247,0.034409,3.195699,10.253763,0.309677,0.144086


# 132: Same-shape Transforms

In [268]:
players.head(3)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0


In [269]:
# we need an fx rate

In [270]:
# usdeur = 0.91

In [271]:
players.loc[:, ['market_value', 'fpl_value']]

Unnamed: 0,market_value,fpl_value
0,65.0,12.0
1,50.0,9.5
2,7.0,5.5
3,20.0,7.5
4,22.0,6.0
...,...,...
460,5.0,4.5
461,7.0,4.5
462,4.5,4.5
463,1.0,4.5


In [272]:
players.loc[:, ['market_value', 'fpl_value']].transform(lambda x: x * 0.91)

Unnamed: 0,market_value,fpl_value
0,59.150,10.920
1,45.500,8.645
2,6.370,5.005
3,18.200,6.825
4,20.020,5.460
...,...,...
460,4.550,4.095
461,6.370,4.095
462,4.095,4.095
463,0.910,4.095


In [273]:
players.loc[:, ['market_value', 'fpl_value']] * 0.91

Unnamed: 0,market_value,fpl_value
0,59.150,10.920
1,45.500,8.645
2,6.370,5.005
3,18.200,6.825
4,20.020,5.460
...,...,...
460,4.550,4.095
461,6.370,4.095
462,4.095,4.095
463,0.910,4.095


In [274]:
# ===ASIDE

In [275]:
# 1. choice

In [276]:
from random import choice

In [277]:
names = ['Bud', 'Brooke', 'Paleo']

In [278]:
choice(names)

'Bud'

In [279]:
# 2. str methods

In [280]:
'Andy'.upper()

'ANDY'

In [281]:
ser = pd.Series(['Bud CriMSon', 'Brooke', 'Paleo'])

In [282]:
ser.str.upper()

0    BUD CRIMSON
1         BROOKE
2          PALEO
dtype: object

In [283]:
ser.str.title()

0    Bud Crimson
1         Brooke
2          Paleo
dtype: object

In [284]:
ser.str.swapcase()

0    bUD cRImsON
1         bROOKE
2          pALEO
dtype: object

In [285]:
# ===END ASIDE

In [286]:
# the function should:
# - apply a random string capitalization method
# - to a sequence of values, and
# - return the transformed sequence

In [287]:
def random_case(x):
  """Apply one randomly picked capitalization method to every string in x.

  x is expected to expose the pandas ``.str`` accessor; the result is
  whatever the chosen ``.str`` method returns for x.
  """
  text_ops = x.str
  # same candidates, in the same order, as a list of bound methods would hold
  method_name = choice(['swapcase', 'lower', 'title', 'upper'])

  return getattr(text_ops, method_name)()

In [288]:
# Four independent runs: random_case is called once per column, so each
# column may end up with a different capitalization on every pass.
for _ in range(4):
  text_columns = players.select_dtypes(include=object)
  print(text_columns.transform(random_case).head())

                name     club position fpl_sel     nationality
0     alexis sanchez  aRSENAL       Lw  17.10%           Chile
1         mesut ozil  aRSENAL       Am   5.60%         Germany
2          petr cech  aRSENAL       Gk   5.90%  Czech Republic
3       theo walcott  aRSENAL       Rw   1.50%         England
4  laurent koscielny  aRSENAL       Cb   0.70%          France
                name     club position fpl_sel     nationality
0     ALEXIS SANCHEZ  ARSENAL       LW  17.10%           cHILE
1         MESUT OZIL  ARSENAL       AM   5.60%         gERMANY
2          PETR CECH  ARSENAL       GK   5.90%  cZECH rEPUBLIC
3       THEO WALCOTT  ARSENAL       RW   1.50%         eNGLAND
4  LAURENT KOSCIELNY  ARSENAL       CB   0.70%          fRANCE
                name     club position fpl_sel     nationality
0     alexis sanchez  aRSENAL       Lw  17.10%           cHILE
1         mesut ozil  aRSENAL       Am   5.60%         gERMANY
2          petr cech  aRSENAL       Gk   5.90%  cZECH r

# 133: More Flexibility With apply()

In [289]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [290]:
def round_floats(x):
  """Round x when its dtype is float64; return anything else unchanged."""
  is_float_column = x.dtype == np.float64

  # non-float columns are handed back as the very same object
  return round(x) if is_float_column else x

In [291]:
players.apply(round_floats)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,10.0,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,6.0,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,8.0,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.0,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.0,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.0,198,4.0,0.30%,29,1,England,0,2,20,0,0
463,Ashley Fletcher,West+Ham,21,CF,1,1.0,412,4.0,5.90%,16,1,England,0,1,20,0,1


In [292]:
players.select_dtypes(np.float64).head()

Unnamed: 0,market_value,fpl_value
0,65.0,12.0
1,50.0,9.5
2,7.0,5.5
3,20.0,7.5
4,22.0,6.0


In [293]:
players.select_dtypes(np.float64).apply(round_floats).head()

Unnamed: 0,market_value,fpl_value
0,65.0,12.0
1,50.0,10.0
2,7.0,6.0
3,20.0,8.0
4,22.0,6.0


In [294]:
# apply() = agg() + transform()

In [295]:
# apply as aggregate

In [296]:
players.agg('mean')

  return f(*args, **kwargs)


age              26.776344
position_cat      2.178495
market_value     11.125649
page_views      771.546237
fpl_value         5.450538
fpl_points       57.544086
region            1.989247
new_foreign       0.034409
age_cat           3.195699
club_id          10.253763
big_club          0.309677
new_signing       0.144086
dtype: float64

In [297]:
players.apply('mean')

  return f(*args, **kwargs)


age              26.776344
position_cat      2.178495
market_value     11.125649
page_views      771.546237
fpl_value         5.450538
fpl_points       57.544086
region            1.989247
new_foreign       0.034409
age_cat           3.195699
club_id          10.253763
big_club          0.309677
new_signing       0.144086
dtype: float64

In [298]:
# players.transform('mean')

In [299]:
# flipping the axis param

In [300]:
players.apply('mean', axis=0)

  return f(*args, **kwargs)


age              26.776344
position_cat      2.178495
market_value     11.125649
page_views      771.546237
fpl_value         5.450538
fpl_points       57.544086
region            1.989247
new_foreign       0.034409
age_cat           3.195699
club_id          10.253763
big_club          0.309677
new_signing       0.144086
dtype: float64

In [301]:
#players.age.mean()

In [302]:
players.apply('mean', axis=1)

  return f(*args, **kwargs)


0      392.333333
1      388.208333
2      143.708333
3      214.875000
4       91.916667
          ...    
460     31.875000
461     24.791667
462     23.750000
463     39.875000
464     24.708333
Length: 465, dtype: float64

In [303]:
# players.loc[460, :].mean()

In [304]:
players.loc[460, [dtype != object for dtype in players.dtypes]].mean()

31.875

# 134: Element-wise Operations With applymap() 

In [305]:
# vectorized ops: agg(), transform(), apply()

In [306]:
# non-vectorized: applymap()

In [307]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [308]:
# ~2%/year

In [309]:
inflation = 1.02

In [310]:
mini_df = players.loc[:, ['market_value', 'fpl_value']]

In [311]:
mini_df * inflation

Unnamed: 0,market_value,fpl_value
0,66.30,12.24
1,51.00,9.69
2,7.14,5.61
3,20.40,7.65
4,22.44,6.12
...,...,...
460,5.10,4.59
461,7.14,4.59
462,4.59,4.59
463,1.02,4.59


In [312]:
from datetime import datetime
counter = 0

def log_and_transform(x):
  """Multiply x by the module-level inflation factor, logging every 100th call.

  Each call bumps the module-level counter; whenever it reaches a multiple
  of 100, a timestamped progress line is printed.
  """
  global counter
  counter = counter + 1
  reached_milestone = not counter % 100
  if reached_milestone:
    # the same message could be produced with str.format() instead of an f-string
    print(f"It's {datetime.now()} and I just adjusted the {counter}th value.")

  return x * inflation

In [313]:
import sys
sys.version

'3.7.12 (default, Jan 15 2022, 18:48:18) \n[GCC 7.5.0]'

In [314]:
mini_df.apply(log_and_transform)

Unnamed: 0,market_value,fpl_value
0,66.30,12.24
1,51.00,9.69
2,7.14,5.61
3,20.40,7.65
4,22.44,6.12
...,...,...
460,5.10,4.59
461,7.14,4.59
462,4.59,4.59
463,1.02,4.59


In [315]:
mini_df.applymap(log_and_transform)

It's 2022-03-22 22:37:27.625979 and I just adjusted the 100th value.
It's 2022-03-22 22:37:27.629362 and I just adjusted the 200th value.
It's 2022-03-22 22:37:27.629882 and I just adjusted the 300th value.
It's 2022-03-22 22:37:27.630345 and I just adjusted the 400th value.
It's 2022-03-22 22:37:27.648788 and I just adjusted the 500th value.
It's 2022-03-22 22:37:27.649686 and I just adjusted the 600th value.
It's 2022-03-22 22:37:27.650150 and I just adjusted the 700th value.
It's 2022-03-22 22:37:27.650828 and I just adjusted the 800th value.
It's 2022-03-22 22:37:27.651316 and I just adjusted the 900th value.


Unnamed: 0,market_value,fpl_value
0,66.30,12.24
1,51.00,9.69
2,7.14,5.61
3,20.40,7.65
4,22.44,6.12
...,...,...
460,5.10,4.59
461,7.14,4.59
462,4.59,4.59
463,1.02,4.59


# 135: Skill Challenge

###### **1.**

Create a standalone function that
- accepts a single parameter x
- returns the string 'relatively unknown' if x is less than 220
- 'kind of popular' if x is at least 220 but less than 600 (upper bound non-inclusive)
- 'popular' if x is at least 600 but less than 2000 (upper bound non-inclusive)
- 'super-popular' otherwise

| lower bound | upper bound (non-inclusive) | popularity label     |
|-------------|-----------------------------|----------------------|
| -inf        | 220                         | 'relatively unknown' |
| 220         | 600                         | 'kind of popular'    |
| 600         | 2000                        | 'popular'            |
| 2000        | +inf                        | 'super-popular'      |

###### **2.**

Apply the function from the step above to the _page_views_ column of the _players_ dataframe. Use a method that supports vectorized operations.

###### **3.**

Add the output from the step above as a new column to the _players_ dataframe. Name the column _popularity_.

###### **4.**

How many "super-popular" players are there?

#### Solution

In [316]:
# 1

In [317]:
def get_popularity(x):
  """Return the popularity label for a page-view count x.

  Tiers are checked from lowest to highest; every upper bound is exclusive,
  and anything not below the last bound is 'super-popular'.
  """
  tiers = (
      (220, 'relatively unknown'),
      (600, 'kind of popular'),
      (2000, 'popular'),
  )
  for upper_bound, label in tiers:
    if x < upper_bound:
      return label

  return 'super-popular'

In [318]:
get_popularity(1)

'relatively unknown'

In [319]:
get_popularity(1000)

'popular'

In [320]:
get_popularity(10000)

'super-popular'

In [321]:
# 2

In [322]:
players.page_views.head()

0    4329
1    4395
2    1529
3    2393
4     912
Name: page_views, dtype: int64

In [323]:
players.page_views.apply(get_popularity)

0           super-popular
1           super-popular
2                 popular
3           super-popular
4                 popular
              ...        
460       kind of popular
461    relatively unknown
462    relatively unknown
463       kind of popular
464    relatively unknown
Name: page_views, Length: 465, dtype: object

In [324]:
# 3

In [325]:
players['popularity'] = players.page_views.apply(get_popularity)

In [326]:
players.popularity

0           super-popular
1           super-popular
2                 popular
3           super-popular
4                 popular
              ...        
460       kind of popular
461    relatively unknown
462    relatively unknown
463       kind of popular
464    relatively unknown
Name: popularity, Length: 465, dtype: object

In [327]:
# 4

In [328]:
players.popularity.value_counts()

kind of popular       170
popular               143
relatively unknown    115
super-popular          37
Name: popularity, dtype: int64

In [329]:
players[players.popularity == 'super-popular'].name.size

37

# 137: Setting DataFrame Values

In [330]:
players.head(10)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,popular
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,popular
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0,popular
6,Olivier Giroud,Arsenal,30,CF,1,22.0,2230,8.5,2.50%,116,2,France,0,4,1,1,0,super-popular
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0,kind of popular
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1,popular
9,Alex Iwobi,Arsenal,21,LW,1,10.0,1812,5.5,1.00%,89,4,Nigeria,0,1,1,1,0,popular


In [331]:
%%timeit
players.loc[3, 'position'] = 'CM'

The slowest run took 8.40 times longer than the fastest. This could mean that an intermediate result is being cached.
1000 loops, best of 5: 418 µs per loop


In [332]:
355/5.73

61.95462478184991

In [333]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,popular
3,Theo Walcott,Arsenal,28,CM,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,popular


In [334]:
%%timeit
players.iloc[3, 3] = 'RW'

1000 loops, best of 5: 395 µs per loop


In [335]:
# at[] and iat[] should be preferred for single value assignment

In [None]:
%%timeit
players.at[3, 'position'] = 'CM'

In [None]:
players.head()

In [None]:
%%timeit
players.iat[3, 3] = 'RW'

# 138: The SettingWithCopy Warning

In [None]:
players.head()

In [None]:
players['page_views'][2] = 2001

In [None]:
players.head()

In [None]:
players.drop_duplicates()['page_views'][2] = 3000

In [None]:
players.head()

In [None]:
pd.options.mode.chained_assignment = 'warn' 

In [None]:
# None (the Python value, not the string 'None') -> turns the SettingWithCopyWarning off

# 139: View vs Copy

In [None]:
players.head()

In [None]:
players.loc[0:3, 'position'] = ['CM', 'RW', 'CB', 'GK']

In [None]:
players.head()

In [None]:
# Aaron -> nickname

In [None]:
players.loc[players.name.str.startswith('Aaron')]

In [None]:
players.loc[players.name.str.startswith('Aaron'), 'name'] = 'Ronny'

In [None]:
players.loc[[15, 157, 176, 455]]

In [None]:
# do not do this!

In [None]:
players['age'].iloc[1] = '12'

In [None]:
players.drop_duplicates().loc[3, 'position'] = 'CM'

# 140: Adding DataFrame Columns

In [None]:
players.popularity

In [None]:
'MVtoFPL' in players

In [None]:
'popularity' in players

In [None]:
players['MVtoFPL'] = 1.0

In [None]:
'MVtoFPL' in players

In [None]:
players.head()

In [None]:
players['MVtoFPL'] = players['market_value'] / players['fpl_value']

In [None]:
players.head()

In [None]:
df_mini = players.iloc[:4, 1:5]

In [None]:
df_mini

In [None]:
player_names = pd.Series(['Bronson', 'Bradley', 'Ronald', 'Ronny'])

In [None]:
player_names

In [None]:
# insert() modifies df_mini in place (it returns None) and puts the new column at position 0.
# The Series values are matched to rows by index label; player_names uses the default
# 0..3 index, which lines up with the first four rows taken from `players`.
df_mini.insert(0, 'nicknames', player_names)

In [None]:
df_mini

In [None]:
# the assign() approach

In [None]:
# assign() returns a new DataFrame with the extra columns; the result is only displayed
# here, so df_mini itself stays as it was
df_mini.assign(
    career_goals=[12, 67, 179, 49],
    nationality=['American', 'British', 'Turkish', 'Indian'],
)

In [None]:
df_mini

# 141: Adding Rows To DataFrames

In [None]:
df_mini

In [None]:
# the append() method -> series, dfs, or a collection of them (deprecated in pandas 1.4, removed in 2.0 -> use pd.concat there)

In [None]:
# One player as a Series: the index labels are the column names it will line up with,
# and `name` (4) is the row label it will receive when added to a DataFrame.
cristiano = pd.Series(
    data=['Cristiano', 32, 'RW', 'Juventus', 1],
    index=['nicknames', 'age', 'position', 'club', 'position_cat'],
    name=4,
)

In [None]:
cristiano

In [None]:
# DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0, so the Series is
# converted to a one-row frame (its name, 4, becomes the row label) and added with pd.concat.
# infer_objects() restores per-column dtypes that the transpose turned into object.
df_mini = pd.concat([df_mini, cristiano.to_frame().T.infer_objects()])

In [None]:
# df_mini.append([player_1, player_2, player_3...])

In [None]:
# Two more players, written as one record per row; row labels 5 and 6.
other_players = pd.DataFrame(
    [
        {'nicknames': 'Gianluigi', 'age': 37, 'club': 'Juventus', 'position': 'GK', 'position_cat': 4},
        {'nicknames': 'Lionel', 'age': 32, 'club': 'Barcelona', 'position': 'CF', 'position_cat': 2},
    ],
    index=[5, 6],
)

In [None]:
other_players

In [None]:
# pd.concat stacks the two frames row-wise; it replaces DataFrame.append(), which was
# deprecated in pandas 1.4 and removed in 2.0.
df_mini = pd.concat([df_mini, other_players])

In [None]:
# setting with enlargement

In [None]:
# df['inexistent label'] = 'some value'

In [None]:
df_mini

In [None]:
# Setting with enlargement: assigning to a row label that is not yet in the index
# makes .loc add a new row. The scalar string is broadcast to every column of that row.
df_mini.loc[9] = 'some row value'

In [None]:
df_mini

In [None]:
# adding rows to dataframes is inefficient (a very expensive operation)

# 142: BONUS - How Are DataFrames Stored In Memory

In [None]:
players.info(verbose=False)

In [None]:
players.head()

# 143: Skill Challenge

###### **1.**

From the _players_ dataframe select 4 columns and 4 rows, of no particular order. Assign the resulting 4x4 dataframe to _df_random_.

###### **2.**

Extend _df_random_ **1**) vertically by adding a new row, and **2**) horizontally by adding a new column. Do this as two separate operations.

###### **3.**

Compare the relative performance of the operations above. Is adding a row or column faster? Is there a significant difference?

# 144: Solution

In [None]:
# 1

In [None]:
players.head()

In [None]:
players.loc[0:3, 'name':'position']

In [None]:
# Pick 4 random rows, then 4 random columns. Fixed seeds keep the "random" 4x4 selection
# identical across kernel restarts; without them every run draws different rows and
# columns, while later cells refer to specific column labels.
df_random = players.sample(4, random_state=0).sample(4, axis=1, random_state=0)

In [None]:
df_random

In [None]:
# 2

In [None]:
df_random

In [None]:
# adding a row

In [None]:
# DataFrame.append() was deprecated in pandas 1.4 and removed in 2.0; pd.concat is the
# supported way to add a row. The Series becomes a one-row frame labelled 307, and
# infer_objects() restores per-column dtypes that the transpose turned into object.
new_row = pd.Series({'nationality': 'Norway', 'age': 24, 'position': 'CM', 'club_id': 20}, name=307)
pd.concat([df_random, new_row.to_frame().T.infer_objects()])

In [None]:
# add a column

In [None]:
df_random.assign(years_in_league=[3, 6, 1, 10])

In [None]:
# 3

In [None]:
%%timeit
df_random.append(pd.Series({'nationality': 'Norway', 'age': 24, 'position': 'CM', 'club_id': 20}, name=307))

In [None]:
%%timeit
df_random.assign(years_in_league=[3, 6, 1, 10])

In [None]:
# NOTE(review): presumably the two %%timeit results above (add-a-row time divided by
# add-a-column time, same unit), i.e. how many times slower adding a row was - confirm.
4400/623

In [None]:
# \N{...} escapes resolve a character by its Unicode name; print one per line
print(
    "\N{horse}",
    "\N{dog}",
    "\N{cat}",
    "\N{mouse}",
    "\N{bird}",
    "\N{book}",
    "\N{bread}",
    sep="\n",
)

# You can try more here: