In [27]:
import numpy as np
import pandas as pd

In [28]:
pd.__version__

'2.1.4'

#### Introducing A New Dataset

In [29]:

# NEW DATA!
# - English Premier League soccer players
# - 400+ players x 17 attributes


In [30]:
data_url = 'https://andybek.com/pandas-soccer' # as always, this will be attached too!

In [31]:
players = pd.read_csv(data_url)

In [32]:
players.info(verbose=False, memory_usage='deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 465 entries, 0 to 464
Columns: 17 entries, name to new_signing
dtypes: float64(2), int64(10), object(5)
memory usage: 190.7 KB


In [33]:
players.dtypes.value_counts()

int64      10
object      5
float64     2
Name: count, dtype: int64

In [34]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


#### Quick Review: Indexing With Boolean Masks

In [35]:
# boolean indexing:
#   step 1: generate sequence of booleans
#   step 2: use boolean sequence in [] or .loc[]

In [36]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [37]:
# Q: what are the players that have a market value exceeding 40M?

In [38]:
players.market_value > 40

0       True
1       True
2      False
3      False
4      False
       ...  
460    False
461    False
462    False
463    False
464    False
Name: market_value, Length: 465, dtype: bool

In [39]:
players[players.market_value > 40]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
96,Eden Hazard,Chelsea,26,LW,1,75.0,4220,10.5,2.30%,224,2,Belgium,0,3,5,1,0
97,Diego Costa,Chelsea,28,CF,1,50.0,4454,10.0,3.00%,196,2,Spain,0,4,5,1,0
108,N%27Golo Kante,Chelsea,26,DM,2,50.0,4042,5.0,13.80%,83,2,France,0,3,5,1,1
218,Philippe Coutinho,Liverpool,25,AM,1,45.0,2958,9.0,30.80%,171,3,Brazil,0,3,10,1,0
244,Kevin De Bruyne,Manchester+City,26,AM,1,65.0,2252,10.0,17.50%,199,2,Belgium,0,3,11,1,0
245,Sergio Aguero,Manchester+City,29,CF,1,65.0,4046,11.5,9.70%,175,3,Argentina,0,4,11,1,0
246,Raheem Sterling,Manchester+City,22,LW,1,45.0,2074,8.0,3.80%,149,1,England,0,2,11,1,0
264,Romelu Lukaku,Manchester+United,24,CF,1,50.0,3727,11.5,45.00%,221,2,Belgium,0,2,12,1,0


In [40]:
players[players.market_value > 40].shape

(13, 17)

#### More Approaches To Boolean Masking

In [41]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [42]:
players.position.unique()

array(['LW', 'AM', 'GK', 'RW', 'CB', 'RB', 'CF', 'LB', 'DM', 'RM', 'CM',
       nan, 'SS', 'LM'], dtype=object)

In [43]:
players.position.unique().size

14

In [44]:
# defender codes: LB, CB, RB

In [45]:
players.position.isin(['LB', 'CB', 'RB'])

0      False
1      False
2      False
3      False
4       True
       ...  
460    False
461     True
462     True
463    False
464    False
Name: position, Length: 465, dtype: bool

In [46]:
players.loc[players.position.isin(['LB', 'CB', 'RB'])]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
5,Hector Bellerin,Arsenal,22,RB,3,30.0,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
7,Nacho Monreal,Arsenal,31,LB,3,13.0,555,5.5,4.70%,115,2,Spain,0,4,1,1,0
8,Shkodran Mustafi,Arsenal,25,CB,3,30.0,1877,5.5,4.00%,90,2,Germany,0,3,1,1,1
17,Gabriel Paulista,Arsenal,26,CB,3,13.0,552,5.0,0.10%,45,3,Brazil,0,3,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
455,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0
458,Angelo Ogbonna,West+Ham,29,CB,3,9.0,247,4.5,1.10%,45,2,Italy,0,4,20,0,0
459,Pablo Zabaleta,West+Ham,32,RB,3,7.0,698,5.0,2.70%,45,3,Argentina,0,5,20,0,0
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1


In [47]:
players.market_value.between(40,50, inclusive=False)

ValueError: Inclusive has to be either string of 'both','left', 'right', or 'neither'.

In [None]:
players[players.market_value.between(40,50, inclusive=True)]

In [None]:
players.age <= 25

In [None]:
players.loc[players.age <= 25]

In [None]:
players.loc[players.age.le(25)]

In [None]:
players.age.le(25).equals(players.age <= 25)

#### Binary Operators With Booleans

In [None]:
# Binary OR -> |

In [None]:
True | False

In [None]:
False | False

In [None]:
False | True

In [None]:
True | True

In [None]:
# Binary AND -> &

In [None]:
True & False

In [None]:
False & True

In [None]:
False & False

In [None]:
True & True

In [None]:
# a single false is enough!

In [None]:
True & True & False & True & True

In [None]:
f = pd.Series(False)

In [None]:
f

In [None]:
t = pd.Series(True)

In [None]:
t


In [None]:
t & f

In [None]:
t | f

In [None]:
t = pd.Series([True if i%2 ==0 else False for i in range(10)])

In [None]:
t

In [None]:
f = pd.Series([False for i in range(10)])

In [None]:
f

In [None]:
t & f

In [None]:
t | f

In [None]:
f = pd.Series(data=[False, True, True], index=['c','b', 'a'])
t = pd.Series(data=[True, False, False], index=['a', 'b', 'c'])

In [None]:
f

In [None]:
t

In [None]:
f & t

#### BONUS - XOR And Complement Binary Ops

In [None]:
# Binary XOR -> ^

In [None]:
True ^ False

In [None]:
False ^ False

In [None]:
True ^ True

In [None]:
True ^ (False | False & True) | False

In [None]:
# (Two's) Complement -> ~

In [None]:
# tilde (~) negates

In [None]:
~False

In [None]:
~0

In [None]:
~True

In [None]:
~1

In [None]:
t = pd.Series([True, True, False])

In [None]:
t

In [None]:
~t

#### Combining Conditions

In [None]:
# select all the left backs -> LB

In [None]:
players.head()

In [None]:
# all left backs who are 25 or younger

In [None]:
players[
        (players.position == 'LB') & 
        (players.age <= 25)
        ]

In [None]:
# 3 conditions: left backs, 25 or younger, market value 10+

In [None]:
players[
        (players.position == 'LB') & 
        (players.age <= 25) & 
        (players.market_value >= 10)
        ]

In [None]:
# 4 conditions: left backs, 25 or younger, market value 10+, and NOT from Arsenal or Tottenham clubs

In [None]:
players[
        (players.position == 'LB') &
        (players.age <= 25) &
        (players.market_value >= 10) &
        ~(players.club.isin(['Tottenham', 'Arsenal']))
        ]

#### Conditions As Variables

In [None]:
players[
        (players.position == 'LB') &
        (players.age <= 25) &
        (players.market_value >= 10) &
        ~(players.club.isin(['Tottenham', 'Arsenal']))
        ]

In [None]:
# new target: Arsenal right backs and Chelsea goalkeepers

In [None]:
arsenal_player = players.club == 'Arsenal'

In [None]:
arsenal_player

In [None]:
right_back = players.position == 'RB'

In [None]:
right_back

In [None]:
chelsea_and_GK = (players.club == 'Chelsea') & (players.position == 'GK')

In [None]:
chelsea_and_GK

In [None]:
players.loc[arsenal_player & right_back | chelsea_and_GK]

#### Skill Challenge

######Find the players in our dataset that meet these criteria

1.   they're English (*nationality*), and
2.   their market value is more than twice the average market value in the league (*market_value*), and
3. they either have more than 4,000 views (*page_views*) or are a new signing (*new_signing*) but not both

#### Solution

In [None]:
players.head()

In [None]:
# first condition

In [None]:
english = players.nationality == 'England'

In [None]:
english.head()

In [None]:
# second condtion

In [None]:
players.market_value.mean()

In [None]:
above_average = players.market_value > players.market_value.mean() * 2

In [None]:
above_average.head()

In [None]:
# third condition

In [None]:
popular_xor_new = (players.page_views > 4000) ^ (players.new_signing == 1)

In [None]:
popular_xor_new.head()

In [None]:
print(english.shape)

In [None]:
print(above_average.shape)
print(popular_xor_new.shape)
print(players.shape)

In [None]:
players.loc[english & above_average & popular_xor_new]

#### 2d Indexing

In [None]:
players.head()

In [None]:
# chelsea players and 23 years old or younger

In [None]:
chelsea_23under = (players.club == 'Chelsea') & (players.age.le(23))

In [None]:
chelsea_23under.head()

In [None]:
players.loc[chelsea_23under, ['position', 'market_value']]

In [None]:
# ...select all column that begin with 'p'

In [None]:
# startwith

In [None]:
p_cols = players.columns.str.startswith('p')

In [None]:
players.loc[chelsea_23under, p_cols]

In [None]:
print(chelsea_23under.shape)

In [None]:
print(players.shape)

In [None]:
print(p_cols.shape)

In [None]:
# [] chaining

In [None]:
players[chelsea_23under]['position']

In [None]:
players.loc[chelsea_23under, 'position']

#### Fancy Indexing With lookup()

In [None]:
players.head()

In [None]:
players.loc[[0, 132], ('name', 'market_value')] # fancy

In [None]:
players.lookup([0, 132], ['name', 'market_value'])

In [None]:
players.lookup([0, 132], ['market_value', 'name'])

In [None]:
names = ['Petr Cech', 'Mesut Ozil', 'Alexis Sanchez']

In [None]:
attributes = ['age', 'market_value', 'page_views']

In [None]:
players.set_index('name').lookup(names, attributes)

#### Sorting By Index Or Column

In [None]:
players.head()

In [None]:
players.sort_values(by='market_value', ascending=False)

In [None]:
players.index

In [None]:
players.set_index('name', inplace=True)

In [None]:
players.head(10)

In [None]:
players.index

In [None]:
players.sort_index(inplace=True)

In [None]:
players.head(10)

In [None]:
players.sort_index(axis=1)

In [None]:
players.reset_index()

In [None]:
players.reset_index().reset_index().reset_index()

In [None]:
players.reset_index(inplace=True)

In [None]:
players.head()

#### Sorting vs. Reordering

In [None]:
# reindex()

In [None]:
players_lite = players.iloc[:4, :4]

In [None]:
players_lite

In [None]:
# row order: 2, 1, 3, 0
# column order: age, name, position, club

In [None]:
players_lite.reindex(index=[2,1,3,0], columns=['age', 'name', 'position', 'club'])

In [None]:
players.reindex(index=[2,1,3,0])

In [None]:
# ...get all the columns, and have them alphabetically ordered

In [None]:
players.reindex(index=[2,1,3,0]).sort_index(axis=1)

In [None]:
players.reindex(index=[2,1,3,0], columns=[])

In [None]:
# how do we get a sorted list of column labels?

In [None]:
# ===ASIDE===

In [None]:
players.columns

In [None]:
iter(players.columns)

In [None]:
# iter(True)

In [None]:
sorted(players.columns)

In [None]:
# ===END ASIDE===

In [None]:
players.reindex(index=[2,1,3,0], columns=sorted(players.columns)[:6])

#### BONUS - Another Way

In [None]:
players.reindex(index=[2,1,3,0], columns=sorted(players.columns))

In [None]:
sorted(players.columns)

In [None]:
players.columns.sort_values()

In [None]:
players.reindex(index=[2,1,3,0], columns=players.columns.sort_values())

#### BONUS: Please Avoid Sorting Like This

In [None]:
# anti-pattern - please do not do this!!!

In [None]:
df = players.iloc[:6, :6]

In [None]:
df

In [None]:
df.swapaxes(1, 0)

In [None]:
df.T.sort_index().T

In [None]:
df.sort_index(axis=1)

#### Skill Challenge

###### **1.**

Sort the players in the **players** dataframe by age in ascending order. Who is the youngest footballer in the EPL?

###### **2**. 

Set the *club* column as the index of the dataframe. Then sort the dataframe index in alphabetical order. Make sure these changes are applied to the underlying dataframe and carry over to the next question. 

###### **3**.

Sort the dataframe values by *club* and *market_value* where the club is alphabetical (Arsenal first) and the market value is in descending order (within each team, the most valuable players first).

#### Solution

In [None]:
# 1

In [None]:
players.sort_values(by='age', ascending=True)

In [None]:
# idxmin()!

In [None]:
players.iloc[players.age.idxmin()]

In [None]:
# 2

In [None]:
players.set_index('club').sort_index(inplace=True)

In [None]:
players.head()

In [None]:
# 3

In [None]:
players.sort_values(by=['club', 'market_value'], ascending=[True, False])

In [None]:
players

In [None]:
players.duplicated()

In [None]:
# DUPLICATED 

In [None]:
players[players.duplicated()]

In [None]:
players.duplicated(subset=['club','age','position','market_value'])

In [None]:
players.loc[players.duplicated(subset=['club','age','position','market_value'])].shape

In [None]:
players[players.duplicated()]

In [None]:
players.market_value.mean()

In [None]:
players.drop_duplicates(keep='first').market_value.mean()

In [None]:
#REMOVING ROWS

In [None]:
players.drop(labels= 13, axis=0)

In [None]:
players.drop(index=13)

In [None]:
players.drop(index= [10,29,30, 431])

In [None]:
players.shape

In [None]:
# drop Column 

In [None]:
players.drop(labels=['age','market_value'],axis=1)

In [None]:
players.drop(columns=['age','position','market_value'])

In [None]:
players.pop('club')

In [None]:
players.shape

In [None]:
players.head()

In [None]:
#reeindex to remove row e columns

In [None]:
players.reindex()

In [None]:
players = pd.read_csv('https://andybek.com/pandas-soccer')

In [None]:
players.head()

In [None]:
players.age

In [None]:
ages  = players.age

In [None]:
ages.isna().sum()

In [None]:
# dataframe

In [None]:
players.isna().sum()

In [None]:
np.count_nonzero(players.isna())

In [48]:
players[players.isna().values].drop_duplicates()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,,2,,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [50]:
players.ffill().loc[[30, 192, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,LB,2,15.0,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,3.0,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,30.0,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [51]:
players.ffill(axis=1).loc[[30, 192, 195]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
30,Granit Xhaka,Arsenal,24,24,2,2,1815,5.5,2.00%,85,2,Switzerland,0,2,1,1,0
192,Steve Mounie,Huddersfield,22,CF,1,1,56,6.0,0.60%,0,2,Benin,0,2,8,0,0
195,Kasper Schmeichel,Leicester+City,30,GK,4,4,1601,5.0,2.40%,109,2,Denmark,0,4,9,0,0


In [None]:
players.fillna('some meaningful replacement value').loc[[30,192,195]]

In [None]:
players.fillna({'market_value':100,'position':'RM'}).loc[[30,192,195]]

In [None]:
players.fillna({'market_value':players.market_value.mean(),'position':'RM'}).loc[[30,192,195]]

In [None]:
# DROPPIG NULL VALUES 

In [None]:
players.dropna( axis=1, how= 'all').loc[[30,192,195]]

In [52]:
players.shape

(465, 17)

In [107]:
df2 = players.drop(labels=[2,10,21]).drop(columns ='market_value')

In [103]:
players.drop(index =[2,10,21],columns='market_value')

Unnamed: 0,name,club,age,position,position_cat,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,912,6.0,0.70%,121,2,France,0,4,1,1,0
5,Hector Bellerin,Arsenal,22,RB,3,1675,6.0,13.70%,119,2,Spain,0,2,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Edimilson Fernandes,West+Ham,21,CM,2,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,198,4.5,0.30%,29,1,England,0,2,20,0,0
463,Ashley Fletcher,West+Ham,21,CF,1,412,4.5,5.90%,16,1,England,0,1,20,0,1


In [55]:
players.nationality.isna().sum()

0

In [102]:
players['nationality'].drop_duplicates().size

61

In [109]:
# Alternativa 

In [112]:
players.nationality.nunique()

61

In [115]:
players.drop_duplicates(subset=['age','club','position'], keep='first').loc[:,['age','position']]

Unnamed: 0,age,position
0,28,LW
1,28,AM
2,35,GK
3,28,RW
4,31,CB
...,...,...
460,21,CM
461,23,LB
462,23,RB
463,21,CF


In [119]:
players

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.5,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.5,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.5,198,4.5,0.30%,29,1,England,0,2,20,0,0
463,Ashley Fletcher,West+Ham,21,CF,1,1.0,412,4.5,5.90%,16,1,England,0,1,20,0,1


In [120]:
#NUMERIC COLUMNS

In [121]:
numeric_columns = players.select_dtypes(include=[np.number])

In [122]:
numeric_columns.agg('mean')

age              26.776344
position_cat      2.178495
market_value     11.125649
page_views      771.546237
fpl_value         5.450538
fpl_points       57.544086
region            1.989247
new_foreign       0.034409
age_cat           3.195699
club_id          10.253763
big_club          0.309677
new_signing       0.144086
dtype: float64

In [123]:
players.new_signing.mean()

0.14408602150537633

In [130]:
numeric_columns.agg('min')

age             17.00
position_cat     1.00
market_value     0.05
page_views       3.00
fpl_value        4.00
fpl_points       0.00
region           1.00
new_foreign      0.00
age_cat          1.00
club_id          1.00
big_club         0.00
new_signing      0.00
dtype: float64

In [132]:
players.select_dtypes(np.number).agg(['min','max','mean'])

Unnamed: 0,age,position_cat,market_value,page_views,fpl_value,fpl_points,region,new_foreign,age_cat,club_id,big_club,new_signing
min,17.0,1.0,0.05,3.0,4.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0
max,38.0,4.0,75.0,7664.0,12.5,264.0,4.0,1.0,6.0,20.0,1.0,1.0
mean,26.776344,2.178495,11.125649,771.546237,5.450538,57.544086,1.989247,0.034409,3.195699,10.253763,0.309677,0.144086


In [133]:
players.head(3)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0


In [134]:
# FX RATE 

In [136]:
#ustoeur = 0.93

In [140]:
players.loc[:,['market_value','fpl_value']]

Unnamed: 0,market_value,fpl_value
0,65.0,12.0
1,50.0,9.5
2,7.0,5.5
3,20.0,7.5
4,22.0,6.0
...,...,...
460,5.0,4.5
461,7.0,4.5
462,4.5,4.5
463,1.0,4.5


In [141]:
players.loc[:,['market_value','fpl_value']].transform(lambda x: x* 0.93)

Unnamed: 0,market_value,fpl_value
0,60.450,11.160
1,46.500,8.835
2,6.510,5.115
3,18.600,6.975
4,20.460,5.580
...,...,...
460,4.650,4.185
461,6.510,4.185
462,4.185,4.185
463,0.930,4.185


In [142]:
players.loc[:,['market_value','fpl_value']]* 0.93

Unnamed: 0,market_value,fpl_value
0,60.450,11.160
1,46.500,8.835
2,6.510,5.115
3,18.600,6.975
4,20.460,5.580
...,...,...
460,4.650,4.185
461,6.510,4.185
462,4.185,4.185
463,0.930,4.185


In [143]:
from random import choice 

In [144]:
def random_case(x):
    funcs = [x.str.swapcase,x.str.lower, x.str.title, x.str.upper]
    return choice(funcs)()

In [151]:
players[players.select_dtypes(include='object').columns] = players.select_dtypes(include='object').map(random_case)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,,,28,,1,65.0,4329,12.0,,264,3,,0,4,1,1,0
1,,,28,,1,50.0,4395,9.5,,167,2,,0,4,1,1,0
2,,,35,,4,7.0,1529,5.5,,134,2,,0,6,1,1,0
3,,,28,,1,20.0,2393,7.5,,122,1,,0,4,1,1,0
4,,,31,,3,22.0,912,6.0,,121,2,,0,4,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,,,21,,2,5.0,288,4.5,,38,2,,0,1,20,0,1
461,,,23,,3,7.0,199,4.5,,34,4,,0,2,20,0,1
462,,,23,,3,4.5,198,4.5,,29,1,,0,2,20,0,0
463,,,21,,1,1.0,412,4.5,,16,1,,0,1,20,0,1


In [153]:
players = pd.read_csv(data_url)

In [155]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [160]:
def round_floats(x):
    if np.issubdtype(x.dtype, np.floating):  # Verifica se o tipo é float
        return round(x)
    return x

In [161]:
players.apply(round_floats)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,10.0,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,6.0,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,8.0,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,Edimilson Fernandes,West+Ham,21,CM,2,5.0,288,4.0,0.40%,38,2,Switzerland,0,1,20,0,1
461,Arthur Masuaku,West+Ham,23,LB,3,7.0,199,4.0,0.20%,34,4,Congo DR,0,2,20,0,1
462,Sam Byram,West+Ham,23,RB,3,4.0,198,4.0,0.30%,29,1,England,0,2,20,0,0
463,Ashley Fletcher,West+Ham,21,CF,1,1.0,412,4.0,5.90%,16,1,England,0,1,20,0,1


In [163]:
players.select_dtypes(np.float64).head()

Unnamed: 0,market_value,fpl_value
0,65.0,12.0
1,50.0,9.5
2,7.0,5.5
3,20.0,7.5
4,22.0,6.0


In [164]:
players.select_dtypes(np.float64).apply(round_floats).head()

Unnamed: 0,market_value,fpl_value
0,65.0,12.0
1,50.0,10.0
2,7.0,6.0
3,20.0,8.0
4,22.0,6.0


In [171]:
players.select_dtypes(include=np.number).apply('mean', axis=1)

0      392.333333
1      388.208333
2      143.708333
3      214.875000
4       91.916667
          ...    
460     31.875000
461     24.791667
462     23.750000
463     39.875000
464     24.708333
Length: 465, dtype: float64

In [173]:
players.loc[460,[dtype != object for dtype in players.dtypes]].mean()

31.875

In [174]:
# Vectorized ops: agg() , apply(), transform ()

In [175]:
# Non-vectorized : applymap()

In [176]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0


In [177]:
inflation = 1.02

In [180]:
players.loc[:,['market_value','fpl_value']]

Unnamed: 0,market_value,fpl_value
0,65.0,12.0
1,50.0,9.5
2,7.0,5.5
3,20.0,7.5
4,22.0,6.0
...,...,...
460,5.0,4.5
461,7.0,4.5
462,4.5,4.5
463,1.0,4.5


In [179]:
 mini_df = players.loc[:,['market_value','fpl_value']]

In [181]:
mini_df * inflation

Unnamed: 0,market_value,fpl_value
0,66.30,12.24
1,51.00,9.69
2,7.14,5.61
3,20.40,7.65
4,22.44,6.12
...,...,...
460,5.10,4.59
461,7.14,4.59
462,4.59,4.59
463,1.02,4.59


In [184]:
from datetime import datetime
counter = 0
def log_and_transform(x):
    global counter
    counter +=1
    if counter % 100 == 0:
        print(f"It's {datetime.now()} and I just adjusted the {counter}th value")
    return x * inflation
        

In [186]:
import sys
sys.version

'3.11.7 | packaged by Anaconda, Inc. | (main, Dec 15 2023, 18:05:47) [MSC v.1916 64 bit (AMD64)]'

In [188]:
mini_df.applymap(log_and_transform)

It's 2024-06-25 14:10:14.177189 and I just adjusted the 100th value
It's 2024-06-25 14:10:14.177189 and I just adjusted the 200th value
It's 2024-06-25 14:10:14.177189 and I just adjusted the 300th value
It's 2024-06-25 14:10:14.177189 and I just adjusted the 400th value
It's 2024-06-25 14:10:14.177189 and I just adjusted the 500th value
It's 2024-06-25 14:10:14.177189 and I just adjusted the 600th value
It's 2024-06-25 14:10:14.177189 and I just adjusted the 700th value
It's 2024-06-25 14:10:14.178186 and I just adjusted the 800th value
It's 2024-06-25 14:10:14.178186 and I just adjusted the 900th value


  mini_df.applymap(log_and_transform)


Unnamed: 0,market_value,fpl_value
0,66.30,12.24
1,51.00,9.69
2,7.14,5.61
3,20.40,7.65
4,22.44,6.12
...,...,...
460,5.10,4.59
461,7.14,4.59
462,4.59,4.59
463,1.02,4.59


In [199]:
def popularity(x):
    if x < 220:
        return 'relative unknown'
    elif x < 600:
        return 'kind of popular'
    elif x < 2000:
        return 'Popular'
    else:
        return 'super-popular'


In [202]:
players.loc[:,'page_views'].apply(popularity)

0         super-popular
1         super-popular
2               Popular
3         super-popular
4               Popular
             ...       
460     kind of popular
461    relative unknown
462    relative unknown
463     kind of popular
464    relative unknown
Name: page_views, Length: 465, dtype: object

In [192]:
mini_page_views = players.loc[:,'page_views']

In [203]:
mini_page_views.apply(popularity)

0         super-popular
1         super-popular
2               Popular
3         super-popular
4               Popular
             ...       
460     kind of popular
461    relative unknown
462    relative unknown
463     kind of popular
464    relative unknown
Name: page_views, Length: 465, dtype: object

In [204]:
players['popularity'] = mini_page_views.apply(popularity)

In [205]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,Popular


In [208]:
players.popularity.value_counts()

popularity
kind of popular     170
Popular             143
relative unknown    115
super-popular        37
Name: count, dtype: int64

In [214]:
players[players.popularity == 'super-popular'].name.size

37

In [215]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,Popular


In [224]:
players.loc[3,'position'] = 'CM'

In [225]:
players.loc[3,'position']

'CM'

In [230]:
players.iloc[3,3] = 'RW'

In [231]:
players.iloc[3,3] 

'RW'

In [233]:
# at and iat SHULD BE PREFER TO SINGLE VALUE ASSINGMENT

In [236]:
players.at[3,'position'] = 'CM'

In [238]:
players.iat[3,3] = 'RW'

In [239]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,GK,4,7.0,1529,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,Popular


In [243]:
players['page_views'][2] = 2001

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players['page_views'][2] = 2001


In [244]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,GK,4,7.0,2001,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,Popular


In [246]:
players.drop_duplicates()['page_views'][2] = 3000

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  players.drop_duplicates()['page_views'][2] = 3000


In [247]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,LW,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,AM,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,GK,4,7.0,2001,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular
3,Theo Walcott,Arsenal,28,RW,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,Popular


In [248]:
# pd.options.mode.chained_assignment = 'warn'

# 'None' turns the setting SettingWithCopyWarning OFF 

In [251]:
players.loc[0:3,'position'] = ['CM','RW','CB','GK']

In [253]:
players.head(4)

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
0,Alexis Sanchez,Arsenal,28,CM,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular
1,Mesut Ozil,Arsenal,28,RW,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular
2,Petr Cech,Arsenal,35,CB,4,7.0,2001,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular
3,Theo Walcott,Arsenal,28,GK,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular


In [254]:
# Aron nickname

In [257]:
players.loc[players.name.str.startswith('Aaron')]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
15,Aaron Ramsey,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0,Popular
157,Aaron Lennon,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0,kind of popular
176,Aaron Mooy,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0,kind of popular
455,Aaron Cresswell,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0,kind of popular


In [259]:
players.loc[players.name.str.startswith('Aaron'),'name'] = 'Ronny'

In [260]:
players.loc[[15,157,176,455]]

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity
15,Ronny,Arsenal,26,CM,2,35.0,1040,7.0,5.10%,56,1,Wales,0,3,1,1,0,Popular
157,Ronny,Everton,30,RW,1,5.0,504,5.5,0.20%,22,1,England,0,4,7,0,0,kind of popular
176,Ronny,Huddersfield,26,CM,2,5.0,588,5.5,2.50%,0,4,Australia,0,3,8,0,0,kind of popular
455,Ronny,West+Ham,27,LB,3,12.0,380,5.0,1.30%,60,1,England,0,3,20,0,0,kind of popular


In [261]:
# ADDING DATAFRAME COLUMNS 

In [262]:
players.popularity

0         super-popular
1         super-popular
2               Popular
3         super-popular
4               Popular
             ...       
460     kind of popular
461    relative unknown
462    relative unknown
463     kind of popular
464    relative unknown
Name: popularity, Length: 465, dtype: object

In [267]:
'MVP_toFPL' in players

False

In [266]:
'market_value' in players

True

In [268]:
players['MVP_toFPL'] = 1.0

In [269]:
'MVP_toFPL' in players

True

In [270]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity,MVP_toFPL
0,Alexis Sanchez,Arsenal,28,CM,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular,1.0
1,Mesut Ozil,Arsenal,28,RW,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular,1.0
2,Petr Cech,Arsenal,35,CB,4,7.0,2001,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular,1.0
3,Theo Walcott,Arsenal,28,GK,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular,1.0
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,Popular,1.0


In [271]:
players['MVP_toFPL'] = players['market_value'] / players['fpl_value']

In [272]:
players.head()

Unnamed: 0,name,club,age,position,position_cat,market_value,page_views,fpl_value,fpl_sel,fpl_points,region,nationality,new_foreign,age_cat,club_id,big_club,new_signing,popularity,MVP_toFPL
0,Alexis Sanchez,Arsenal,28,CM,1,65.0,4329,12.0,17.10%,264,3,Chile,0,4,1,1,0,super-popular,5.416667
1,Mesut Ozil,Arsenal,28,RW,1,50.0,4395,9.5,5.60%,167,2,Germany,0,4,1,1,0,super-popular,5.263158
2,Petr Cech,Arsenal,35,CB,4,7.0,2001,5.5,5.90%,134,2,Czech Republic,0,6,1,1,0,Popular,1.272727
3,Theo Walcott,Arsenal,28,GK,1,20.0,2393,7.5,1.50%,122,1,England,0,4,1,1,0,super-popular,2.666667
4,Laurent Koscielny,Arsenal,31,CB,3,22.0,912,6.0,0.70%,121,2,France,0,4,1,1,0,Popular,3.666667


In [314]:
df_mini = players.iloc[:4,1:5]

In [315]:
df_mini

Unnamed: 0,club,age,position,position_cat
0,Arsenal,28,CM,1
1,Arsenal,28,RW,1
2,Arsenal,35,CB,4
3,Arsenal,28,GK,1


In [316]:
player_name = pd.Series(['Caio','Nathan','Liz', 'Rhuan'])

0      Caio
1    Nathan
2       Liz
3     Rhuan
dtype: object

In [317]:
df_mini.insert(0,'nickname',player_name)

In [318]:
df_mini

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1


In [319]:
df_mini.assign(carrer_goal = [12,10,5,4], nationality =['Brazilian','American','British','Turkish'])

Unnamed: 0,nickname,club,age,position,position_cat,carrer_goal,nationality
0,Caio,Arsenal,28,CM,1,12,Brazilian
1,Nathan,Arsenal,28,RW,1,10,American
2,Liz,Arsenal,35,CB,4,5,British
3,Rhuan,Arsenal,28,GK,1,4,Turkish


In [320]:
df_mini

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1


In [285]:
# ADDIN ROW , THE APPEND METHOD WORKS WITH , SERIES DATAFRAMES OR COLLECTION OF THEM

In [288]:
cristiano = pd.Series({
    'nickname':'Cristiano',
    'age':32,
    'position':'RW',
    'club':'Juventus',
    'position_cat':1
},name=4)

In [289]:
cristiano

nickname        Cristiano
age                    32
position               RW
club             Juventus
position_cat            1
Name: 4, dtype: object

In [325]:
df_mini

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1
4,Cristiano,Juventus,32,RW,1


In [323]:
df_mini= pd.concat([df_mini, pd.DataFrame([cristiano])], ignore_index=True)

In [326]:
df_mini1 = df_mini

In [340]:
others_players = pd.DataFrame({'nickname':['Gianluigi','Leonel'],
                                'age':[19,37],
                                'club':['Juventus','Barcelona'],
                                'position':['GK','CF'],
                                'position_cat':[4,2]
                               },index=[5,6])

In [341]:
others_players

Unnamed: 0,nickname,age,club,position,position_cat
5,Gianluigi,19,Juventus,GK,4
6,Leonel,37,Barcelona,CF,2


In [336]:
df_mini = df_mini1

In [337]:
df_mini

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1
4,Cristiano,Juventus,32,RW,1


In [347]:
df_mini = pd.concat([df_mini, others_players])

In [348]:
df_mini

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1
4,Cristiano,Juventus,32,RW,1
5,Gianluigi,Juventus,19,GK,4
6,Leonel,Barcelona,37,CF,2
5,Gianluigi,Juventus,19,GK,4
6,Leonel,Barcelona,37,CF,2


In [349]:
df_mini.drop(index=[5,6])

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1
4,Cristiano,Juventus,32,RW,1


In [350]:
df_mini = pd.concat([df_mini, others_players])

In [354]:
df_mini.drop(index=[5,6], inplace=True)

In [355]:
df_mini

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1
4,Cristiano,Juventus,32,RW,1


In [357]:
df_mini = pd.concat([df_mini, others_players])

In [358]:
df_mini

Unnamed: 0,nickname,club,age,position,position_cat
0,Caio,Arsenal,28,CM,1
1,Nathan,Arsenal,28,RW,1
2,Liz,Arsenal,35,CB,4
3,Rhuan,Arsenal,28,GK,1
4,Cristiano,Juventus,32,RW,1
5,Gianluigi,Juventus,19,GK,4
6,Leonel,Barcelona,37,CF,2


In [373]:
columns = np.random.choice(players.columns, 4, replace=False)

In [374]:
columns

array(['position', 'name', 'age', 'nationality'], dtype=object)

In [371]:
rows = np.random.choice(players.index, 4, replace=False)

In [372]:
rows

array([164, 136, 421, 441], dtype=int64)

In [376]:
df_random = players.loc[rows, columns]

In [377]:
df_random

Unnamed: 0,position,name,age,nationality
164,CF,Sandro Ramirez,22,Spain
136,GK,Julian Speroni,38,Argentina
421,RB,Kiko Femenia,26,Spain
441,GK,Boaz Myhill,34,Wales


In [378]:
#ANOTHER WAY USING SAMPLE 

In [382]:
df_random2 = players.sample(4).sample(4, axis=1)

In [385]:
df_random3 = df_random2

In [384]:
# Adding a row

In [386]:
df_random2

Unnamed: 0,age_cat,popularity,age,position_cat
181,3,relative unknown,25,2
361,5,kind of popular,32,1
362,4,relative unknown,28,3
453,4,Popular,28,1


In [389]:
new_series = pd.Series({'age_cat': 5,
                       'popularity':'popular',
                        'age': 30,
                        'position_cat':3
                       },name = 370)

In [394]:
new_series

age_cat               5
popularity      popular
age                  30
position_cat          3
Name: 370, dtype: object

In [392]:
df_random2 = df_random3

In [397]:
df_random2 = pd.concat([df_random2, pd.DataFrame([new_series])])

In [399]:
df_random2

Unnamed: 0,age_cat,popularity,age,position_cat
181,3,relative unknown,25,2
361,5,kind of popular,32,1
362,4,relative unknown,28,3
453,4,Popular,28,1
370,5,popular,30,3


In [402]:
df_random2.assign(years_in_league =[3,5,1,10,6])

Unnamed: 0,age_cat,popularity,age,position_cat,years_in_league
181,3,relative unknown,25,2,3
361,5,kind of popular,32,1,5
362,4,relative unknown,28,3,1
453,4,Popular,28,1,10
370,5,popular,30,3,6
