# Pandas data alterations

In [63]:
import pandas as pd

In [64]:
# Sample people table used throughout the notebook: one tuple per person,
# with the column labels supplied separately.
df = pd.DataFrame(
    [
        ('Javi', 'Nadiz', 12),
        ('Yulia', 'Del Monte', 22),
        ('Ariel', 'Losas', 11),
    ],
    columns=['Name', 'Surname', 'Age'],
)

## Column name changes

In [65]:
# Replace every column label in one go: the list gives one new label per
# column, in the existing column order.
new_labels = ['first name', 'last name', 'Age']
df.columns = new_labels
df

Unnamed: 0,first name,last name,Age
0,Javi,Nadiz,12
1,Yulia,Del Monte,22
2,Ariel,Losas,11


In [66]:
# Upper-case every column label with the vectorised .str accessor of the
# column Index, so no Python-level loop over the labels is needed.
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST NAME,LAST NAME,AGE
0,Javi,Nadiz,12
1,Yulia,Del Monte,22
2,Ariel,Losas,11


In [67]:
# Swap the spaces in the column labels for underscores, then install the
# cleaned labels on the DataFrame.
underscored_labels = df.columns.str.replace(' ', '_')
df.columns = underscored_labels
df

Unnamed: 0,FIRST_NAME,LAST_NAME,AGE
0,Javi,Nadiz,12
1,Yulia,Del Monte,22
2,Ariel,Losas,11


In [68]:
# Renaming selected columns: rename() only touches the labels listed in the
# mapping and leaves every other column label as it is.
# rename() returns a new DataFrame, so the result is assigned back to df to
# keep the change (used here instead of inplace=True, which makes the data
# flow explicit and keeps the call chainable).
df = df.rename(columns={'FIRST_NAME': 'name', 'LAST_NAME': 'surname'})
df = df.rename(columns={'AGE': 'age'})
df

Unnamed: 0,name,surname,age
0,Javi,Nadiz,12
1,Yulia,Del Monte,22
2,Ariel,Losas,11


## Modifying row values

In [69]:
# Overwrite the whole row labelled 1: one value per column, in column order.
replacement_row = ['Anna', 'smith', 33]
df.loc[1] = replacement_row
df

Unnamed: 0,name,surname,age
0,Javi,Nadiz,12
1,Anna,smith,33
2,Ariel,Losas,11


In [70]:
# Modifying only selected values: the row label comes first, then the list of
# columns to overwrite; the right-hand side supplies one value per listed column.
df.loc[1, ['name', 'age']] = ['julia', 21]
df

Unnamed: 0,name,surname,age
0,Javi,Nadiz,12
1,julia,smith,21
2,Ariel,Losas,11


In [71]:
# For a single cell you can use .at, addressed by one row label and one
# column label.
df.at[1, 'age'] = 19
df

Unnamed: 0,name,surname,age
0,Javi,Nadiz,12
1,julia,smith,19
2,Ariel,Losas,11


In [72]:
# Chained indexing does NOT update df: df[filt] produces a copy, so the
# assignment below lands on that temporary copy and pandas emits a warning.
# To actually set the value, use a single indexer instead: .loc, .iloc or .at
# (e.g. df.loc[filt, 'age'] = 11).
filt = (df['name'] == 'Javi')
df[filt]['age'] = 11

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['age'] = 11


In [73]:
# Normalise the casing of every name (first letter upper-case, the rest
# lower-case) through the .str accessor, then write the column back.
capitalized_names = df['name'].str.capitalize()
df['name'] = capitalized_names
df

Unnamed: 0,name,surname,age
0,Javi,Nadiz,12
1,Julia,smith,19
2,Ariel,Losas,11


### Apply

In [74]:
def update_surname(surname: str) -> str:
    """Return ``surname`` with its first character upper-cased and the rest lower-cased."""
    capitalized = surname.capitalize()
    return capitalized

# .apply on a single column (a Series) calls the given function on every
# value of that column and collects the results into a new Series.
capitalized_surnames = df['surname'].apply(update_surname)
df['surname'] = capitalized_surnames
df

Unnamed: 0,name,surname,age
0,Javi,Nadiz,12
1,Julia,Smith,19
2,Ariel,Losas,11


In [75]:
# If you run apply on the whole DataFrame, the function is called once per
# column (here it reports the length of each column).
df.apply(len)

name       3
surname    3
age        3
dtype: int64

In [76]:
# With axis='columns' the function is called once per row instead; each row
# holds one entry per column, hence the length 3 for every row.
df.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [77]:
# Smallest value of each column (text columns are compared as strings,
# the age column numerically).
df.apply(lambda column: column.min())

name       Ariel
surname    Losas
age           11
dtype: object

## DataFrame and Series modifications

In [78]:
# applymap calls the function on every individual value of the DataFrame.
# len needs values that have a length, so only the text columns are selected
# here (the integer 'age' column is left out).
# NOTE(review): recent pandas releases deprecate applymap in favour of
# DataFrame.map; confirm against the installed version before switching.
text_columns = df[['name', 'surname']]
text_columns.applymap(len)

Unnamed: 0,name,surname
0,4,5
1,5,5
2,5,5


In [79]:
# map translates each value of a Series through the dictionary; values
# without an entry in the dictionary come back as NaN.
name_translation = {'Javi': 'Daniel', 'Julia': 'Andrea'}
df['name'].map(name_translation)

0    Daniel
1    Andrea
2       NaN
Name: name, dtype: object

In [81]:
# replace only swaps the values listed in the dictionary and leaves every
# other value unchanged, so no NaN appears for unmatched names.
name_replacements = {'Javi': 'Daniel', 'Julia': 'Andrea'}
df['name'].replace(name_replacements)

0    Daniel
1    Andrea
2     Ariel
Name: name, dtype: object