In [1]:
import pandas as pd

In [2]:
# DataFrame basics
# A DataFrame can be built directly from a dictionary: every key becomes a
# column label and every list holds that column's values, one per row.
people = {
    'name': ['Juan', 'Laura', 'Nubia'],
    'lastname': ['García', 'Castro', 'Rojas'],
    'Family': ['Son', 'Daughter', 'Mom'],
}

# Each column of the resulting frame is a Series.
df_people = pd.DataFrame(data=people)

# The leftmost 0, 1 and 2 in the output are the index: one unique label per row.
df_people

Unnamed: 0,name,lastname,Family
0,Juan,García,Son
1,Laura,Castro,Daughter
2,Nubia,Rojas,Mom


In [3]:
# Bracket access with a single column label returns that column as a Series.
df_people['name']

0     Juan
1    Laura
2    Nubia
Name: name, dtype: object

In [4]:
# Attribute access only works when the column name is a valid Python identifier
# (e.g. it cannot start with a digit or contain spaces) and does not clash with
# an existing DataFrame attribute or method; bracket access always works.
df_people.name

0     Juan
1    Laura
2    Nubia
Name: name, dtype: object

In [5]:
# A single column is a Series: the values of that column, one per row label.
type(df_people['name'])

pandas.core.series.Series

#### selecting multiple columns 

* This results in a DataFrame, not a Series object.

In [6]:
# The inner brackets are a list of column labels; the columns come back in that order.
df_people[['name','lastname']]

Unnamed: 0,name,lastname
0,Juan,García
1,Laura,Castro
2,Nubia,Rojas


In [7]:
# The column labels, returned as a pandas Index object.
df_people.columns

Index(['name', 'lastname', 'Family'], dtype='object')

In [8]:
# iloc selects by integer position: position 0 is the first row, returned as a
# Series whose labels are the column names.
df_people.iloc[0]

name          Juan
lastname    García
Family         Son
Name: 0, dtype: object

In [9]:
# iloc[rows, columns] works by integer position: the first selector ([0, 1]) picks
# the first and second rows, the second selector (2) picks the third column,
# which here is 'Family'.
df_people.iloc[[0,1],2]

0         Son
1    Daughter
Name: Family, dtype: object

In [10]:
# loc selects by label rather than position; 0 here is the index label of the row.
df_people.loc[0]

name          Juan
lastname    García
Family         Son
Name: 0, dtype: object

In [11]:
# loc[row labels, column labels]: rows labelled 0 and 1, column labelled 'Family'.
df_people.loc[[0,1],'Family']

0         Son
1    Daughter
Name: Family, dtype: object

In [12]:
# set_index returns a new DataFrame that uses 'Family' as the row labels.
# The result is only displayed, not assigned, so df_people itself is unchanged.
df_people.set_index('Family')

Unnamed: 0_level_0,name,lastname
Family,Unnamed: 1_level_1,Unnamed: 2_level_1
Son,Juan,García
Daughter,Laura,Castro
Mom,Nubia,Rojas


In [13]:
# Still indexed 0, 1, 2 with 'Family' as a column: the set_index call above
# did not modify df_people.
df_people

Unnamed: 0,name,lastname,Family
0,Juan,García,Son
1,Laura,Castro,Daughter
2,Nubia,Rojas,Mom


In [14]:
# Make 'Family' the index of df_people. set_index returns a new DataFrame, so the
# result is assigned back; this has the same effect as passing inplace=True but
# keeps the explicit "new value = f(old value)" form that pandas recommends.
df_people = df_people.set_index('Family')

In [15]:
# 'Family' is now the index, so it no longer appears among the columns.
df_people

Unnamed: 0_level_0,name,lastname
Family,Unnamed: 1_level_1,Unnamed: 2_level_1
Son,Juan,García
Daughter,Laura,Castro
Mom,Nubia,Rojas


In [16]:
# Positional access does not depend on the index labels: position 0 is still
# the first row, which is now labelled 'Son'.
df_people.iloc[0] 

name          Juan
lastname    García
Name: Son, dtype: object

In [17]:
# Turn the 'Family' index back into a regular column and restore the default
# 0, 1, 2 index. reset_index returns a new DataFrame, so the result is assigned
# back; this has the same effect as inplace=True without mutating in place.
df_people = df_people.reset_index()

In [18]:
# 'Family' is a regular column again (now placed first) and the index is back to 0, 1, 2.
df_people

Unnamed: 0,Family,name,lastname
0,Son,Juan,García
1,Daughter,Laura,Castro
2,Mom,Nubia,Rojas


In [19]:
# Boolean mask: a Series with one True/False per row, True where lastname is 'García'.
# The mask does not contain the rows itself; it is passed to df_people[...] or
# df_people.loc[...] to select them.
filt = (df_people['lastname'] == 'García')

In [20]:
# Indexing with a boolean mask keeps only the rows where the mask is True.
df_people[filt]
# Equivalent without the intermediate variable:
# df_people[df_people['lastname'] == 'García']

Unnamed: 0,Family,name,lastname
0,Son,Juan,García


In [21]:
# loc[row selector, column label]: the 'name' value of the rows where filt is True.
df_people.loc[filt,'name']

0    Juan
Name: name, dtype: object

In [22]:
# Two conditions combined element-wise with & (AND): a row is selected only when
# both hold. Each comparison keeps its own parentheses because & has higher
# precedence than ==.
filt = (
    (df_people['lastname'] == 'García')
    & (df_people['name'] == 'Juan')
)

In [23]:
# 'name' of the rows that satisfy both conditions of the combined filter.
df_people.loc[filt,'name']

0    Juan
Name: name, dtype: object

#### The `~` operator negates a filter: it selects the rows where the filter is False.

In [24]:
# ~filt flips every True/False in the mask, so this returns the 'name' of the
# rows that do NOT match the filter.
df_people.loc[~filt,'name']

1    Laura
2    Nubia
Name: name, dtype: object

In [25]:
# Current state of the frame before the column labels are changed.
df_people

Unnamed: 0,Family,name,lastname
0,Son,Juan,García
1,Daughter,Laura,Castro
2,Mom,Nubia,Rojas


In [26]:
# Current labels; note the inconsistent capitalisation ('Family' vs 'name').
df_people.columns

Index(['Family', 'name', 'lastname'], dtype='object')

In [27]:
# Assigning a list to .columns replaces every label at once; the list is matched
# to the existing columns by position, so it needs one entry per column, in order.
df_people.columns = ['family','first_name','last_name']

In [28]:
# Upper-case every column label through the vectorised .str accessor of the Index.
df_people.columns = df_people.columns.str.upper()
df_people

Unnamed: 0,FAMILY,FIRST_NAME,LAST_NAME
0,Son,Juan,García
1,Daughter,Laura,Castro
2,Mom,Nubia,Rojas


In [29]:
# Replace spaces in the column labels with underscores. None of the current
# labels (FAMILY, FIRST_NAME, LAST_NAME) contains a space, so this leaves them
# unchanged; it is shown as a general clean-up pattern.
df_people.columns = df_people.columns.str.replace(' ', '_')

In [30]:
# Lower-case every column label through the vectorised .str accessor of the Index.
df_people.columns = df_people.columns.str.lower()
df_people

Unnamed: 0,family,first_name,last_name
0,Son,Juan,García
1,Daughter,Laura,Castro
2,Mom,Nubia,Rojas


In [31]:
# Rename only the listed columns ({old label: new label}); any column that is
# not mentioned keeps its name. rename returns a new DataFrame, so the result is
# assigned back instead of using inplace=True.
df_people = df_people.rename(columns={'first_name': 'first', 'last_name': 'last'})

In [32]:
# 'first_name' and 'last_name' now appear as 'first' and 'last'; 'family' is untouched.
df_people

Unnamed: 0,family,first,last
0,Son,Juan,García
1,Daughter,Laura,Castro
2,Mom,Nubia,Rojas


In [33]:
# By label: the row whose index label is 2.
df_people.loc[2]

family      Mom
first     Nubia
last      Rojas
Name: 2, dtype: object

In [34]:
# By position: the third row. It is the same row as loc[2] only because the
# index is the default 0, 1, 2, where labels and positions coincide.
df_people.iloc[2]

family      Mom
first     Nubia
last      Rojas
Name: 2, dtype: object

In [35]:
# Overwrite the whole row labelled 2; the values are given in column order
# (family, first, last).
df_people.loc[2] = ['Madre','Azucena','Rojas']
df_people

Unnamed: 0,family,first,last
0,Son,Juan,García
1,Daughter,Laura,Castro
2,Madre,Azucena,Rojas


In [36]:
# Row labelled 2, restricted to the 'family' and 'first' columns.
df_people.loc[2,['family','first']]

family      Madre
first     Azucena
Name: 2, dtype: object

In [37]:
# The .str accessor applies a string method to every value of the Series;
# assigning the result back overwrites the 'family' column.
df_people['family'] = df_people['family'].str.upper()
df_people

Unnamed: 0,family,first,last
0,SON,Juan,García
1,DAUGHTER,Laura,Castro
2,MADRE,Azucena,Rojas


#### **Apply a function over a column**

In [38]:
# On a Series, apply calls the function once per value: here, the number of
# characters in each string.
df_people['family'].apply(len)

0    3
1    8
2    5
Name: family, dtype: int64

In [39]:
# apply runs the given callable on every value of the column; str.lower is passed
# directly instead of being wrapped in a lambda. The result replaces the column.
df_people['family'] = df_people['family'].apply(str.lower)

In [40]:
# The 'family' values are lower-case again after the apply above.
df_people

Unnamed: 0,family,first,last
0,son,Juan,García
1,daughter,Laura,Castro
2,madre,Azucena,Rojas


In [41]:
# Length of each value in the 'family' Series.
df_people['family'].apply(len)

0    3
1    8
2    5
Name: family, dtype: int64

In [42]:
# On a DataFrame, apply passes a whole Series to the function instead of single
# values. With axis='columns' the function receives one row at a time, so len
# is the number of columns in that row (3 for every row).
df_people.apply(len, axis='columns')

0    3
1    3
2    3
dtype: int64

In [43]:
# With the default axis the function receives one column at a time. For string
# columns min is the value that sorts first in lexicographic order.
df_people.apply(pd.Series.min)

family    daughter
first      Azucena
last        Castro
dtype: object

In [44]:
# Same result as the previous cell, written with a lambda; x is each column as a Series.
df_people.apply(lambda x:x.min())

family    daughter
first      Azucena
last        Castro
dtype: object

### **applymap** only works on DataFrame objects

In [45]:
# applymap calls the function on every individual cell of the DataFrame (here,
# the length of each string). It is a DataFrame-only method; Series has no applymap.
# NOTE: pandas 2.1 deprecated DataFrame.applymap in favour of DataFrame.map.
df_people.applymap(len)

Unnamed: 0,family,first,last
0,3,4,6
1,8,5,6
2,5,7,5


In [46]:
# Returns a new, fully lower-cased DataFrame. The result is only displayed, not
# assigned, so df_people keeps its original capitalisation.
df_people.applymap(str.lower)

Unnamed: 0,family,first,last
0,son,juan,garcía
1,daughter,laura,castro
2,madre,azucena,rojas


In [47]:
# map looks each value up in the dict and yields NaN when there is no matching key.
# Every result is NaN here because the keys are lower-case ('juan', 'laura') while
# the column holds 'Juan', 'Laura', 'Azucena': the applymap(str.lower) call above
# was not assigned back, so df_people was never lower-cased.
# map also returns a new Series; df_people itself is not modified by this cell.
df_people['first'].map({'juan':'david','laura':'daniela'})

0    NaN
1    NaN
2    NaN
Name: first, dtype: object

In [48]:
# Series.map returns a new Series and never modifies the DataFrame, so the NaN
# values produced by the previous cell were not stored anywhere: the 'first'
# column still holds the original names and there is nothing to restore.
# (A former "restore" call mapping {'NaN': 'Juan', 'NaN': 'Laura', 'NaN': 'Azucena'}
# was removed: repeated keys in a dict literal collapse to the last one, 'NaN' is
# a plain string rather than a missing value, and its result was discarded.)
df_people

Unnamed: 0,family,first,last
0,son,Juan,García
1,daughter,Laura,Castro
2,madre,Azucena,Rojas
