In [2]:
# Column-oriented sample data: every key is a column name and every list holds
# that column's values, one entry per person, in the same order in each list.
people = dict(
    first=['Kate', 'Jane', 'John'],
    last=['Doe', 'Doe', 'Doe'],
    email=['Kate@email.com', 'JaneDoe@email.com', 'JohnDoe@email.com'],
)

In [3]:
import pandas as pd

In [4]:
# Build a DataFrame from the dict: each key becomes a column and each list
# supplies that column's values.
df = pd.DataFrame(people)

In [5]:
# Display the frame; rows carry the default labels 0, 1, 2.
df

Unnamed: 0,first,last,email
0,Kate,Doe,Kate@email.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [6]:
# Bracket access with one column name returns that column as a Series.
df['email']

0       Kate@email.com
1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

In [7]:
# Attribute-style access; gives the same Series as df['email'].
df.email

0       Kate@email.com
1    JaneDoe@email.com
2    JohnDoe@email.com
Name: email, dtype: object

In [8]:
# A list of column names (note the double brackets) returns a DataFrame
# containing only those columns.
df[['last', 'email']]

Unnamed: 0,last,email
0,Doe,Kate@email.com
1,Doe,JaneDoe@email.com
2,Doe,JohnDoe@email.com


In [9]:
# The column labels of the frame.
df.columns

Index(['first', 'last', 'email'], dtype='object')

In [10]:
# iloc selects by integer position: the row at position 0, returned as a Series
df.iloc[0]

first              Kate
last                Doe
email    Kate@email.com
Name: 0, dtype: object

In [11]:
# loc selects by label: here the rows labelled 0 and 1
df.loc[[0, 1]]

Unnamed: 0,first,last,email
0,Kate,Doe,Kate@email.com
1,Jane,Doe,JaneDoe@email.com


In [12]:
# iloc with rows and a column, both by position: rows 0 and 1, column at position 2 ('email').
df.iloc[[0, 1], 2]

0       Kate@email.com
1    JaneDoe@email.com
Name: email, dtype: object

In [13]:
# The loc equivalent: rows labelled 0 and 1, column labelled 'email'.
df.loc[[0, 1], 'email']

0       Kate@email.com
1    JaneDoe@email.com
Name: email, dtype: object

In [14]:
# loc with a list of column labels; the columns come back in the order requested.
df.loc[[0, 1], ['email', 'last']]

Unnamed: 0,email,last
0,Kate@email.com,Doe
1,JaneDoe@email.com,Doe


In [15]:
# set_index returns a new frame indexed by email; the result is only displayed
# here, not assigned, so df itself keeps its original index.
df.set_index('email')

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Kate@email.com,Kate,Doe
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [16]:
# df still has the default 0..2 index and the 'email' column.
df

Unnamed: 0,first,last,email
0,Kate,Doe,Kate@email.com
1,Jane,Doe,JaneDoe@email.com
2,John,Doe,JohnDoe@email.com


In [17]:
# With inplace=True the index change is applied to df itself instead of being
# returned as a new frame.
df.set_index('email', inplace=True)

In [18]:
# 'email' is now the index rather than a column.
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Kate@email.com,Kate,Doe
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [19]:
# The index holds the email strings and carries the name 'email'.
df.index

Index(['Kate@email.com', 'JaneDoe@email.com', 'JohnDoe@email.com'], dtype='object', name='email')

In [20]:
# With email as the index, loc looks rows up by their email label.
df.loc['Kate@email.com']

first    Kate
last      Doe
Name: Kate@email.com, dtype: object

In [21]:
# 'email' is now the index, so the integer label 0 no longer exists and the
# lookup fails. The error is caught and printed so the cell still demonstrates
# it without aborting a top-to-bottom run of the notebook.
try:
    df.loc[0]
except (KeyError, TypeError) as err:
    # The exception type depends on the pandas version: TypeError in the run
    # recorded below, KeyError in later releases.
    print('{}: {}'.format(type(err).__name__, err))

TypeError: cannot do label indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [0] of <class 'int'>

In [None]:
# Move 'email' out of the index and back into a regular column; df is modified
# directly because of inplace=True.
df.reset_index(inplace=True)
df

In [None]:
# Boolean mask: True for every row whose 'last' value equals 'Doe'.
filt = (df['last'] == 'Doe')
# Indexing the frame with the mask keeps only the rows marked True.
df[filt]

In [None]:
# loc accepts the mask as the row selector together with a column label.
df.loc[filt, 'email']

In [None]:
# & combines the two conditions element-wise (AND). Each comparison is wrapped
# in parentheses because & binds more tightly than ==.
filt = (df['last'] == 'Doe') & (df['first'] == 'John')
df[filt]

In [None]:
# | combines the two conditions element-wise (OR).
filt = (df['first'] == 'Jane') | (df['first'] == 'John')
df[filt]

In [None]:
# Opposite of the filter: ~ inverts the mask, so this selects the rows the
# filter rejected.

df[~filt]

### Updating Rows and Columns

In [23]:
# The cells in this section expect a frame indexed by email whose only columns
# are 'first' and 'last'. If an earlier cell moved 'email' back into a column,
# restore it as the index so a top-to-bottom run behaves the same way; the
# guard makes this cell safe to re-run.
if 'email' in df.columns:
    df = df.set_index('email')
df.columns

Index(['first', 'last'], dtype='object')

In [26]:
# Updating all columns via assignment: the list replaces every column label at
# once, in order ('first' -> 'first_name', 'last' -> 'last_name').
df.columns = ['first_name', 'last_name']
df

Unnamed: 0_level_0,first_name,last_name
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Kate@email.com,Kate,Doe
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [27]:
# Rebuild the column labels with a list comprehension, upper-casing each one.
df.columns = [label.upper() for label in df.columns]
df

Unnamed: 0_level_0,FIRST_NAME,LAST_NAME
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Kate@email.com,Kate,Doe
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [28]:
# The .str accessor applies a string method to every column label at once;
# here each underscore is replaced by a space.
df.columns = df.columns.str.replace('_', ' ')
df

Unnamed: 0_level_0,FIRST NAME,LAST NAME
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Kate@email.com,Kate,Doe
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [31]:
# Updating specific columns: rename takes an {old label: new label} mapping,
# and inplace=True applies the change to df itself.
df.rename(columns={'FIRST NAME': 'first', 'LAST NAME': 'last'}, inplace=True)
df

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
Kate@email.com,Kate,Doe
JaneDoe@email.com,Jane,Doe
JohnDoe@email.com,John,Doe


In [48]:
# The lookups below use the default integer row labels. If 'email' is still
# the index at this point, move it back into a regular column first; the guard
# keeps the cell safe to re-run.
if df.index.name == 'email':
    df = df.reset_index()
df.loc[2]

email    JohnDoe@email.com
first                 John
last                   Doe
Name: 2, dtype: object

In [50]:
# Assign a single cell through loc: row labelled 2, column 'last'.
df.loc[2, 'last'] = 'Smith'

In [52]:
# Row 2 now shows 'Smith' in the 'last' column.
df

Unnamed: 0,email,first,last
0,Kate@email.com,Kate,Doe
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Smith


In [54]:
# .at addresses exactly one cell by row label and column label; here it sets
# row 2's 'last' back to 'Doe'.
df.at[2, 'last'] = 'Doe'
df

Unnamed: 0,email,first,last
0,Kate@email.com,Kate,Doe
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [57]:
filt = (df['email'] == 'JohnDoe@email.com')
# Deliberate counter-example (chained indexing): df[filt] is evaluated first
# and the assignment is applied to that intermediate result, so pandas emits
# the warning shown below and df itself is not updated.
df[filt]['last'] = 'Smith'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[filt]['last'] = 'Smith'


In [58]:
df
# Nothing changed: John's 'last' is still 'Doe' because the chained assignment
# did not write into df.

Unnamed: 0,email,first,last
0,Kate@email.com,Kate,Doe
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Doe


In [60]:
# The same update works with .loc: a single indexing call takes the row mask
# and the column label, so the assignment goes into df itself.
filt = (df['email'] == 'JohnDoe@email.com')
df.loc[filt, 'last'] = 'Smith'
df

Unnamed: 0,email,first,last
0,Kate@email.com,Kate,Doe
1,JaneDoe@email.com,Jane,Doe
2,JohnDoe@email.com,John,Smith


In [61]:
# Updating a single column: .str.lower() lower-cases every value, and the
# result is assigned back to the same column.
df['email'] = df['email'].str.lower()
df

Unnamed: 0,email,first,last
0,kate@email.com,Kate,Doe
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Smith


In [63]:
# using apply()
def update_email(email):
    """Return ``email`` converted to upper case.

    Takes a single value and returns a single value, so it can be passed to
    ``Series.apply`` and called once per element.
    """
    uppercased = email.upper()
    return uppercased

In [65]:
# Series.apply calls update_email once for each value in the column; the
# results are assigned back to 'email'.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,email,first,last
0,KATE@EMAIL.COM,Kate,Doe
1,JANEDOE@EMAIL.COM,Jane,Doe
2,JOHNDOE@EMAIL.COM,John,Smith


In [66]:
# Using a lambda function instead of a named function: each value is
# lower-cased and the result is assigned back to 'email'.
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,email,first,last
0,kate@email.com,Kate,Doe
1,janedoe@email.com,Jane,Doe
2,johndoe@email.com,John,Smith


In [67]:
# On a DataFrame, apply passes each column (as a Series) to the function;
# the result holds one minimum per column.
df.apply(pd.Series.min)

email    janedoe@email.com
first                 Jane
last                   Doe
dtype: object

In [68]:
# Same result with a lambda: x is one whole column (a Series) per call.
df.apply(lambda x: x.min())

email    janedoe@email.com
first                 Jane
last                   Doe
dtype: object

In [69]:
# applymap calls the function on every individual cell; here that yields the
# length of each string.
# NOTE(review): newer pandas releases deprecate applymap in favour of
# DataFrame.map - confirm against the installed version before changing.
df.applymap(len)

Unnamed: 0,email,first,last
0,14,4,3
1,17,4,3
2,17,4,5


In [71]:
# Lower-case every cell. The result is only displayed, not assigned, so df
# keeps its original values.
# NOTE(review): newer pandas releases deprecate applymap in favour of
# DataFrame.map - confirm against the installed version before changing.
df.applymap(str.lower)

Unnamed: 0,email,first,last
0,kate@email.com,kate,doe
1,janedoe@email.com,jane,doe
2,johndoe@email.com,john,smith


In [72]:
# map substitutes values through the dict; any value without a key in the dict
# becomes NaN ('Kate' here).
df['first'].map({'John': 'Jim', 'Jane': 'Mary'})

0     NaN
1    Mary
2     Jim
Name: first, dtype: object

In [73]:
# replace substitutes only the values listed in the dict and leaves every other
# value as it is ('Kate' stays 'Kate').
df['first'].replace({'John': 'Jim', 'Jane': 'Mary'})

0    Kate
1    Mary
2     Jim
Name: first, dtype: object