In [10]:
import pandas as pd

# Sample "people" data carried over from the previous video.
# It is stored column-wise: each key is a column name, each list holds that
# column's values (one entry per row).
people = dict(
    first=['srinu', 'sow', 'abc'],
    last=['balireddy', 'yalla', 'abc'],
    email=['abc@abc.com', 'kbc@kbc.com', 'abc@abc.com'],
)

# Build the DataFrame used throughout the rest of the notebook and display it.
df = pd.DataFrame(data=people)
df

Unnamed: 0,first,last,email
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,abc,abc@abc.com


In [11]:
# Inspect the column labels: .columns returns an Index holding every column name

df.columns

Index(['first', 'last', 'email'], dtype='object')

In [12]:
# Rename every column at once by assigning a new list of labels to df.columns.
# The list supplies one label per existing column, in the existing column order.

df.columns = ['First_Name', 'Last_Name', 'Email']

df.columns

# The assignment modifies df itself, so the new names are visible wherever df is used

Index(['First_Name', 'Last_Name', 'Email'], dtype='object')

In [13]:
# Replacing every label by hand is uncommon. Far more often we want to
# transform the existing labels, e.g. switch between upper and lower case,
# or replace spaces with underscores.
# A LIST COMPREHENSION over df.columns handles these cases.

# Upper-case every column name.
# Note the call parentheses: `label.upper` without () would store the method
# objects themselves as the column labels, which is unreadable.
df.columns = [label.upper() for label in df.columns]

In [14]:
df  # display the frame to confirm the upper-cased column names

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,abc,abc@abc.com


In [16]:
# Swapping spaces and underscores in column names.
# A column whose name contains a space cannot be accessed with dot notation,
# so underscores are usually the better choice.

# For the demo, first turn every '_' into ' '; a later cell reverses it.

df.columns = df.columns.str.replace('_',' ')   # returns a new Index, so assign it back (there is no inplace option here)

In [17]:
df  # column names now contain spaces instead of underscores

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,abc,abc@abc.com


In [21]:
# Replace the spaces with underscores again

df.columns = df.columns.str.replace(' ','_')
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,abc,abc@abc.com


In [23]:
# Convert the column names back to lower case.
lowercase_labels = [label.lower() for label in df.columns]
df.columns = lowercase_labels
df

Unnamed: 0,first_name,last_name,email
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,abc,abc@abc.com


In [25]:
# Every example so far replaced ALL of the column labels.
# To change only specific columns, use the rename() method and pass a dict to
# its `columns` argument:
#   key   -> the existing column name
#   value -> the new name it should get
column_renames = {'first_name': 'first', 'last_name': 'last'}

# inplace=True applies the renaming to df itself instead of returning a new frame.
df.rename(columns=column_renames, inplace=True)
df

Unnamed: 0,first,last,email
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,abc,abc@abc.com


In [38]:
# Updating rows: the .loc indexer selects rows by label and can assign to them.

# Overwrite the whole of row 2. Every column needs a value, so this gets
# unwieldy when the frame has many columns.
df.loc[2] = ['abc1', 'abc2', 'abc1@abc.com']

# Passing column labels as well lets us update only the cells we care about.
df.loc[2, ['last', 'email']] = ['abc', 'abc@abc.com']

# Changing a single value: pass a plain scalar, not a list
# (with a list, the stored value shows up with brackets).
df.loc[2, 'first'] = 'abc'

# Only the last expression of a cell is rendered, so display df once at the end.
df

Unnamed: 0,first,last,email
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,abc,abc@abc.com


In [42]:
# .at is the scalar counterpart of .loc: it reads or writes exactly ONE cell,
# identified by a single row label and a single column label. It exists mainly
# as a faster path for single-value access.

df.at[2, 'email'] = 'email.com'

# .at is documented for single values only; a list of columns is not a
# supported key (recent pandas versions raise an error for it).
# To set several cells of a row at once, use .loc instead.
df.loc[2, ['last', 'email']] = ['ab1c', 'abc33@abc.com']
df

Unnamed: 0,first,last,email
0,srinu,balireddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,ab1c,abc33@abc.com


In [46]:
# A common mistake: trying to change values WITHOUT an indexer
# (i.e. without .loc or .at), using chained indexing on a filtered frame.

# Build a boolean mask that is True for the rows whose email matches

filt = (df['email'] == "abc@abc.com")

df[filt]

# Reading works fine: the 'last' column of the filtered rows can be accessed like this

df[filt]['last']

# Assigning through the same chained expression raises a warning (shown in the
# cell output), and the value in df is NOT updated

df[filt]['last'] = 'reddy'

# Why it fails: the statement is really two separate operations. df[filt] runs
# first and hands back an intermediate object, which pandas may return as a
# view or as a copy of df. The ['last'] = ... assignment is then applied to
# that temporary object, which is discarded right afterwards, so df itself
# never sees the change.

# SO IT'S BETTER TO USE .LOC OR .AT FOR UPDATING VALUES

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [48]:
# The correct way: pass the boolean mask `filt` as the row indexer and the
# column label as the column indexer in a single .loc call, so the assignment
# is applied to df itself

df.loc[filt,'last'] = 'reddy'
df

Unnamed: 0,first,last,email
0,srinu,reddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,ab1c,abc33@abc.com


In [52]:
# Updating many rows at once: transform a whole column.

# .str.lower() only RETURNS a lower-cased Series; df is unchanged until that
# result is assigned back to the column.
lowercased_email = df['email'].str.lower()

# Store the lower-cased values in df.
df['email'] = lowercased_email
df

Unnamed: 0,first,last,email
0,srinu,reddy,abc@abc.com
1,sow,yalla,kbc@kbc.com
2,abc,ab1c,abc33@abc.com
