In [1]:
import pandas as pd
# Toy records used throughout the notebook: one list of values per column.
people = dict(
    first=['Kas', 'Tas', 'Bas', 'Nik'],
    last=['Sar', 'Kar', 'Sar', 'kus'],
    email=['kar@sar.com', 'tas@kar.com', 'bas@sar.com', 'nik@kus.com'],
)

# Every dict key becomes a column label of the resulting DataFrame.
df = pd.DataFrame(data=people)
df.columns  # show the column labels

Index(['first', 'last', 'email'], dtype='object')

## Change column names

In [7]:
# Rename all columns at once by assigning a full list of labels
# (here only the first two labels actually change).
df.columns = ['first name', 'last name', 'email']
df

Unnamed: 0,first name,last name,email
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nik,kus,nik@kus.com


In [8]:
# Upper-case every column label using the vectorised string accessor of the column Index.
df.columns = df.columns.str.upper()
df

Unnamed: 0,FIRST NAME,LAST NAME,EMAIL
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nik,kus,nik@kus.com


In [9]:
# Swap the spaces in the column labels for underscores.
# regex=False states explicitly that ' ' is matched as a literal character.
df.columns = df.columns.str.replace(' ', '_', regex=False)
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nik,kus,nik@kus.com


In [10]:
# rename() maps old labels to new ones and returns the renamed result, which is what is displayed here.
# The result is not assigned to anything, so `df` itself keeps its old column names.
df.rename(columns={'FIRST_NAME': 'first', 'LAST_NAME': 'last'})

Unnamed: 0,first,last,EMAIL
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nik,kus,nik@kus.com


In [11]:
# The previous cell displayed renamed columns, but `df` itself was not updated:
# it still carries the old column names.
df

Unnamed: 0,FIRST_NAME,LAST_NAME,EMAIL
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nik,kus,nik@kus.com


In [19]:
# rename() returns a new DataFrame, so assign the result back to `df` to keep the new names.
# (Passing inplace=True would also update `df`, but it is not required; assignment is the
# preferred form because it keeps the call chainable and gives up no performance.)
df = df.rename(columns={'FIRST_NAME': 'first', 'LAST_NAME': 'last', 'EMAIL': 'email'})
df

Unnamed: 0,first,last,email
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nikus,Giri,nikus@giri.com


## Change data in table

In [20]:
# Overwrite every column value of a single row: select the row by its index label (3)
# and assign one value per column, in column order.
df.loc[3] = ['Nikus', 'Giri', 'nikus@giri.com']
df

Unnamed: 0,first,last,email
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nikus,Giri,nikus@giri.com


In [67]:
# Append a row: assigning to a label that does not exist yet through .loc adds a new row.
# len(df.index) is used as that new label.
# NOTE(review): this assumes the row labels are the default 0..n-1 integers (so the label is unused),
# and re-running the cell appends the same row again - confirm this is acceptable.
df.loc[len(df.index)] = ['Pinky','Rani','pinky@rani.com']
df

Unnamed: 0,first,last,email
0,kas,sar,kar@sar.com
1,tas,kar,tas@kar.com
2,bas,sar,bas@sar.com
3,nik,giri,nik@giri.com
4,Pinky,Rani,pinky@rani.com


In [86]:
# Concatenate two DataFrames.
# Second batch of records; unlike `df` it also carries an "age" column.
people2 = dict(
    first=['Chinu', 'Tinu'],
    last=['lal', 'pal'],
    email=['chinu@lal.com', 'tinu@pal.com'],
    age=[34, 43],
)

df2 = pd.DataFrame(data=people2)

# Stack df2 underneath df. ignore_index=True renumbers the rows of the result,
# and the rows coming from df have no value in the "age" column.
df3 = pd.concat([df, df2], ignore_index=True)
df3


Unnamed: 0,first,last,email,age
0,kas,sar,kar@sar.com,
1,tas,kar,tas@kar.com,
2,bas,sar,bas@sar.com,
3,nik,giri,nik@giri.com,
4,Pinky,Rani,pinky@rani.com,
5,Chinu,lal,chinu@lal.com,34.0
6,Tinu,pal,tinu@pal.com,43.0


In [87]:
# After the concat the "age" column holds floats (34.0, 43.0) and is empty for the rows that came from df.
# Fill the gaps with 0 and convert the column to integers; either of the two lines below does this.
# NOTE(review): 0 then stands in for "age unknown" - a nullable integer dtype such as 'Int64'
# might be a better fit if the gaps should stay visible; confirm.
# df3['age'] = df3['age'].fillna(0).apply(int)
df3['age'] = df3['age'].fillna(0).astype(int)
df3

Unnamed: 0,first,last,email,age
0,kas,sar,kar@sar.com,0
1,tas,kar,tas@kar.com,0
2,bas,sar,bas@sar.com,0
3,nik,giri,nik@giri.com,0
4,Pinky,Rani,pinky@rani.com,0
5,Chinu,lal,chinu@lal.com,34
6,Tinu,pal,tinu@pal.com,43


In [21]:
# Change specific columns of one row: pass the row label plus a list of column names,
# and assign the new values in the same order as the listed columns.
df.loc[3,['last','email']] = ['Giridhar', 'nikus@girdhar.com']
df

Unnamed: 0,first,last,email
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nikus,Giridhar,nikus@girdhar.com


In [22]:
# When only a single value changes, the column name can be passed as a plain string instead of a list.
df.loc[3, 'last'] = 'Giri'
df

Unnamed: 0,first,last,email
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nikus,Giri,nikus@girdhar.com


In [24]:
# A single value can also be set with .at, which is given exactly one row label and one column label.
# Difference from .loc: .loc is the general label-based indexer (it also accepts lists of labels and
# boolean masks, as used in other cells), whereas pandas documents .at as the accessor for getting or
# setting one single value.
df.at[3,'email'] = 'nikus@giri.com'
df

Unnamed: 0,first,last,email
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nikus,Giri,nikus@giri.com


In [41]:
# Replace values only in the rows that match a condition:
# build a boolean mask first, then hand it to .loc as the row selector.
filt = df['last'].eq('Giri')
df.loc[filt, ['first', 'email']] = ['Nik', 'nik@giri.com']
df

Unnamed: 0,first,last,email
0,Kas,Sar,kar@sar.com
1,Tas,Kar,tas@kar.com
2,Bas,Sar,bas@sar.com
3,Nik,Giri,nik@giri.com


In [44]:
# Convert every first and last name to lowercase using the vectorised .str accessor,
# writing each result back into its column.
df['first'] = df['first'].str.lower()
df['last'] = df['last'].str.lower()
df

Unnamed: 0,first,last,email
0,kas,sar,kar@sar.com
1,tas,kar,tas@kar.com
2,bas,sar,bas@sar.com
3,nik,giri,nik@giri.com


In [66]:
# Number of rows, i.e. the length of the row index.
len(df.index)


4

## Functions used for string manipulation
* apply --> works on a Series, where it applies the function to each element; on a DataFrame it applies the function to each column (or to each row with `axis='columns'`).
* map
* applymap
* replace

In [57]:
# Shows the number of characters of each value in the Series (len is applied to every element).
df['email'].apply(len)

0    11
1    11
2    11
3    12
Name: email, dtype: int64

In [59]:
# When apply is called on the whole DataFrame instead of a single column, the function receives
# each column as a Series, so len returns the number of rows once per column.
df.apply(len)

first    4
last     4
email    4
dtype: int64

In [60]:
# The number of rows can also be found for one column individually.
len(df['email'])

4

In [63]:
# Passing axis='columns' makes apply hand each row to the function instead, so len returns the
# number of columns in every row. Without the axis argument apply works column by column.
df.apply(len, axis='columns')

0    3
1    3
2    3
3    3
dtype: int64

In [50]:
def update_email(email):
    """Return ``email`` converted to upper case."""
    uppercased = email.upper()
    return uppercased

In [54]:
# Run the custom update_email function on every value of the email column
# and store the result back in that column.
df['email'] = df['email'].apply(update_email)
df

Unnamed: 0,first,last,email
0,kas,sar,KAR@SAR.COM
1,tas,kar,TAS@KAR.COM
2,bas,sar,BAS@SAR.COM
3,nik,giri,NIK@GIRI.COM


In [55]:
# For a one-off transformation a lambda can be passed to apply instead of a named function;
# this one converts every email back to lowercase.
df['email'] = df['email'].apply(lambda x: x.lower())
df

Unnamed: 0,first,last,email
0,kas,sar,kar@sar.com
1,tas,kar,tas@kar.com
2,bas,sar,bas@sar.com
3,nik,giri,nik@giri.com
