In [188]:
# Load the raw dataset into a DataFrame

import pandas as pd

# Location of the input CSV, kept in one place so it is easy to change
RAW_DATASET_PATH = '/content/raw_dataset.csv'
df = pd.read_csv(RAW_DATASET_PATH)

In [189]:
# Preview the loaded DataFrame (the cell's last expression is what gets displayed)
df

Unnamed: 0,First Name,Last name,Age,SALARY,STREET Address1,STREET Address2,STREET Address3,email
0,Joel,Padilla,10/28/2019,$92.32,"431-6530 Eu, Rd.",364-2264 Augue Rd.,"P.O. Box 864, 3882 Orci Street",eu@nibh.com
1,Fritz,Tyler,9/27/2019,$83.91,Ap #377-2267 Ac Av.,979-2228 Vel Ave,9865 Eu Av.,est.ac.mattis@malesuadafringilla.net
2,Wing,Phelps,2/18/2019,$17.15,Ap #545-5786 Pulvinar Ave,Ap #973-5781 Sagittis Avenue,9959 Ut St.,dolor@cubilia.net
3,Ryan,Ross,5/21/2019,$45.97,634-7858 Id Road,907-8824 Fringilla Ave,318-5271 In Ave,interdum.libero.dui@vitaeerat.com


In [190]:
# Show the column labels of the DataFrame (displayed as a pandas Index)
df.columns

Index(['First Name', 'Last name', 'Age', 'SALARY', 'STREET Address1',
       'STREET Address2', 'STREET Address3', 'email'],
      dtype='object')

In [191]:
# Materialise the column labels as a plain Python list
list(df.columns)

['First Name',
 'Last name',
 'Age',
 'SALARY',
 'STREET Address1',
 'STREET Address2',
 'STREET Address3',
 'email']

In [192]:
# Convert the column names to a Series
# (the result is a Series, not a DataFrame; each label is used as both index and value)
df.columns.to_series()

First Name              First Name
Last name                Last name
Age                            Age
SALARY                      SALARY
STREET Address1    STREET Address1
STREET Address2    STREET Address2
STREET Address3    STREET Address3
email                        email
dtype: object

In [194]:
# Check whether any column name is duplicated
# (True flags a label that repeats one already seen earlier in the Index)
df.columns.duplicated(keep='first')

array([False, False, False, False, False, False, False, False])

In [195]:
# Lower-case the column names.
# The result is only displayed here; it is not assigned back, so df keeps its original labels.
df.columns.str.lower()

Index(['first name', 'last name', 'age', 'salary', 'street address1',
       'street address2', 'street address3', 'email'],
      dtype='object')

In [196]:
# Upper-case the column names.
# The result is only displayed here; it is not assigned back, so df keeps its original labels.
df.columns.str.upper()

Index(['FIRST NAME', 'LAST NAME', 'AGE', 'SALARY', 'STREET ADDRESS1',
       'STREET ADDRESS2', 'STREET ADDRESS3', 'EMAIL'],
      dtype='object')

In [197]:
# Convert the column names to Proper/Title Case.
# The result is only displayed here; it is not assigned back, so df keeps its original labels.
df.columns.str.title()

Index(['First Name', 'Last Name', 'Age', 'Salary', 'Street Address1',
       'Street Address2', 'Street Address3', 'Email'],
      dtype='object')

In [198]:
# Replace spaces in the column names with underscores
# (literal replacement; the result is displayed only and is not assigned back to df)
df.columns.str.replace(' ', '_', regex=False)

Index(['First_Name', 'Last_name', 'Age', 'SALARY', 'STREET_Address1',
       'STREET_Address2', 'STREET_Address3', 'email'],
      dtype='object')

In [199]:
# Rename a column by label: 'Age' becomes 'Dates_of_Birth' (df is modified in place)
column_renames = {'Age': 'Dates_of_Birth'}
df.rename(columns=column_renames, inplace=True)
df

Unnamed: 0,First Name,Last name,Dates_of_Birth,SALARY,STREET Address1,STREET Address2,STREET Address3,email
0,Joel,Padilla,10/28/2019,$92.32,"431-6530 Eu, Rd.",364-2264 Augue Rd.,"P.O. Box 864, 3882 Orci Street",eu@nibh.com
1,Fritz,Tyler,9/27/2019,$83.91,Ap #377-2267 Ac Av.,979-2228 Vel Ave,9865 Eu Av.,est.ac.mattis@malesuadafringilla.net
2,Wing,Phelps,2/18/2019,$17.15,Ap #545-5786 Pulvinar Ave,Ap #973-5781 Sagittis Avenue,9959 Ut St.,dolor@cubilia.net
3,Ryan,Ross,5/21/2019,$45.97,634-7858 Id Road,907-8824 Fringilla Ave,318-5271 In Ave,interdum.libero.dui@vitaeerat.com


In [200]:
# Count the columns in the DataFrame
df.shape[1]

8

In [201]:
#Renaming Column Names using indexing (e.g Dates_of_Birth column)

# Build a fresh list of labels and assign it back instead of writing into
# `df.columns.values[...]`: pandas documents Index as immutable, and mutating
# its backing array bypasses that and can leave the Index's cached label
# lookups out of sync with the labels that are displayed.
new_column_names = df.columns.tolist()
new_column_names[2] = "DOB"  # replace the label at position 2 (the third column)
df.columns = new_column_names
df

Unnamed: 0,First Name,Last name,DOB,SALARY,STREET Address1,STREET Address2,STREET Address3,email
0,Joel,Padilla,10/28/2019,$92.32,"431-6530 Eu, Rd.",364-2264 Augue Rd.,"P.O. Box 864, 3882 Orci Street",eu@nibh.com
1,Fritz,Tyler,9/27/2019,$83.91,Ap #377-2267 Ac Av.,979-2228 Vel Ave,9865 Eu Av.,est.ac.mattis@malesuadafringilla.net
2,Wing,Phelps,2/18/2019,$17.15,Ap #545-5786 Pulvinar Ave,Ap #973-5781 Sagittis Avenue,9959 Ut St.,dolor@cubilia.net
3,Ryan,Ross,5/21/2019,$45.97,634-7858 Id Road,907-8824 Fringilla Ave,318-5271 In Ave,interdum.libero.dui@vitaeerat.com


In [202]:
# List every column name except 'DOB' by inverting an equality mask
df.columns[~(df.columns == 'DOB')]

Index(['First Name', 'Last name', 'SALARY', 'STREET Address1',
       'STREET Address2', 'STREET Address3', 'email'],
      dtype='object')

In [203]:
# Select every column except 'DOB' using loc with a boolean column mask
not_dob = df.columns != 'DOB'
df.loc[:, not_dob]

Unnamed: 0,First Name,Last name,SALARY,STREET Address1,STREET Address2,STREET Address3,email
0,Joel,Padilla,$92.32,"431-6530 Eu, Rd.",364-2264 Augue Rd.,"P.O. Box 864, 3882 Orci Street",eu@nibh.com
1,Fritz,Tyler,$83.91,Ap #377-2267 Ac Av.,979-2228 Vel Ave,9865 Eu Av.,est.ac.mattis@malesuadafringilla.net
2,Wing,Phelps,$17.15,Ap #545-5786 Pulvinar Ave,Ap #973-5781 Sagittis Avenue,9959 Ut St.,dolor@cubilia.net
3,Ryan,Ross,$45.97,634-7858 Id Road,907-8824 Fringilla Ave,318-5271 In Ave,interdum.libero.dui@vitaeerat.com


In [204]:
# View a single column of the DataFrame using loc with a boolean column mask
is_dob = df.columns.isin(['DOB'])
df.loc[:, is_dob]

Unnamed: 0,DOB
0,10/28/2019
1,9/27/2019
2,2/18/2019
3,5/21/2019


In [205]:
# Select Column Names That Begin with a Word or Character using Filter
# `like='STREET'` would keep any label that merely contains 'STREET';
# anchoring the regex with '^' restricts the match to the start of the label.
df.filter(regex='^STREET')

#Note that this function is case sensitive

Unnamed: 0,STREET Address1,STREET Address2,STREET Address3
0,"431-6530 Eu, Rd.",364-2264 Augue Rd.,"P.O. Box 864, 3882 Orci Street"
1,Ap #377-2267 Ac Av.,979-2228 Vel Ave,9865 Eu Av.
2,Ap #545-5786 Pulvinar Ave,Ap #973-5781 Sagittis Avenue,9959 Ut St.
3,634-7858 Id Road,907-8824 Fringilla Ave,318-5271 In Ave


In [206]:
# Select columns whose name begins with a given word or character using loc
starts_with_street = df.columns.str.startswith('STREET')
df.loc[:, starts_with_street]

# Note: startswith is case sensitive

Unnamed: 0,STREET Address1,STREET Address2,STREET Address3
0,"431-6530 Eu, Rd.",364-2264 Augue Rd.,"P.O. Box 864, 3882 Orci Street"
1,Ap #377-2267 Ac Av.,979-2228 Vel Ave,9865 Eu Av.
2,Ap #545-5786 Pulvinar Ave,Ap #973-5781 Sagittis Avenue,9959 Ut St.
3,634-7858 Id Road,907-8824 Fringilla Ave,318-5271 In Ave


In [207]:
# Select columns whose name ends with a given word or character using filter
# ('$' anchors the pattern to the end of the label)
df.filter(regex='ame$', axis='columns')

# Note: the match is case sensitive

Unnamed: 0,First Name,Last name
0,Joel,Padilla
1,Fritz,Tyler
2,Wing,Phelps
3,Ryan,Ross


In [208]:
# Select columns whose name ends with a given word or character using loc
ends_with_ame = df.columns.str.endswith('ame')
df.loc[:, ends_with_ame]
# Note: endswith is case sensitive

Unnamed: 0,First Name,Last name
0,Joel,Padilla
1,Fritz,Tyler
2,Wing,Phelps
3,Ryan,Ross


In [209]:
# Select a group of column names by their integer positions
positions = [0, 1, 2]
df.columns[positions]

Index(['First Name', 'Last name', 'DOB'], dtype='object')

In [210]:
# Build a full-name column by joining the first- and last-name columns with a space
first_names = df['First Name']
last_names = df['Last name']
df['Full_Name'] = first_names + ' ' + last_names

# Show the first rows of the new column only
df[['Full_Name']].head()

Unnamed: 0,Full_Name
0,Joel Padilla
1,Fritz Tyler
2,Wing Phelps
3,Ryan Ross


In [211]:
# Drop several columns at once (df is modified in place)
columns_to_drop = ['First Name', 'Last name']
df.drop(labels=columns_to_drop, axis='columns', inplace=True)
df

Unnamed: 0,DOB,SALARY,STREET Address1,STREET Address2,STREET Address3,email,Full_Name
0,10/28/2019,$92.32,"431-6530 Eu, Rd.",364-2264 Augue Rd.,"P.O. Box 864, 3882 Orci Street",eu@nibh.com,Joel Padilla
1,9/27/2019,$83.91,Ap #377-2267 Ac Av.,979-2228 Vel Ave,9865 Eu Av.,est.ac.mattis@malesuadafringilla.net,Fritz Tyler
2,2/18/2019,$17.15,Ap #545-5786 Pulvinar Ave,Ap #973-5781 Sagittis Avenue,9959 Ut St.,dolor@cubilia.net,Wing Phelps
3,5/21/2019,$45.97,634-7858 Id Road,907-8824 Fringilla Ave,318-5271 In Ave,interdum.libero.dui@vitaeerat.com,Ryan Ross
