# Renaming columns of a data frame

In [1]:
import pandas as pd

In [2]:
# Download the UFO reports CSV from the shortened URL and load it into a DataFrame
ufo = pd.read_csv('http://bit.ly/uforeports')

In [3]:
# Preview the first few rows to see the original column names
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


# How to view the list of columns? (A dataframe has an attribute named columns)

In [4]:
# The columns attribute holds the column labels as an Index
ufo.columns

Index(['City', 'Colors Reported', 'Shape Reported', 'State', 'Time'], dtype='object')

# Method 1 ---- Use the rename method, with arguments (columns={Dict}, inplace=True)

The dictionary contains the old column names as keys and the new ones as values

In [5]:
# Map each old column name (key) to its replacement (value) and rename in place
ufo.rename(
    columns={
        'Colors Reported': 'Colors_Reported',
        'Shape Reported': 'Shape_Reported',
    },
    inplace=True,
)

# Show the column labels to confirm that the two names were changed
ufo.columns

Index(['City', 'Colors_Reported', 'Shape_Reported', 'State', 'Time'], dtype='object')

# Method 2 ------ Overwrite the list of columns, (in order)

In [6]:
# Complete list of replacement names, one per existing column, in the same order
new_columns_name = [
    'City',
    'Colors__Reported',
    'Shape__Reported',
    'State',
    'Time',
]

# Assigning to .columns replaces every column label at once
ufo.columns = new_columns_name

# Preview the frame to confirm the new labels
ufo.head()

Unnamed: 0,City,Colors__Reported,Shape__Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


# Method 3 ---- Renaming in order while reading the csv files

Tricky syntax, so pay attention. Pass the list of new column names as names=new_columns_name and set header=0, which tells pandas that row 0 of the input file holds the existing column names that should be replaced

### The parameter is called `names` (plural) because it takes one name for every column in the file

In [7]:
# header=0 marks the file's first row as the existing header, which `names` then replaces
ufo = pd.read_csv(
    'http://bit.ly/uforeports',
    names=new_columns_name,
    header=0,
)

# Confirm the labels that were applied while reading the file
ufo.columns

Index(['City', 'Colors__Reported', 'Shape__Reported', 'State', 'Time'], dtype='object')

# Trick ----- Replacing the spaces in all column names with _

This is easy since you can obtain the column name list and apply the string function replace(old,new)

We are using str.replace because the `.str` accessor exposes the string methods on the column Index, and the replacement is applied to each column name in turn

In [8]:
# Reload the data so the column names contain spaces again
ufo = pd.read_csv('http://bit.ly/uforeports')

# Call str.replace on the column labels without assigning the result anywhere
ufo.columns.str.replace(' ', '_')

# Display the first rows to check the column names
ufo.head()

Unnamed: 0,City,Colors Reported,Shape Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00


# It didn't work, do you know why? str.replace returns a new set of column names instead of changing ufo in place, so remember to assign the result back to ufo.columns

In [9]:
# Store the Index returned by str.replace back on the frame so the new labels stick
ufo.columns = ufo.columns.str.replace(pat=' ', repl='_')

# Preview the frame; the column names now use underscores instead of spaces
ufo.head()

Unnamed: 0,City,Colors_Reported,Shape_Reported,State,Time
0,Ithaca,,TRIANGLE,NY,6/1/1930 22:00
1,Willingboro,,OTHER,NJ,6/30/1930 20:00
2,Holyoke,,OVAL,CO,2/15/1931 14:00
3,Abilene,,DISK,KS,6/1/1931 13:00
4,New York Worlds Fair,,LIGHT,NY,4/18/1933 19:00
