In [1]:
# A DataFrame is a table with rows and columns. Each column in a DataFrame is a
# Series object; each row consists of the elements at the same position in every Series.

In [2]:
import pandas as pd

In [3]:
# Build a small example DataFrame: each keyword below becomes one column, and
# its list supplies that column's values row by row.
# NOTE(review): the population/square figures look like placeholder values, not
# real statistics for these countries -- confirm before reusing them.
df = pd.DataFrame(
    data=dict(
        country=["United States", "France", "Mexico", "Brazil"],
        population=[17.04, 143.5, 9.5, 45.5],
        square=[2724902, 17125191, 207600, 603682],
    )
)
df

Unnamed: 0,country,population,square
0,United States,17.04,2724902
1,France,143.5,17125191
2,Mexico,9.5,207600
3,Brazil,45.5,603682


In [4]:
# Confirm the object's type: df is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [5]:
# Replace the default integer index with country-code labels and, in the same
# step, give the index itself the name "Country Code" (it is displayed as the
# header above the index labels).
df.index = pd.Index(["US", "FR", "MX", "BR"], name="Country Code")
df

Unnamed: 0_level_0,country,population,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
US,United States,17.04,2724902
FR,France,143.5,17125191
MX,Mexico,9.5,207600
BR,Brazil,45.5,603682


In [9]:
# Look up a single column by name. The result is a Series that keeps the
# DataFrame's index, so every country value stays paired with its index label.
df["country"]

Country Code
US    United States
FR           France
MX           Mexico
BR           Brazil
Name: country, dtype: object

In [12]:
# Row access by index can be performed in several ways:
#   - using .loc and providing the index label
#   - using .iloc and providing the integer position of the row

In [13]:
# .loc selects by index label: this returns the row whose index label is "US"
# as a Series holding that row's value for each column.
df.loc["US"]

country            US
population      17.04
square        2724902
Name: US, dtype: object

In [14]:
# .iloc retrieves the same row as the .loc["US"] lookup, but the row is
# addressed by its integer position (0 = first row) instead of its index label.
df.iloc[0]

country            US
population      17.04
square        2724902
Name: US, dtype: object

In [22]:
# Select two rows (by index label) and a single column; the result is a Series.
df.loc[["US", "MX"], "population"]

Country Code
US    17.04
MX     9.50
Name: population, dtype: float64

In [19]:
# .loc takes two arguments: a row (index) selector and a column selector.
# Slicing with ":" is supported as well. Note that a label slice includes both
# endpoints, so "US":"BR" returns every row from "US" through "BR".
df.loc["US":"BR", :]

Unnamed: 0_level_0,country,population,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
US,US,17.04,2724902
FR,France,143.5,17125191
MX,Mexico,9.5,207600
BR,Brazil,45.5,603682


In [26]:
# Filtering is done with a boolean mask. Passing the mask together with the
# list of columns you want to review to a single .loc call selects the rows
# and the columns in one step, instead of chaining two separate [] lookups.
# By the way, a column can be accessed as an attribute or with dict-style
# notation: df.population and df['population'] are the same operation.
df.loc[df["population"] > 10, ["country", "square"]]

Unnamed: 0_level_0,country,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1
US,US,2724902
FR,France,17125191
BR,Brazil,603682


In [34]:
# You can reset the index as below. As you can see, the former index becomes a
# regular column and a default integer index takes its place.
df.reset_index()

Unnamed: 0,Country Code,country,population,square
0,US,US,17.04,2724902
1,FR,France,143.5,17125191
2,MX,Mexico,9.5,207600
3,BR,Brazil,45.5,603682


In [None]:
# Resetting the index does not change the variable df itself. As you can see
# below, df.shape still reports (4, 3) rows/columns.

In [36]:
# Check the (rows, columns) shape of df.
df.shape

(4, 3)

In [None]:
# If you want "Country Code" to become a regular column within the DataFrame,
# you have to assign the result of df.reset_index() to a variable.
# By default, most DataFrame methods return a new instance
# (the old one is not affected).

In [37]:
# Keep the result of reset_index() in its own variable so the version with
# "Country Code" as a regular column can be reused.
reset_df = df.reset_index()
reset_df

Unnamed: 0,Country Code,country,population,square
0,US,US,17.04,2724902
1,FR,France,143.5,17125191
2,MX,Mexico,9.5,207600
3,BR,Brazil,45.5,603682


In [38]:
# reset_df has one more column than df because the old index is now a column.
reset_df.shape

(4, 4)

In [39]:
# Create a new column called "density" computed from two existing columns.
# NOTE(review): presumably population is in millions and square in km^2, which
# would make this people per km^2 -- confirm the units.
df["density"] = df["population"] / df["square"] * 1000000
df

Unnamed: 0_level_0,country,population,square,density
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
US,US,17.04,2724902,6.253436
FR,France,143.5,17125191,8.379469
MX,Mexico,9.5,207600,45.761079
BR,Brazil,45.5,603682,75.370808


In [45]:
# If you don't like the new column, you can drop it. axis="columns" tells
# drop() that "density" is a column label rather than a row label.
# drop() returns a new DataFrame without the column and leaves df unchanged;
# assign the result to a variable if you want to keep it.
# Another option is del df['density'], which removes the column from df itself.
df.drop(["density"], axis = "columns")

Unnamed: 0_level_0,country,population,square
Country Code,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
US,US,17.04,2724902
FR,France,143.5,17125191
MX,Mexico,9.5,207600
BR,Brazil,45.5,603682


In [54]:
# Renaming "Country Code". It is the name of the index, not a column, so
# rename(columns=...) can only see it after reset_index() has turned the index
# into a regular column.
# Also note that two methods are chained on df: the first one is
# .reset_index() and the second is .rename().
df.reset_index().rename(columns={"Country Code": "country_code"})

Unnamed: 0,country_code,country,population,square,density
0,US,US,17.04,2724902,6.253436
1,FR,France,143.5,17125191,8.379469
2,MX,Mexico,9.5,207600,45.761079
3,BR,Brazil,45.5,603682,75.370808
