# Pandas Package

In [1]:
# load the pandas and numpy packages
import pandas as pd
import numpy as np


In [2]:
# Build a small two-row example DataFrame with the pandas DataFrame() constructor.
#   data    -> one inner list per row
#   columns -> column labels (kept free of stray whitespace so that
#              dataframe['label4'] works as expected)
#   index   -> row labels
dataframe = pd.DataFrame(
    data=[['abc', 3.3, 28, True],
          ['xyz', -0.55, 0, False]],
    columns=['label1', 'label2', 'label3', 'label4'],
    index=[0, 1],
)

In [3]:
# Display the dataframe (the bare last expression of a cell is rendered as its output)
dataframe

Unnamed: 0,label1,label2,label3,label4
0,abc,3.3,28,True
1,xyz,-0.55,0,False


In [4]:
# Return the dataframe's shape as (number of rows, number of columns)
dataframe.shape

(2, 4)

In [5]:
# Build a 3-D NumPy array from nested lists:
# 2 blocks, each holding 2 rows of 3 integers.
array = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[11, 12, 13], [14, 15, 16]],
])

In [6]:
# Return the array's shape: one length per dimension, outermost dimension first
array.shape

(2, 2, 3)

# Subsetting data

In [7]:
# load the pandas package
import pandas as pd

In [8]:
# Load the country data from 'country.csv' into a dataframe
# (the relative path is resolved against the notebook's working directory)
country = pd.read_csv('country.csv')

In [27]:
# Display the full country dataframe
country

Unnamed: 0,Country,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage
0,China,Asia,9572900,1277558000,133,1949-10-01,Mandarin,0.81
1,Bangladesh,Asia,143998,129155000,897,1905-05-24,Bengali,0.98
2,Brazil,South America,8547403,170115000,20,1822-09-07,Portuguese,0.98
3,India,Asia,3287263,1013662000,308,1905-04-30,Hindi,0.4
4,Norway,Europe,385207,5379000,14,1905-05-17,Norwegian,0.97
5,United States,North America,9363520,278357000,30,1776-07-04,English,0.86


In [26]:
# Select the 'Country' column with single brackets; this returns a Series
country['Country']

0            China
1       Bangladesh
2           Brazil
3            India
4           Norway
5    United States
Name: Country, dtype: object

In [11]:
# Passing a list of labels (note the double brackets) returns a DataFrame,
# even when the list holds only one column
country[['Country']]

Unnamed: 0,Country
0,China
1,Bangladesh
2,Brazil
3,India
4,Norway
5,United States


In [12]:
# Select the 'Country' and 'Continent' columns by passing a list of labels

country[['Country', 'Continent']]

Unnamed: 0,Country,Continent
0,China,Asia
1,Bangladesh,Asia
2,Brazil,South America
3,India,Asia
4,Norway,Europe
5,United States,North America


In [13]:
# Select the single element at row position 0 and column position 1
# (iloc indexes by integer position, not by label)
country.iloc[0,1]

'Asia'

In [14]:
# Select row positions 0 and 1 (the slice end, 2, is excluded) from column position 1
country.iloc[0:2, 1]

0    Asia
1    Asia
Name: Continent, dtype: object

In [15]:
# Select all rows before position 7 and column positions 1 through 2
# (the slice end, 3, is excluded). This frame has only 6 rows, so every row is returned.
country.iloc[:7,1:3]

Unnamed: 0,Continent,SurfaceArea
0,Asia,9572900
1,Asia,143998
2,South America,8547403
3,Asia,3287263
4,Europe,385207
5,North America,9363520


In [16]:
# Select row positions 10 through 20 and every column from position 1 onwards.
# This frame has only 6 rows, so the out-of-range slice returns an empty dataframe.
country.iloc[10:21, 1:]

Unnamed: 0,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage


In [21]:
# Select the rows labeled 1 through 5 and the 'Continent' and 'Population' columns
# (unlike iloc, a loc slice includes its end label)
country.loc[1:5, ['Continent', 'Population']]

Unnamed: 0,Continent,Population
1,Asia,129155000
2,South America,170115000
3,Asia,1013662000
4,Europe,5379000
5,North America,278357000


In [22]:
# Select rows where the continent is Asia, using a boolean mask
country[country['Continent']== 'Asia']

Unnamed: 0,Country,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage
0,China,Asia,9572900,1277558000,133,1949-10-01,Mandarin,0.81
1,Bangladesh,Asia,143998,129155000,897,1905-05-24,Bengali,0.98
3,India,Asia,3287263,1013662000,308,1905-04-30,Hindi,0.4


In [24]:
# Select rows where the continent is not Asia
country[country['Continent'] != 'Asia']

Unnamed: 0,Country,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage
2,Brazil,South America,8547403,170115000,20,1822-09-07,Portuguese,0.98
4,Norway,Europe,385207,5379000,14,1905-05-17,Norwegian,0.97
5,United States,North America,9363520,278357000,30,1776-07-04,English,0.86


In [28]:
# Select rows where the continent is Asia or Europe
# (| is the element-wise OR; each comparison needs its own parentheses)
country[(country['Continent'] == "Asia") | (country['Continent'] == 'Europe')] 

Unnamed: 0,Country,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage
0,China,Asia,9572900,1277558000,133,1949-10-01,Mandarin,0.81
1,Bangladesh,Asia,143998,129155000,897,1905-05-24,Bengali,0.98
3,India,Asia,3287263,1013662000,308,1905-04-30,Hindi,0.4
4,Norway,Europe,385207,5379000,14,1905-05-17,Norwegian,0.97


In [29]:
# Select rows where the continent is not Asia, this time by negating the mask with ~
country[~(country['Continent'] == 'Asia')]

Unnamed: 0,Country,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage
2,Brazil,South America,8547403,170115000,20,1822-09-07,Portuguese,0.98
4,Norway,Europe,385207,5379000,14,1905-05-17,Norwegian,0.97
5,United States,North America,9363520,278357000,30,1776-07-04,English,0.86


In [34]:
# Drop rows whose 'Continent' value repeats that of an earlier row,
# keeping the first row seen for each continent
country.drop_duplicates(subset='Continent')

Unnamed: 0,Country,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage
0,China,Asia,9572900,1277558000,133,1949-10-01,Mandarin,0.81
2,Brazil,South America,8547403,170115000,20,1822-09-07,Portuguese,0.98
4,Norway,Europe,385207,5379000,14,1905-05-17,Norwegian,0.97
5,United States,North America,9363520,278357000,30,1776-07-04,English,0.86


In [37]:
# Sort the rows by the 'Population' column, smallest first

country.sort_values(axis=0, by="Population")

Unnamed: 0,Country,Continent,SurfaceArea,Population,Density,IndependenceDate,OfficialLanguage,Percentage
4,Norway,Europe,385207,5379000,14,1905-05-17,Norwegian,0.97
1,Bangladesh,Asia,143998,129155000,897,1905-05-24,Bengali,0.98
2,Brazil,South America,8547403,170115000,20,1822-09-07,Portuguese,0.98
5,United States,North America,9363520,278357000,30,1776-07-04,English,0.86
3,India,Asia,3287263,1013662000,308,1905-04-30,Hindi,0.4
0,China,Asia,9572900,1277558000,133,1949-10-01,Mandarin,0.81
