# Data Frame

In [1]:
import pandas as pd
#DataFrame is an analog of a two-dimensional array with both flexible row indices and flexible column names.

In [2]:
population_dictionary = {
                         'Dhaka': 8906039, 
                         'Chittagong': 2592439, 
                         'Khulna': 664728, 
                         'Sylhet': 531663, 
                         'Rajshahi': 451425, 
                         'Mymensingh':389918
                        }
population_dictionary

{'Dhaka': 8906039,
 'Chittagong': 2592439,
 'Khulna': 664728,
 'Sylhet': 531663,
 'Rajshahi': 451425,
 'Mymensingh': 389918}

In [3]:
population = pd.Series(population_dictionary)
population

Dhaka         8906039
Chittagong    2592439
Khulna         664728
Sylhet         531663
Rajshahi       451425
Mymensingh     389918
dtype: int64

In [4]:
population['Dhaka']

8906039

In [5]:
area_dictionary = {
                         'Dhaka': 1464, 
                         'Chittagong': 5283, 
                         'Khulna': 4394, 
                         'Sylhet': 3490, 
                         'Rajshahi': 2407, 
                         'Mymensingh':4363
                        }
area_dictionary

{'Dhaka': 1464,
 'Chittagong': 5283,
 'Khulna': 4394,
 'Sylhet': 3490,
 'Rajshahi': 2407,
 'Mymensingh': 4363}

In [7]:
area = pd.Series(area_dictionary)
area

Dhaka         1464
Chittagong    5283
Khulna        4394
Sylhet        3490
Rajshahi      2407
Mymensingh    4363
dtype: int64

In [11]:
 # By combining two Series, create the a DataFrame
districts = pd.DataFrame({'population': population, 'area': area})
districts

Unnamed: 0,population,area
Dhaka,8906039,1464
Chittagong,2592439,5283
Khulna,664728,4394
Sylhet,531663,3490
Rajshahi,451425,2407
Mymensingh,389918,4363


In [12]:
 districts.index

Index(['Dhaka', 'Chittagong', 'Khulna', 'Sylhet', 'Rajshahi', 'Mymensingh'], dtype='object')

In [13]:
districts.columns

Index(['population', 'area'], dtype='object')

In [14]:
establish_dictionary = {
                         'Dhaka': 1772, 
                         'Chittagong': 1666, 
                         'Khulna': 1882, 
                         'Sylhet': 1782, 
                         'Rajshahi': 1772, 
                         'Mymensingh':1787
                        }
establish_dictionary

{'Dhaka': 1772,
 'Chittagong': 1666,
 'Khulna': 1882,
 'Sylhet': 1782,
 'Rajshahi': 1772,
 'Mymensingh': 1787}

In [15]:
establish = pd.Series(establish_dictionary)
establish

Dhaka         1772
Chittagong    1666
Khulna        1882
Sylhet        1782
Rajshahi      1772
Mymensingh    1787
dtype: int64

In [16]:
districts = pd.DataFrame({'population': population, 'area': area, 'establish': establish})
districts

Unnamed: 0,population,area,establish
Dhaka,8906039,1464,1772
Chittagong,2592439,5283,1666
Khulna,664728,4394,1882
Sylhet,531663,3490,1782
Rajshahi,451425,2407,1772
Mymensingh,389918,4363,1787


In [17]:
districts['population']['Dhaka']

8906039

In [18]:
districts['establish']['Sylhet']

1782



## DataFrame as specialized dictionary

In [19]:
districts['area']

Dhaka         1464
Chittagong    5283
Khulna        4394
Sylhet        3490
Rajshahi      2407
Mymensingh    4363
Name: area, dtype: int64

In [22]:
districts[0]

KeyError: 0

In [24]:
data['population']

NameError: name 'data' is not defined




## Constructing DataFrame objects

In [25]:
pd.DataFrame(population, columns=['population'])

Unnamed: 0,population
Dhaka,8906039
Chittagong,2592439
Khulna,664728
Sylhet,531663
Rajshahi,451425
Mymensingh,389918


In [26]:
pd.DataFrame(population, columns=['area'])

Unnamed: 0,area
Dhaka,8906039
Chittagong,2592439
Khulna,664728
Sylhet,531663
Rajshahi,451425
Mymensingh,389918


In [27]:
pd.DataFrame(population, columns=['establish'])

Unnamed: 0,establish
Dhaka,8906039
Chittagong,2592439
Khulna,664728
Sylhet,531663
Rajshahi,451425
Mymensingh,389918


In [28]:
# From a list of dictionaries
# ---------------------------
data = [{'a': i, 'b': 2 * i} for i in range(3)] # list comprehension
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4


In [29]:
 pd.DataFrame([{'a': 1, 'b': 2}, {'b': 3, 'c': 4}])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [30]:
 pd.DataFrame([{'a': 1, 'b': 2}, {'b': 3, 'c': 4}])

Unnamed: 0,a,b,c
0,1.0,2,
1,,3,4.0


In [31]:
pd.DataFrame({'population': population, 'area': area})

Unnamed: 0,population,area
Dhaka,8906039,1464
Chittagong,2592439,5283
Khulna,664728,4394
Sylhet,531663,3490
Rajshahi,451425,2407
Mymensingh,389918,4363


In [32]:
pd.DataFrame(np.random.rand(3, 2),
 columns=['foo', 'bar'],
 index=['a', 'b', 'c'])pd.DataFrame({'population': population, 'establish': establish})

Unnamed: 0,population,establish
Dhaka,8906039,1772
Chittagong,2592439,1666
Khulna,664728,1882
Sylhet,531663,1782
Rajshahi,451425,1772
Mymensingh,389918,1787


In [36]:
# From a two-dimensional NumPy array
# -----------------------------------

import numpy as np
pd.DataFrame(np.random.rand(3, 2), columns=['foo', 'bar'], index=['a', 'b', 'c'])

Unnamed: 0,foo,bar
a,0.7641,0.751891
b,0.732847,0.755422
c,0.329177,0.780012


In [37]:
pd.DataFrame(np.random.rand(3, 2))

Unnamed: 0,0,1
0,0.97696,0.979133
1,0.359595,0.875072
2,0.110755,0.271316


In [38]:


ind = pd.Index([2, 3, 5, 7, 11])
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

In [39]:
 ind[1]

3

In [40]:
ind[::2] # Python indexing notation for slicing

Int64Index([2, 5, 11], dtype='int64')

In [41]:
print(ind.size, ind.shape, ind.ndim, ind.dtype)

5 (5,) 1 int64


In [42]:
 ind[1] = 0 # tring to modiy but generates error

TypeError: Index does not support mutable operations

In [43]:
# We can not modify the index

In [44]:
indA = pd.Index([1, 3, 5, 7, 9])
indB = pd.Index([2, 3, 5, 7, 11])

In [45]:
indA.intersection(indB) # intersection

Int64Index([3, 5, 7], dtype='int64')

In [46]:
 indA.union(indB) # union

Int64Index([1, 2, 3, 5, 7, 9, 11], dtype='int64')

In [47]:
 indA.symmetric_difference(indB)

Int64Index([1, 2, 9, 11], dtype='int64')