In [1]:
import pandas as pd
import numpy as np

In [2]:
ser1 = pd.Series([10,20,30,40])
ser1

0    10
1    20
2    30
3    40
dtype: int64

In [3]:
ser1.values

array([10, 20, 30, 40], dtype=int64)

In [4]:
ser1.index

RangeIndex(start=0, stop=4, step=1)

In [5]:
ser2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
ser2

a    10
b    20
c    30
d    40
dtype: int64

In [6]:
ser2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [7]:
ser2['d'] = 50
ser2[['c','d']]

c    30
d    50
dtype: int64

In [8]:
ser1[[0,2]]

0    10
2    30
dtype: int64

In [9]:
ser2 = pd.Series([10, 20, 30, 40], index=['a', 'b', 'c', 'd'])
ser2[ser2 > 25] ## filtering with a boolean array

c    30
d    40
dtype: int64

In [10]:
import numpy as np
np.sqrt(ser2) # maths operation on data series

a    3.162278
b    4.472136
c    5.477226
d    6.324555
dtype: float64

In [11]:
print('a' in ser2)
10 in ser2.values

True


True

In [12]:
sdata = {'Karachi': 35000, 'Lahore': 71000, 'Islamabad': 16000, 'Quetta': 5000}
ser3 = pd.Series(sdata)
ser3

Karachi      35000
Lahore       71000
Islamabad    16000
Quetta        5000
dtype: int64

In [13]:
cities = ['Karachi', 'Islamabad', 'Lahore', 'Hyderabad']
ser4 = pd.Series(sdata, index=cities)
ser4

Karachi      35000.0
Islamabad    16000.0
Lahore       71000.0
Hyderabad        NaN
dtype: float64

In [14]:
pd.isnull(ser4), pd.notnull(ser4)

(Karachi      False
 Islamabad    False
 Lahore       False
 Hyderabad     True
 dtype: bool, Karachi       True
 Islamabad     True
 Lahore        True
 Hyderabad    False
 dtype: bool)

In [15]:
print(ser3)
print()
print(ser4)
print()
print(ser3 + ser4)
print()
ser3.add(ser4, fill_value=0)

Karachi      35000
Lahore       71000
Islamabad    16000
Quetta        5000
dtype: int64

Karachi      35000.0
Islamabad    16000.0
Lahore       71000.0
Hyderabad        NaN
dtype: float64

Hyderabad         NaN
Islamabad     32000.0
Karachi       70000.0
Lahore       142000.0
Quetta            NaN
dtype: float64



Hyderabad         NaN
Islamabad     32000.0
Karachi       70000.0
Lahore       142000.0
Quetta         5000.0
dtype: float64

In [16]:
d = {'A': 10, 'B': 20, 'C': 30, 'D': 40}
ser5 = pd.Series(d)
ser5.name = 'Kashif'
ser5.index.name = 'Alpha'
display(ser5)
ser5.index = 1,2,3,4
ser5.index.name = 'Digits'
ser5

Alpha
A    10
B    20
C    30
D    40
Name: Kashif, dtype: int64

Digits
1    10
2    20
3    30
4    40
Name: Kashif, dtype: int64

# DataFrames

In [17]:
data = {'city': ['Karachi', 'Karachi', 'Karachi', 'Lahore', 'Lahore', 'Lahore'],
        'year' : [2000, 2001, 2002, 2001, 2002, 2003],
        'pop'  : [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data, columns=['year', 'city', 'pop', 'area'],
                    index=['one', 'two', 'three', 'four', 'five', 'six'])
frame

Unnamed: 0,year,city,pop,area
one,2000,Karachi,1.5,
two,2001,Karachi,1.7,
three,2002,Karachi,3.6,
four,2001,Lahore,2.4,
five,2002,Lahore,2.9,
six,2003,Lahore,3.2,


In [18]:
frame.year
frame['year'] # same as above but little faster

one      2000
two      2001
three    2002
four     2001
five     2002
six      2003
Name: year, dtype: int64

In [19]:
frame['area'] = np.arange(100,700,100)
frame

Unnamed: 0,year,city,pop,area
one,2000,Karachi,1.5,100
two,2001,Karachi,1.7,200
three,2002,Karachi,3.6,300
four,2001,Lahore,2.4,400
five,2002,Lahore,2.9,500
six,2003,Lahore,3.2,600


In [20]:
val = pd.Series([-1.2, -1.5, -1.7], index=['two', 'four', 'five'])
frame['area'] = val
frame

Unnamed: 0,year,city,pop,area
one,2000,Karachi,1.5,
two,2001,Karachi,1.7,-1.2
three,2002,Karachi,3.6,
four,2001,Lahore,2.4,-1.5
five,2002,Lahore,2.9,-1.7
six,2003,Lahore,3.2,


In [21]:
frame['karachi'] = frame.city == 'Karachi'
frame

Unnamed: 0,year,city,pop,area,karachi
one,2000,Karachi,1.5,,True
two,2001,Karachi,1.7,-1.2,True
three,2002,Karachi,3.6,,True
four,2001,Lahore,2.4,-1.5,False
five,2002,Lahore,2.9,-1.7,False
six,2003,Lahore,3.2,,False


In [22]:
del frame['karachi']
frame

Unnamed: 0,year,city,pop,area
one,2000,Karachi,1.5,
two,2001,Karachi,1.7,-1.2
three,2002,Karachi,3.6,
four,2001,Lahore,2.4,-1.5
five,2002,Lahore,2.9,-1.7
six,2003,Lahore,3.2,


In [23]:
pop = {'Karachi': {2001: 2.4, 2002: 2.9},
       'Lahore': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
frame3 = pd.DataFrame(pop)
frame3

Unnamed: 0,Karachi,Lahore
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [24]:
frame3.T

Unnamed: 0,2001,2002,2000
Karachi,2.4,2.9,
Lahore,1.7,3.6,1.5


In [25]:
pop = {'Karachi': {2001: 2.4, 2002: 2.9},
       'Lahore': {2000: 1.5, 2001: 1.7, 2002: 3.6}}
pd.DataFrame(pop, index=[2000, 2001, 2002, 2003])

Unnamed: 0,Karachi,Lahore
2000,,1.5
2001,2.4,1.7
2002,2.9,3.6
2003,,


In [26]:
frame3.values

array([[2.4, 1.7],
       [2.9, 3.6],
       [nan, 1.5]])

In [27]:
frame

Unnamed: 0,year,city,pop,area
one,2000,Karachi,1.5,
two,2001,Karachi,1.7,-1.2
three,2002,Karachi,3.6,
four,2001,Lahore,2.4,-1.5
five,2002,Lahore,2.9,-1.7
six,2003,Lahore,3.2,


In [28]:
obj = pd.DataFrame(np.zeros((3,3)), index=list('abc'), columns=list('xyz'))
# obj.index # Index objects are immutable and thus can’t be modified by the user
obj.columns + obj.index

Index(['xa', 'yb', 'zc'], dtype='object')

In [29]:
labels = pd.Index(np.arange(3))
labels.drop(1)

Int64Index([0, 2], dtype='int64')

In [30]:
frame3

Unnamed: 0,Karachi,Lahore
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


In [31]:
frame3.columns

Index(['Karachi', 'Lahore'], dtype='object')

In [32]:
'Lahore' in frame3.columns

True

In [33]:
2000 in frame3.index

True

# 5.2 Essential Functionality

In [34]:
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=['d', 'b', 'a', 'c'])
obj

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64

In [35]:
obj2 = obj.reindex(['a', 'b', 'c', 'd', 'e'])
obj2

a   -5.3
b    7.2
c    3.6
d    4.5
e    NaN
dtype: float64

In [52]:
obj3 = pd.Series(['blue', 'purple', 'yellow'], index=[1, 3, 2])
obj3

1      blue
3    purple
2    yellow
dtype: object

In [55]:
obj3.reindex([1,2,3],)

1      blue
2    yellow
3    purple
dtype: object

In [38]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)),
                     index=['a', 'c', 'd'],
                     columns=['Karachi', 'Lahore', 'Islamabad'])
frame

Unnamed: 0,Karachi,Lahore,Islamabad
a,0,1,2
c,3,4,5
d,6,7,8


In [39]:
frame.reindex(['a', 'b', 'c', 'd'], method='ffill')

Unnamed: 0,Karachi,Lahore,Islamabad
a,0,1,2
b,0,1,2
c,3,4,5
d,6,7,8


In [40]:
frame.reindex(columns = ['Karachi', 'Hyderabad', 'Lahore', 'Islamabad'], method='ffill')

ValueError: index must be monotonic increasing or decreasing

In [None]:
frame.loc[['a', 'b', 'c', 'd'], ['Karachi', 'Hyderabad', 'Lahore', 'Islamabad']]

### Dropping Entries from an Axis

In [None]:
obj = pd.Series(np.arange(5.), index=['a', 'b', 'c', 'd', 'e'])
obj

In [None]:
new_obj = obj.drop(['d', 'c'])
new_obj

In [None]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

In [None]:
data.drop(['Colorado', 'Ohio'])

In [None]:
data.drop('two', axis=1)

### Indexing, Selection, and Filtering

In [None]:
obj = pd.Series(np.arange(5.), index=list('abcde'))
obj

In [None]:
obj['b':'d']

In [None]:
obj[1:4]

In [None]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=['Ohio', 'Colorado', 'Utah', 'New York'],
                    columns=['one', 'two', 'three', 'four'])
data

In [None]:
data['two']

In [None]:
data[:2]

In [None]:
data < 5

In [None]:
data

In [None]:
data.loc['Colorado', ['two', 'three']]

In [None]:
data.iloc[2, [3, 0, 1]]