In [1]:
import pandas as pd
import numpy as np

In [2]:
# Show the installed pandas version so readers know which release produced the recorded outputs.
pd.__version__

'1.5.1'

# Pandas Series Object

In [3]:
# Build a Series from a plain list; with no index given, pandas labels the
# elements with the default integer index 0..4.
data = pd.Series(data=[11, 22, 33, 44, 55])
data

0    11
1    22
2    33
3    44
4    55
dtype: int64

In [4]:
# The two halves of a Series: the underlying NumPy array and the index object.
data.to_numpy(), data.index

(array([11, 22, 33, 44, 55], dtype=int64), RangeIndex(start=0, stop=5, step=1))

In [5]:
# Custom indexes
# Same five values, now labelled with the letters 'a'..'e' instead of 0..4.
data = pd.Series(data=[11, 22, 33, 44, 55], index=list('abcde'))
data

a    11
b    22
c    33
d    44
e    55
dtype: int64

# Pandas DataFrame Object

In [6]:
# State name -> area lookup; the dict's insertion order becomes the Series index order.
# NOTE(review): 'Illionis' looks like a misspelling of 'Illinois' and 'california'
# is lower-cased. popu_dict uses the same keys, so any rename must be applied to
# both dicts together or the DataFrame built from them will no longer align.
area_dict = dict([
    ('california', 12122),
    ('Texas', 4747),
    ('New York', 76768),
    ('Florida', 98987),
    ('Illionis', 564786),
])
area = pd.Series(data=area_dict)
area

california     12122
Texas           4747
New York       76768
Florida        98987
Illionis      564786
dtype: int64

In [7]:
# State name -> population lookup; the dict's insertion order becomes the Series index order.
# NOTE(review): the keys deliberately mirror area_dict (including the 'Illionis'
# spelling and lower-case 'california'); keep the two dicts in sync if they are renamed.
popu_dict = dict([
    ('california', 122),
    ('Texas', 2222),
    ('New York', 3333),
    ('Florida', 95555),
    ('Illionis', 3333),
])
population = pd.Series(data=popu_dict)
population

california      122
Texas          2222
New York       3333
Florida       95555
Illionis       3333
dtype: int64

In [8]:
# Combine the two Series into one table: each keyword becomes a column name and
# the rows are matched on the state-name index the Series share.
states = pd.DataFrame(data=dict(Population=population, area=area))
states

Unnamed: 0,Population,area
california,122,12122
Texas,2222,4747
New York,3333,76768
Florida,95555,98987
Illionis,3333,564786


In [9]:
# Dictionary-style column access: selecting one column label returns that column as a Series.
states['area']

california     12122
Texas           4747
New York       76768
Florida        98987
Illionis      564786
Name: area, dtype: int64

# Data Indexing and Selection

### Data selection in series

In [18]:
# Five values 1, 5, 9, 13, 17 (start at 1, step 4, stop before 20) labelled 'a'..'e'.
data = pd.Series(data=np.arange(1, 20, 4), index=list('abcde'))
data

a     1
b     5
c     9
d    13
e    17
dtype: int32

In [27]:
# Dictionary-like access on a Series: look up one label, list the labels,
# and materialise the (label, value) pairs.
data['d'], data.keys(),list(data.items())

(13,
 Index(['a', 'b', 'c', 'd', 'e'], dtype='object'),
 [('a', 1), ('b', 5), ('c', 9), ('d', 13), ('e', 17)])

In [39]:
# Three slices of the same Series:
#   data['a':'c'] - label slice; the end label 'c' is included in the result
#   data[2:]      - integer slice; taken by position, starting at the third element
#   data[::-1]    - the whole Series in reverse order
data['a':'c'], data[2:], data[::-1]

(a    1
 b    5
 c    9
 dtype: int32,
 c     9
 d    13
 e    17
 dtype: int32,
 e    17
 d    13
 c     9
 b     5
 a     1
 dtype: int32)

## Indexers: loc, iloc, ix

In [40]:
# Integer labels that start at 1, so an element's label and its position differ.
data = pd.Series({1: 'a', 2: 'b', 3: 'c'})

* loc => uses explicit references (the index labels); label slices include the end label

In [50]:
# .loc selects by index label: label 1 is the first element here, and the
# label slice 1:3 includes the end label 3.
data.loc[1], data.loc[1:3]

('a',
 1    a
 2    b
 3    c
 dtype: object)

* iloc => uses implicit references (zero-based integer positions); positional slices exclude the end position

In [52]:
# .iloc selects by integer position: position 1 is the second element, and
# the positional slice 1:3 excludes the end position.
data.iloc[1], data.iloc[1:3]

('b',
 2    b
 3    c
 dtype: object)

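* ix => a hybrid of loc and iloc; deprecated in pandas 0.20 and removed in pandas 1.0, so use loc or iloc instead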

# Data selection in DataFrame

In [54]:

# Per-state area and population figures, both labelled with the same state names.
state_names = ['California', 'Texas', 'New York', 'Florida', 'Illinois']
area = pd.Series([423967, 695662, 141297, 170312, 149995], index=state_names)
pop = pd.Series([38332521, 26448193, 19651127, 19552860, 12882135], index=state_names)
# The two Series share one index, so the frame has one row per state.
data = pd.DataFrame(data={'area': area, 'pop': pop})
data


Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860
Illinois,149995,12882135


In [56]:
# Select the 'area' column by label; the result is a Series indexed by state name.
data['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [62]:
# The 'area' column with its rows in reverse order (explicit positional slice).
data['area'].iloc[::-1]

Illinois      149995
Florida       170312
New York      141297
Texas         695662
California    423967
Name: area, dtype: int64

In [63]:
# Add a 'density' column: population floor-divided by area, so every value is a
# whole number (the fractional part is discarded).
data['density'] = data['pop'].floordiv(data['area'])
data

Unnamed: 0,area,pop,density
California,423967,38332521,90
Texas,695662,26448193,38
New York,141297,19651127,139
Florida,170312,19552860,114
Illinois,149995,12882135,85


In [65]:
# The frame's contents as a plain 2-D NumPy array; row and column labels are dropped.
data.to_numpy()

array([[  423967, 38332521,       90],
       [  695662, 26448193,       38],
       [  141297, 19651127,      139],
       [  170312, 19552860,      114],
       [  149995, 12882135,       85]], dtype=int64)

In [66]:
# Swap rows and columns: the states become columns and the former columns become rows.
data.transpose()

Unnamed: 0,California,Texas,New York,Florida,Illinois
area,423967,695662,141297,170312,149995
pop,38332521,26448193,19651127,19552860,12882135
density,90,38,139,114,85


In [67]:
# Redisplay the frame: transposing in the previous cell did not modify `data` itself.
data

Unnamed: 0,area,pop,density
California,423967,38332521,90
Texas,695662,26448193,38
New York,141297,19651127,139
Florida,170312,19552860,114
Illinois,149995,12882135,85


In [73]:
# Label-based 2-D slice: rows up to and including 'Florida', columns up to and including 'pop'.
# NOTE(review): %time runs the statement once and the recorded output reads 0 ns,
# so this timing carries no information; %timeit would give a usable measurement.
%time data.loc[:'Florida',:'pop']

CPU times: total: 0 ns
Wall time: 0 ns


Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860


In [74]:
# Position-based 2-D slice: the first four rows and the first two columns.
# NOTE(review): %time runs the statement once and the recorded output reads 0 ns,
# so this timing carries no information; %timeit would give a usable measurement.
%time data.iloc[:4,:2]

CPU times: total: 0 ns
Wall time: 0 ns


Unnamed: 0,area,pop
California,423967,38332521
Texas,695662,26448193
New York,141297,19651127
Florida,170312,19552860


In [76]:
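# .ix was deprecated in pandas 0.20 and removed in pandas 1.0, so the line below
# raises AttributeError on this notebook's pandas version; use .loc or .iloc instead.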
# data.ix[:'Florida',:2]