In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [4]:
labels = ['a','b','c']
my_data = [10,20,30]

arr = np.array(my_data)
arr

array([10, 20, 30])

## List to Series conversion 

In [6]:
pd.Series(my_data,index=labels)


a    10
b    20
c    30
dtype: int64

In [7]:
pd.DataFrame(my_data,index=labels)

Unnamed: 0,0
a,10
b,20
c,30


In [8]:
s = pd.Series(my_data,index=labels)
s

a    10
b    20
c    30
dtype: int64

In [19]:
s['a']

10

In [20]:
s[0]

10

In [34]:
s = pd.Series([5,6,7],index=['a','b','c'])
s

a    5
b    6
c    7
dtype: int64

In [1]:
s = pd.DataFrame([5,6,7],index=[1,2,3])
s

<IPython.core.display.Javascript object>

Unnamed: 0,0
1,5
2,6
3,7


This means we can pass other index name when we will create Seires or Dataframe from list.

In [22]:
s[0]

10

In [23]:
s['a']

10

## Array to series conversion 

In [24]:
s = pd.Series(arr,index=['a','b','c'])
s

a    10
b    20
c    30
dtype: int32

In [25]:
s[0]

10

In [26]:
s['a']

10

## Dictionary to series conversion 

In [8]:
d = {'a':10,'b':20,'c':30}
s = pd.Series(d)
s

<IPython.core.display.Javascript object>

a    10
b    20
c    30
dtype: int64

In [31]:
s = pd.Series(d,index=[1,2,3])
s

1   NaN
2   NaN
3   NaN
dtype: float64

This means we can not pass other index name when we will create Seires from dictionary.

In [39]:
pd.Series([5,6,7],index=['r','y','6'])['r']

5

In [5]:
pd.Series([5,6,7],index=['r','y','6'])[0]

<IPython.core.display.Javascript object>

5

In [9]:
d

{'a': 10, 'b': 20, 'c': 30}

In [11]:
df = pd.DataFrame(d,index=['a','b','c'])
df

<IPython.core.display.Javascript object>

Unnamed: 0,a,b,c
a,10,20,30
b,10,20,30
c,10,20,30


This means we have to pass index if we will create dataframe from dictionary.

In [6]:
s = pd.Series(my_data,index=['a','b','c'])
s

<IPython.core.display.Javascript object>

NameError: name 'my_data' is not defined

When we give index as outside in String in series then we can call it as s[0] and so on. But if outside index is also in int. format then we can not call any value with the help of s[0] or s[1]. In this case we need to use orginal index only.

In [59]:
df = pd.DataFrame(d,index=[1,2,3])
df

Unnamed: 0,a,b,c
1,10,20,30
2,10,20,30
3,10,20,30


## more on Series 

In [62]:
s1 = pd.Series([1,2,3,4])
s2 = pd.Series([5,6,7,8])

In [63]:
s1[0]

1

In [66]:
s1[0::2]

0    1
2    3
dtype: int64

In [67]:
s3 = s1+s2
s3

0     6
1     8
2    10
3    12
dtype: int64

This will do the index wise summation.

In [80]:
ser1 = pd.Series([1,2,3,4],index=['ca','or','co','ca'])
ser2 = pd.Series([1,2,5,4],index=['ca','nv','az','ca'])
s3 = ser1+ser2
s3


az    NaN
ca    2.0
ca    5.0
ca    5.0
ca    8.0
co    NaN
nv    NaN
or    NaN
dtype: float64

In [82]:
s3 = ser1*ser2
s3

az     NaN
ca     1.0
ca     4.0
ca     4.0
ca    16.0
co     NaN
nv     NaN
or     NaN
dtype: float64

## Row level data manipulation 

In [12]:
from numpy.random import randn as rn

In [13]:
rn(4,3)

array([[-0.7605093 , -0.24349265,  0.78075916],
       [-0.8124249 , -0.78161963, -0.12835474],
       [-0.58546416, -0.39437086, -0.98055025],
       [-0.44584058,  1.71655605, -1.67335255]])

In [14]:
df = pd.DataFrame(rn(4,3),index=['a','b','c','d'],columns = [1,2,3])
df

<IPython.core.display.Javascript object>

Unnamed: 0,1,2,3
a,0.996037,0.155102,0.118086
b,1.227694,2.211342,-1.238636
c,-0.494268,0.653652,2.93032
d,0.082938,0.197119,-1.605963


In [15]:
df.iloc[0,2]

0.11808554586636413

In [16]:
df.loc['a',2]

0.15510214985116805

This means that iloc always try to find data from default index and loc try to find data from named index.

In [112]:
df.iloc[1:3,2:]

Unnamed: 0,3
b,-0.108761
c,0.433407


In [19]:
df.loc[['b','c']][3]

b   -1.238636
c    2.930320
Name: 3, dtype: float64

In [20]:
df.loc[['b','c'],3]

b   -1.238636
c    2.930320
Name: 3, dtype: float64

In [123]:
df

Unnamed: 0,1,2,3
a,1.740329,-1.248063,-0.806375
b,-0.290007,1.279982,-0.108761
c,-0.891602,1.542566,0.433407
d,-0.564129,1.167852,0.424058


In [126]:
df.iloc[1::2,0::2]

Unnamed: 0,1,3
b,-0.290007,-0.108761
d,-0.564129,0.424058


In [127]:
df.loc[['b','c'],[1,3]]

Unnamed: 0,1,3
b,-0.290007,-0.108761
c,-0.891602,0.433407


In [131]:
df.loc[:,[1,3]]

Unnamed: 0,1,3
a,1.740329,-0.806375
b,-0.290007,-0.108761
c,-0.891602,0.433407
d,-0.564129,0.424058


In [147]:
df.iloc[0,1:]

2   -1.248063
3   -0.806375
Name: a, dtype: float64

In [21]:
outside = ['g1','g1','g1','g2','g2','g2']
inside = [1,2,3,1,2,3]
hier_index = list(zip(outside,inside))
hier_index

[('g1', 1), ('g1', 2), ('g1', 3), ('g2', 1), ('g2', 2), ('g2', 3)]

In [22]:
hier_index = pd.MultiIndex.from_tuples(hier_index)
hier_index

<IPython.core.display.Javascript object>

MultiIndex([('g1', 1),
            ('g1', 2),
            ('g1', 3),
            ('g2', 1),
            ('g2', 2),
            ('g2', 3)],
           )

In [23]:
df = pd.DataFrame(rn(6,3),index=hier_index,columns=['A','B','C'])
df

<IPython.core.display.Javascript object>

Unnamed: 0,Unnamed: 1,A,B,C
g1,1,-1.146541,1.0302,0.148388
g1,2,-0.981687,0.833649,1.776379
g1,3,-0.241875,-1.387394,0.722656
g2,1,-1.379433,0.206392,2.094224
g2,2,0.205343,-0.739013,2.269768
g2,3,-0.730443,-1.600744,0.253644


In [28]:
df = pd.DataFrame(np.random.rand(6,3),index=hier_index)
df

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,Unnamed: 1,0,1,2
g1,1,0.190673,0.345842,0.215359
g1,2,0.275831,0.716066,0.559379
g1,3,0.103263,0.217776,0.416274
g2,1,0.375168,0.52236,0.309701
g2,2,0.02833,0.692612,0.520557
g2,3,0.584253,0.198771,0.455202


## How to get row inside row 

In [29]:
df.iloc[0:2,0:2].iloc[1:,0:]

Unnamed: 0,Unnamed: 1,0,1
g1,2,0.275831,0.716066


In [30]:
df.loc['g1'].loc[2,[0,1]]

0    0.275831
1    0.716066
Name: 2, dtype: float64

In [31]:
df

Unnamed: 0,Unnamed: 1,0,1,2
g1,1,0.190673,0.345842,0.215359
g1,2,0.275831,0.716066,0.559379
g1,3,0.103263,0.217776,0.416274
g2,1,0.375168,0.52236,0.309701
g2,2,0.02833,0.692612,0.520557
g2,3,0.584253,0.198771,0.455202


In [33]:
df.iloc[4:5,0:2].iloc[0,1]

0.6926115587321405

In [37]:
df.loc['g2'].loc[2:,[0,1,2]]

Unnamed: 0,0,1,2
2,0.02833,0.692612,0.520557
3,0.584253,0.198771,0.455202


In [38]:
df.loc['g2'].loc[2,1]

0.6926115587321405

In [39]:
df

Unnamed: 0,Unnamed: 1,0,1,2
g1,1,0.190673,0.345842,0.215359
g1,2,0.275831,0.716066,0.559379
g1,3,0.103263,0.217776,0.416274
g2,1,0.375168,0.52236,0.309701
g2,2,0.02833,0.692612,0.520557
g2,3,0.584253,0.198771,0.455202
