# Pandas basics


In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series from explicit values and explicit index labels.
pd.Series(
    data=[1, 2, 3, 4],
    index=['a', 'b', 'c', 'd'],
)

a    1
b    2
c    3
d    4
dtype: int64

In [3]:
# Plain dict whose keys will become the index labels of a Series.
myD = dict(e=20, f=200, g=2000, h=15)

In [4]:
# A dict converts directly: keys become the index, values become the data.
pd.Series(data=myD)

e      20
f     200
g    2000
h      15
dtype: int64

In [5]:
# First operand for the alignment demo: labels a-e.
mySeriesOne = pd.Series(
    data=[1, 2, 3, 4, 5],
    index=['a', 'b', 'c', 'd', 'e'],
)

In [6]:
# Display the Series defined in the previous cell.
mySeriesOne

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [7]:
# Second operand: shares labels a, b, c, e with mySeriesOne but has 'f'
# where mySeriesOne has 'd'.
mySeriesTwo = pd.Series(
    data=[1, 2, 7, 4, 2],
    index=['a', 'b', 'c', 'f', 'e'],
)

In [8]:
# Addition aligns on index labels, not on position. 'd' and 'f' each exist
# in only one operand, so they come out as NaN and the result is float64.
mySeriesOne + mySeriesTwo

a     2.0
b     4.0
c    10.0
d     NaN
e     7.0
f     NaN
dtype: float64

In [9]:
# Look up a single value by its index label.
mySeriesTwo['f']

4

In [10]:
from numpy.random import randn

In [11]:
# Draw the 3x4 standard-normal sample from a seeded Generator so that
# Restart & Run All reproduces the same frame every time; a bare randn()
# call is unseeded and yields different numbers on every run.
# NOTE: the outputs recorded below came from an unseeded run and will be
# replaced by the seeded values the next time the notebook is executed.
mydata = np.random.default_rng(seed=42).standard_normal((3, 4))

In [12]:
# Wrap the random array in a labelled frame: index= supplies the row labels
# and columns= the column labels.
myDataFrame = pd.DataFrame(
    data=mydata,
    index=['R1', 'R2', 'R3'],
    columns=['C1', 'C2', 'C3', 'C4'],
)

In [13]:
# Show the full frame with its row (R1-R3) and column (C1-C4) labels.
myDataFrame

Unnamed: 0,C1,C2,C3,C4
R1,1.374392,0.347265,0.501291,1.367758
R2,0.086493,1.018043,-1.48213,0.480627
R3,-0.301161,-0.37948,0.092923,-0.439009


In [14]:
# Selecting a single column label returns a Series named after that column.
myDataFrame['C1']

R1    1.374392
R2    0.086493
R3   -0.301161
Name: C1, dtype: float64

In [15]:
# Passing a list of column labels returns a DataFrame with just those columns.
myDataFrame[['C1', 'C3']]

Unnamed: 0,C1,C3
R1,1.374392,0.501291
R2,0.086493,-1.48213
R3,-0.301161,0.092923


In [16]:
# Assigning to a label that does not exist yet adds a new column; here it is
# the element-wise product of C1 and C3.
myDataFrame['C5'] = myDataFrame['C1'] * myDataFrame['C3']

In [17]:
# Confirm that C5 now appears as the last column.
myDataFrame

Unnamed: 0,C1,C2,C3,C4,C5
R1,1.374392,0.347265,0.501291,1.367758,0.688971
R2,0.086493,1.018043,-1.48213,0.480627,-0.128193
R3,-0.301161,-0.37948,0.092923,-0.439009,-0.027985


In [18]:
# Remove column C2. Rebind the name instead of mutating with inplace=True;
# errors='ignore' keeps the cell re-runnable, because a second run would
# otherwise raise KeyError once 'C2' is already gone.
myDataFrame = myDataFrame.drop(columns='C2', errors='ignore')

In [19]:
# Confirm that C2 is no longer present.
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R2,0.086493,-1.48213,0.480627,-0.128193
R3,-0.301161,0.092923,-0.439009,-0.027985


In [20]:
# .loc selects by label: the row labelled 'R2', returned as a Series.
myDataFrame.loc['R2']

C1    0.086493
C3   -1.482130
C4    0.480627
C5   -0.128193
Name: R2, dtype: float64

In [21]:
# .iloc selects by integer position: position 1 is the same row as label 'R2'.
myDataFrame.iloc[1]

C1    0.086493
C3   -1.482130
C4    0.480627
C5   -0.128193
Name: R2, dtype: float64

In [22]:
# .loc with a row list and a column list selects a sub-frame:
# rows R1 and R2, columns C4 and C5.
myDataFrame.loc[['R1', 'R2'], ['C4', 'C5']]

Unnamed: 0,C4,C5
R1,1.367758,0.688971
R2,0.480627,-0.128193


In [23]:
# Re-display the frame as a reference for the boolean filters that follow.
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R2,0.086493,-1.48213,0.480627,-0.128193
R3,-0.301161,0.092923,-0.439009,-0.027985


In [24]:
# Comparing the frame with a scalar is element-wise and yields a boolean
# frame with the same row and column labels.
myDataFrame > 0

Unnamed: 0,C1,C3,C4,C5
R1,True,True,True,True
R2,True,False,True,False
R3,False,True,False,False


In [25]:
# Indexing with a boolean frame keeps the shape; cells where the mask is
# False come back as missing values.
myDataFrame[myDataFrame > 0]

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R2,0.086493,,0.480627,
R3,,0.092923,,


In [26]:
# Indexing with a boolean Series filters rows: keep only the rows where
# C3 is positive, with all columns intact.
myDataFrame[myDataFrame['C3'] > 0]

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R3,-0.301161,0.092923,-0.439009,-0.027985


In [27]:
# Same row filter, this time keeping only the rows where C5 is positive.
myDataFrame[myDataFrame['C5'] > 0]

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971


In [29]:
# The filters above return new frames; the original frame is unchanged.
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R2,0.086493,-1.48213,0.480627,-0.128193
R3,-0.301161,0.092923,-0.439009,-0.027985


In [30]:
# Keep only the rows where C4 is positive.
myDataFrame[myDataFrame['C4'] > 0]

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R2,0.086493,-1.48213,0.480627,-0.128193


In [33]:
# Combine two row masks with element-wise OR: keep rows where C5 or C4 is
# positive. Each comparison needs its own parentheses because | binds more
# tightly than >.
myDataFrame[(myDataFrame['C5'] > 0) | (myDataFrame['C4'] > 0)]

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R2,0.086493,-1.48213,0.480627,-0.128193


In [34]:
# Combine two row masks with element-wise AND: keep rows where both C5 and
# C4 are positive. Each comparison needs its own parentheses because & binds
# more tightly than >.
myDataFrame[(myDataFrame['C5'] > 0) & (myDataFrame['C4'] > 0)]

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971


In [36]:
# Plain `and` works on two single booleans. The row masks in the filter
# cells are combined with the element-wise operators & and | instead.
True and False

False

In [37]:
# Display the current frame before adding a column of replacement row labels.
myDataFrame

Unnamed: 0,C1,C3,C4,C5
R1,1.374392,0.501291,1.367758,0.688971
R2,0.086493,-1.48213,0.480627,-0.128193
R3,-0.301161,0.092923,-0.439009,-0.027985


In [38]:
# One replacement label per row of the frame.
myNewIndex = [
    'row1',
    'row2',
    'row3',
]

In [39]:
# Display the list of replacement labels.
myNewIndex

['row1', 'row2', 'row3']

In [40]:
# Attach the labels as an ordinary column named 'NewIndex' (one label per row).
myDataFrame['NewIndex'] = myNewIndex

In [41]:
# Confirm that NewIndex was added as the last column.
myDataFrame

Unnamed: 0,C1,C3,C4,C5,NewIndex
R1,1.374392,0.501291,1.367758,0.688971,row1
R2,0.086493,-1.48213,0.480627,-0.128193,row2
R3,-0.301161,0.092923,-0.439009,-0.027985,row3


In [42]:
# set_index returns a new frame indexed by the 'NewIndex' column. The result
# is only displayed, not assigned, so myDataFrame itself is left unchanged.
myDataFrame.set_index('NewIndex')

Unnamed: 0_level_0,C1,C3,C4,C5
NewIndex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
row1,1.374392,0.501291,1.367758,0.688971
row2,0.086493,-1.48213,0.480627,-0.128193
row3,-0.301161,0.092923,-0.439009,-0.027985


In [43]:
# Still has the R1-R3 index and the NewIndex column: the set_index result
# in the previous cell was never assigned back.
myDataFrame

Unnamed: 0,C1,C3,C4,C5,NewIndex
R1,1.374392,0.501291,1.367758,0.688971,row1
R2,0.086493,-1.48213,0.480627,-0.128193,row2
R3,-0.301161,0.092923,-0.439009,-0.027985,row3
