In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 

In [2]:
# Three floats (0.0, 1.0, 2.0) on the default integer index.
sd = pd.Series(np.arange(3, dtype=float))

In [5]:
sd.iloc[-1]  # positional lookup: -1 selects the last element

2.0

In [6]:
# Replace the default integer index with string labels.
sd.index = list('abc')

In [8]:
# Use .iloc for position-based access on a label (non-integer) index.
# Plain sd[-1] relies on an integer-as-position fallback that pandas has
# deprecated (FutureWarning in 2.x); integer keys are slated to be treated
# as labels, which would make sd[-1] a failed label lookup here.
sd.iloc[-1]

2.0

In [9]:
# First two elements by position.
sd.iloc[:2]

a    0.0
b    1.0
dtype: float64

### Adding two Series objects aligns them on their index labels and sums the values whose labels match. A label that is present in only one of the two Series yields a null value (NaN) in the result.

### Adding two DataFrame objects aligns on both the index and the columns. Again, wherever a row label or column name is not present in both frames, the resulting DataFrame contains null values (NaN) in those positions.

In [23]:
# Integers 0..3 labelled a, b, c, d.
a = pd.Series(np.arange(4), index=list('abcd'))

In [24]:
# Integers 0..2 labelled a, c, e.
b = pd.Series(np.arange(3), index=list('ace'))

In [25]:
a  # Series with labels a, b, c, d

a    0
b    1
c    2
d    3
dtype: int64

In [26]:
b  # Series with labels a, c, e

a    0
c    1
e    2
dtype: int64

In [27]:
# Label-aligned addition of the two Series.
c = a.add(b)

In [28]:
c  # labels b, d and e each exist in only one operand, so their sums are NaN

a    0.0
b    NaN
c    3.0
d    NaN
e    NaN
dtype: float64

In [13]:
# 3x4 frame of standard-normal draws; rows labelled a, b, c, default columns.
df1 = pd.DataFrame(np.random.randn(3, 4), index=list('abc'))

In [14]:
df1  # 3x4 frame; no column labels were given, so they default to 0..3

Unnamed: 0,0,1,2,3
a,-0.109884,1.558861,0.809987,0.850103
b,0.113128,1.272484,0.024923,0.624785
c,-0.9416,-0.370533,0.956395,0.701155


In [15]:
# 4x5 frame of standard-normal draws with explicit row and column labels.
df2 = pd.DataFrame(
    np.random.randn(4, 5),
    index=list('abce'),
    columns=[1, 2, 4, 5, 6],
)

In [16]:
df2  # 4x5 frame with rows a, b, c, e and columns 1, 2, 4, 5, 6

Unnamed: 0,1,2,4,5,6
a,-1.399322,0.36643,0.132838,1.480182,1.582635
b,0.320667,1.285256,0.071975,1.012967,-0.492326
c,-1.216631,0.814155,-1.105067,0.777517,0.338993
e,0.795738,-0.249368,0.015074,-0.828319,-1.27996


In [17]:
# Blank out a single cell (row 'c', column 4) to create a missing value.
df2.at['c', 4] = np.nan

In [18]:
df2  # row 'c', column 4 now holds NaN

Unnamed: 0,1,2,4,5,6
a,-1.399322,0.36643,0.132838,1.480182,1.582635
b,0.320667,1.285256,0.071975,1.012967,-0.492326
c,-1.216631,0.814155,,0.777517,0.338993
e,0.795738,-0.249368,0.015074,-0.828319,-1.27996


In [20]:
# Label-aligned addition on both rows and columns, with no fill value.
df3 = df1.add(df2)

In [22]:
df3  # only cells whose row AND column labels exist in both frames get a value

Unnamed: 0,0,1,2,3,4,5,6
a,,0.159539,1.176417,,,,
b,,1.593151,1.310178,,,,
c,,-1.587164,1.77055,,,,
e,,,,,,,


In [29]:
# fill_value=0 stands in for a value that is missing from only one of the two
# frames; a cell with no value in either frame still comes out as NaN.
df3 = df1.add(df2, fill_value=0)

In [30]:
df3  # ('e', 0), ('e', 3) and ('c', 4) stay NaN: neither frame supplied a value

Unnamed: 0,0,1,2,3,4,5,6
a,-0.109884,0.159539,1.176417,0.850103,0.132838,1.480182,1.582635
b,0.113128,1.593151,1.310178,0.624785,0.071975,1.012967,-0.492326
c,-0.9416,-1.587164,1.77055,0.701155,,0.777517,0.338993
e,,0.795738,-0.249368,,0.015074,-0.828319,-1.27996


##### Operations between DataFrames and Series 

In [31]:
# Integers 0..11 laid out as 3 rows by 4 columns.
arr = np.reshape(np.arange(12), (3, 4))

In [32]:
arr  # 3x4 integer array holding 0..11

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [33]:
arr[0]  # first row of arr

array([0, 1, 2, 3])

In [34]:
# Broadcasting: the 1-D first row is subtracted from each row of the 2-D array.
arr1 = np.subtract(arr, arr[0])

arr1

array([[0, 0, 0, 0],
       [4, 4, 4, 4],
       [8, 8, 8, 8]])

In [37]:
# Values 0..11 in a 3x4 frame with rows x, y, z and columns a, b, c, d.
dataframe = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list('xyz'),
    columns=list('abcd'),
)

In [38]:
dataframe  # values 0..11 with rows x, y, z and columns a, b, c, d

Unnamed: 0,a,b,c,d
x,0,1,2,3
y,4,5,6,7
z,8,9,10,11


In [40]:
# Take the first row by position; it comes back as a Series whose index is the
# frame's column labels (a, b, c, d).
series = dataframe.iloc[0]

series

a    0
b    1
c    2
d    3
Name: x, dtype: int64

In [41]:
# DataFrame - Series matches the Series index against the DataFrame's columns
# and subtracts the Series from every row of the DataFrame.
dt_s = dataframe - series

dt_s

Unnamed: 0,a,b,c,d
x,0,0,0,0
y,4,4,4,4
z,8,8,8,8


In [45]:
# .sub() with axis='columns' matches the Series labels against the column
# labels, so this gives the same result as `dataframe - series`.
dataframe.sub(series, axis='columns')

Unnamed: 0,a,b,c,d
x,0,0,0,0
y,4,4,4,4
z,8,8,8,8


## Function application and mapping 

In [46]:
# 4x3 frame of standard-normal draws, one row per state label.
frame = pd.DataFrame(
    np.random.randn(4, 3),
    index=['utah', 'ohio', 'texas', 'oregan'],
    columns=['b', 'd', 'e'],
)

In [47]:
frame  # random draws: values differ on every run (no seed is set in the cells shown)

Unnamed: 0,b,d,e
utah,0.037151,0.800718,1.066514
ohio,-0.078367,-0.126741,0.578821
texas,3.494606,-1.538392,-0.296429
oregan,-1.574364,-0.899409,-0.657613


In [48]:
def f(x):
    """Return the spread (maximum minus minimum) of ``x``.

    ``x`` is any object exposing ``.max()`` and ``.min()``; in this notebook
    it is the Series that ``DataFrame.apply`` passes in for each column or row.
    """
    return x.max() - x.min()

In [51]:
# Replace every entry with its absolute value.
frame = frame.abs()

In [52]:
frame  # every entry is now non-negative

Unnamed: 0,b,d,e
utah,0.037151,0.800718,1.066514
ohio,0.078367,0.126741,0.578821
texas,3.494606,1.538392,0.296429
oregan,1.574364,0.899409,0.657613


In [53]:
# apply() passes each column to f as a 1-D Series (the default axis), so the
# result holds one value per column label.
frame.apply(f)

b    3.457455
d    1.411651
e    0.770085
dtype: float64

In [55]:
# With axis='columns', f is called once per row, so the result holds one value
# per row label.
frame.apply(f, axis = 'columns')

utah      1.029363
ohio      0.500454
texas     3.198177
oregan    0.916751
dtype: float64