# Series

In [13]:
import numpy as np

In [14]:
import pandas as pd

In [15]:
# Here, we specify the index
s = pd.Series(np.random.randn(5), index=['a','b','c','d','e'])

In [16]:
s

a    0.698821
b   -0.892696
c    1.480501
d    0.513472
e   -1.247381
dtype: float64

In [17]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [18]:
# Here, we let Pandas create a default index
pd.Series(np.random.randn(5))

0    1.334769
1   -1.229673
2   -1.084456
3    0.184026
4   -1.246211
dtype: float64

### Series can be created from dictionary

In [19]:
d = {'b':1, 'a':0,'c':2}

In [20]:
pd.Series(d) #dictionary insertion order is used as index when one isn't provided

b    1
a    0
c    2
dtype: int64

### Create Series from scalar value

In [21]:
# index values are necessary when dealing with scalar values
pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [22]:
s[0]

  s[0]


0.6988212260394497

In [23]:
s[:3]

a    0.698821
b   -0.892696
c    1.480501
dtype: float64

In [24]:
s[s > s.median()] # values greater than median

a    0.698821
c    1.480501
dtype: float64

In [26]:
s[[4,3,1]]

  s[[4,3,1]]


e   -1.247381
d    0.513472
b   -0.892696
dtype: float64

In [27]:
np.exp(s) # calculating exponentials

a    2.011380
b    0.409550
c    4.395145
d    1.671083
e    0.287256
dtype: float64

In [28]:
s.dtype

dtype('float64')

In [29]:
s.to_numpy()

array([ 0.69882123, -0.89269625,  1.48050051,  0.51347204, -1.24738139])

### Series is dict-like

In [30]:
s['a']

0.6988212260394497

In [31]:
s['e'] = 12

In [32]:
s

a     0.698821
b    -0.892696
c     1.480501
d     0.513472
e    12.000000
dtype: float64

In [33]:
'e' in s

True

In [34]:
'f' in s

False

In [35]:
s['f']

KeyError: 'f'

### Vectorized operations

In [36]:
s + s

a     1.397642
b    -1.785393
c     2.961001
d     1.026944
e    24.000000
dtype: float64

In [37]:
s * 2

a     1.397642
b    -1.785393
c     2.961001
d     1.026944
e    24.000000
dtype: float64

In [38]:
np.exp(s)

a         2.011380
b         0.409550
c         4.395145
d         1.671083
e    162754.791419
dtype: float64

In [39]:
# Difference between series and ndarray is that operations between series automatically
# based on the label.
s1 = s[1:]

In [40]:
s2 = s[:-1]

In [41]:
s1 + s2

a         NaN
b   -1.785393
c    2.961001
d    1.026944
e         NaN
dtype: float64

### Name attribute

In [42]:
s = pd.Series(np.random.randn(5), name = 'something')

In [43]:
s

0   -0.533531
1   -0.962794
2   -0.680108
3    1.304962
4   -0.588522
Name: something, dtype: float64

In [44]:
s.name

'something'

# <u> DataFrame</u>

In [45]:
d = {'one': pd.Series([1., 2., 3.], index=['a', 'b', 'c']),
     'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}

In [46]:
df = pd.DataFrame(d)

In [47]:
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [48]:
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [49]:
# DataFrame without pre made indeces
d = {'one': [1., 2., 3., 4.],
     'two': [4., 3., 2., 1.]}

pd.DataFrame(d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [50]:
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [51]:
# Make dataframe from a series
pd.DataFrame(pd.Series(np.random.randn(5), name='something')) 

Unnamed: 0,something
0,-1.695895
1,-1.258688
2,-0.418089
3,0.477013
4,0.722714


## Column selection, addition, deletion

In [52]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [53]:
df['three'] = df['one'] * df['two']

In [54]:
df['flag'] = df['one'] > 2

In [55]:
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [58]:
# columns can be deleted like a in a dictionary
del df['two']

KeyError: 'two'

In [59]:
df

Unnamed: 0,one,three,flag
a,1.0,1.0,False
b,2.0,4.0,False
c,3.0,9.0,True
d,,,False


In [60]:
# when adding a scalar value, the value will duplicate to fill the column
df['foo'] = 'bar'

In [61]:
df

Unnamed: 0,one,three,flag,foo
a,1.0,1.0,False,bar
b,2.0,4.0,False,bar
c,3.0,9.0,True,bar
d,,,False,bar


In [62]:
# When insterting a Series without an index as the DF, it will conform to current DF's index
df['one_trunc'] = df['one'][:2]

In [63]:
df

Unnamed: 0,one,three,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,4.0,False,bar,2.0
c,3.0,9.0,True,bar,
d,,,False,bar,


In [65]:
df = pd.DataFrame(np.random.randn(8,3), index=range(8), columns = list('ABC'))

In [66]:
df

Unnamed: 0,A,B,C
0,-1.323655,1.064384,-1.22818
1,-0.355191,-1.700412,-0.482691
2,-0.222277,-0.921645,-0.794216
3,-0.405092,-1.090593,0.88407
4,1.254576,0.700805,-1.058696
5,0.693518,0.098728,-1.380604
6,0.126198,-0.842845,0.412097
7,0.213331,-0.292468,2.003798


In [67]:
df * 5 + 2

Unnamed: 0,A,B,C
0,-4.618277,7.321919,-4.140899
1,0.224047,-6.502058,-0.413457
2,0.888617,-2.608226,-1.971078
3,-0.02546,-3.452963,6.420352
4,8.272879,5.504025,-3.293481
5,5.467592,2.49364,-4.903022
6,2.63099,-2.214227,4.060485
7,3.066655,0.537659,12.01899


In [68]:
1 / df

Unnamed: 0,A,B,C
0,-0.755484,0.939511,-0.814213
1,-2.81539,-0.588093,-2.071717
2,-4.498901,-1.085016,-1.259104
3,-2.468575,-0.916933,1.131132
4,0.797082,1.426931,-0.944558
5,1.441923,10.128848,-0.72432
6,7.924058,-1.186457,2.426613
7,4.687551,-3.419176,0.499052


In [69]:
df ** 4

Unnamed: 0,A,B,C
0,3.069727,1.283492,2.275348
1,0.015916,8.36019,0.054285
2,0.002441,0.721531,0.397882
3,0.026929,1.414654,0.610868
4,2.477352,0.241206,1.256277
5,0.23133,9.5e-05,3.633098
6,0.000254,0.504652,0.02884
7,0.002071,0.007317,16.121885


In [70]:
# Boolean operators are vectorized as well:
df1 = pd.DataFrame({'a':[1,0,1], 'b': [0,1,1]}, dtype=bool)

In [71]:
df2 = pd.DataFrame({'a':[0,1,1], 'b': [1,1,0]}, dtype=bool)

In [76]:
df1 & df2

Unnamed: 0,a,b
0,False,False
1,False,True
2,True,False


In [73]:
df1 | df2

Unnamed: 0,a,b
0,True,True
1,True,True
2,True,True


In [74]:
df1 ^ df2

Unnamed: 0,a,b
0,True,True
1,True,False
2,False,True


In [75]:
-df1

Unnamed: 0,a,b
0,False,True
1,True,False
2,False,False


Boolean operators and their meaning:
& == AND, 
| == OR, 
^ == XOR