In [10]:
import pandas as pd

In [11]:
import numpy as np

In [12]:
import matplotlib.pyplot as plt


In [19]:
'''
A Series is a one-dimensional labeled array capable of holding any data type
(integers, strings, floating point numbers, Python objects, etc.).
The axis labels are collectively referred to as the index.
The basic way to create a Series is:

s = pd.Series(data, index=index)

data can be: a Python dict
             an ndarray
             a scalar value (like 5)
'''

# When `index` is given it must be exactly as long as `data`; when it is
# omitted, pandas falls back to the integer labels [0, 1, ..., len(data) - 1].
s = pd.Series(data=np.random.randn(5), index=list('abcde'))
s

a   -0.272735
b   -0.075288
c   -0.316803
d    0.098166
e   -1.414065
dtype: float64

In [18]:
s.index  # the Index object that holds all of the Series' axis labels

Index([u'a', u'b', u'c', u'd', u'e'], dtype='object')

In [20]:
# With no explicit `index`, pandas labels the values 0, 1, ..., len(data) - 1.
pd.Series(data=np.random.rand(5))

0    0.665328
1    0.440112
2    0.653838
3    0.480976
4    0.243524
dtype: float64

In [24]:
'''
Note: pandas supports non-unique index values. If an operation that does not
support duplicate index values is attempted, an exception will be raised at
that time. The check is deferred like this almost entirely for performance
reasons (there are many instances in computations, like parts of GroupBy,
where the index is not used).
'''

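'''
From dict

If data is a dict and an index is passed, the values in data corresponding to
the labels in the index will be pulled out (labels that are not keys of the
dict get NaN). Otherwise, an index will be constructed from the keys of the
dict: from the sorted keys, if possible, on older versions (Python < 3.6 or
pandas < 0.23), and in the dict's insertion order on newer versions.
'''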

# Mixed value types (two floats and a string) keyed by label.
d = dict(a=0., b=1.1, c='abcd')

In [25]:
# No index given: the labels are taken from the dict's keys.
pd.Series(data=d)

a       0
b     1.1
c    abcd
dtype: object

In [27]:
# The result follows the order of `index`; 'd' is not a key of the dict, so
# its value is filled in as NaN.
pd.Series(data=d, index=list('bcda'))

b     1.1
c    abcd
d     NaN
a       0
dtype: object

In [28]:
'''
From scalar value

If data is a scalar value, an index must be provided.
The value will be repeated to match the length of the index.
'''
# A scalar is broadcast: every label in the index receives the same value.
pd.Series(data=5, index=list('abcde'))

a    5
b    5
c    5
d    5
e    5
dtype: int64

In [34]:
'''
Series acts very similarly to an ndarray, and is a valid argument to most NumPy
functions. However, things like slicing also slice the index.
'''
# All five labels are strings (even '1', '3' and '6'), so none of them is an
# integer label.
s = pd.Series(data=np.random.rand(5), index=list('1ac36'))
s

1    0.969635
a    0.029219
c    0.152241
3    0.407070
6    0.281966
dtype: float64

In [35]:
# Get the first value by position (the order is the one we defined above).
# The labels are strings, so 0 is not a label: plain s[0] only works through a
# positional fallback that newer pandas deprecates. .iloc is always positional.
s.iloc[0]

0.96963525216332513

In [36]:
# First three elements by position; the index labels are sliced along with
# the values.
s.iloc[:3]

1    0.969635
a    0.029219
c    0.152241
dtype: float64

In [44]:
# s.median() returns the median *value*, not its label. Comparing the Series
# with it gives a boolean mask, and indexing with that mask keeps only the
# entries where the mask is True.
s.loc[s > s.median()]

1    0.969635
3    0.407070
dtype: float64

In [47]:
# Keep the entry whose value equals the median. With five values the median is
# itself one of the elements; in the run shown below it is the entry labelled
# '6' with value 0.281966.
s.loc[s == s.median()]

6    0.281966
dtype: float64

In [48]:
# Get the elements at positions 4, 3 and 1, in that order.
# The labels are strings, so a list of integers passed to plain [] relies on a
# positional fallback that newer pandas deprecates. .iloc is always positional.
s.iloc[[4, 3, 1]]

6    0.281966
3    0.407070
a    0.029219
dtype: float64

In [49]:
# Cannot do it like this: without the inner list, 4, 3, 1 is a single tuple
# key, which is looked up as one label and is not found. The KeyError is
# caught and displayed so this demonstration does not abort a full run of the
# notebook.
try:
    s[4, 3, 1]
except KeyError as err:
    print("KeyError: {}".format(err))

KeyError: (4, 3, 1)