In [7]:
import pandas as pd

In [8]:
import numpy as np

In [9]:
import matplotlib.pyplot as plt


In [10]:
'''
Series is a one-dimensional labeled array capable of holding any data type 
(integers, strings, floating point numbers, Python objects, etc.). 
The axis labels are collectively referred to as the index. 
Below is a basic method to create series:

s = pd.Series(data, index=index)

data can be: a Python dict
             an ndarray
             a scalar value (like 5)             
'''

# An explicit index must be exactly as long as the data. When no index is
# supplied, pandas falls back to the default [0, 1, ..., len(data) - 1].
s = pd.Series(np.random.randn(5), index=list('abcde'))
s

a   -2.250546
b    1.263177
c   -0.317224
d   -0.001793
e   -0.611428
dtype: float64

In [11]:
# The axis labels of a Series are available through its .index attribute.
s.index

Index([u'a', u'b', u'c', u'd', u'e'], dtype='object')

In [20]:
# With no index argument the labels default to the integers
# 0 .. len(data) - 1.
pd.Series(np.random.rand(5))

0    0.665328
1    0.440112
2    0.653838
3    0.480976
4    0.243524
dtype: float64

In [24]:
'''
Note: pandas supports non-unique index values. If an operation that does not 
support duplicate index values is attempted, an exception will be raised at 
that time. The reason for being lazy is nearly all performance-based 
(there are many instances in computations, like parts of GroupBy, where the 
index is not used).
'''

'''
From dict

If data is a dict and an index is passed, the values in data corresponding to
the labels in the index will be pulled out. Otherwise, an index will be
constructed from the sorted keys of the dict, if possible.
'''

# Mixed value types (two floats and a string) keyed by label.
d = dict(a=0., b=1.1, c='abcd')

In [25]:
# Without an index argument the dict keys become the labels of the Series.
pd.Series(d)

a       0
b     1.1
c    abcd
dtype: object

In [27]:
# With an explicit index the result follows the order of that index; a label
# that is missing from the dict ('d' here) is filled with NaN.
pd.Series(d,index = ['b','c','d','a'])

b     1.1
c    abcd
d     NaN
a       0
dtype: object

In [28]:
'''
From scalar value

If data is a scalar value, an index must be provided.
The value will be repeated to match the length of the index.
'''
# A single scalar is broadcast: every label in the index receives the same
# value.
pd.Series(5, index = ['a','b','c','d','e'])

a    5
b    5
c    5
d    5
e    5
dtype: int64

In [34]:
'''
Series acts very similarly to an ndarray, and is a valid argument to most NumPy
functions. However, operations such as slicing also slice the index.
'''
# String labels chosen so that label and position are clearly different things.
# NOTE(review): this rebinds `s`. Cells further down that display labels
# 'a'..'e' were evidently executed against the earlier Series (their execution
# counts are lower), so those outputs will not reproduce on a top-to-bottom run.
s = pd.Series(np.random.rand(5), index=list('1ac36'))
s

1    0.969635
a    0.029219
c    0.152241
3    0.407070
6    0.281966
dtype: float64

In [35]:
# Positional access: .iloc[0] returns the first element regardless of the
# labels (the order is the one given when the Series was built). A plain
# s[0] on a string-labelled Series relies on a deprecated positional fallback.
s.iloc[0]

0.96963525216332513

In [36]:
# Integer slice: the first three elements, returned together with their labels.
s[:3]

1    0.969635
a    0.029219
c    0.152241
dtype: float64

In [44]:
# .median() returns the median VALUE of the data, not a label or position.
# The comparison builds a boolean mask, and indexing with that mask keeps
# only the elements greater than the median.
s[s>s.median()]

1    0.969635
3    0.407070
dtype: float64

In [47]:
# Select the element equal to the median: here its label is '6' and its
# value is 0.281966.
s[s == s.median()]

6    0.281966
dtype: float64

In [48]:
# Pick the elements at positions 4, 3 and 1, in that order. .iloc makes the
# positional intent explicit; a bare s[[4, 3, 1]] on a string-labelled Series
# relies on a deprecated positional fallback.
s.iloc[[4, 3, 1]]

6    0.281966
3    0.407070
a    0.029219
dtype: float64

In [49]:
# A bare comma-separated key is a single tuple (4, 3, 1), which is looked up
# as one label and is not in the index, so this raises KeyError. The error is
# caught and shown so that the notebook still runs top to bottom.
try:
    s[4, 3, 1]
except KeyError as err:
    print('KeyError: {}'.format(err))

KeyError: (4, 3, 1)

In [12]:
# NumPy functions can be applied directly to a Series: np.exp is evaluated
# element-wise and the result keeps the original labels.
np.exp(s)

a    0.105342
b    3.536638
c    0.728167
d    0.998209
e    0.542575
dtype: float64

In [13]:
# Label-based lookup: the value stored under label 'a'.
s['a']

-2.2505461558129629

In [15]:
# Assign by label, as with a dict: an existing label ('e') is overwritten,
# and a label not yet in the index ('f') is appended.
s['e'] = 12
s['f'] = 11
s

a    -2.250546
b     1.263177
c    -0.317224
d    -0.001793
e    12.000000
f    11.000000
dtype: float64

In [18]:
# Check whether a label is present in the Series, like `key in dict`.
'e' in s

True

In [20]:
# A label that is absent from the index gives False.
'k' in s

False

In [21]:
# Use the "get" method for a safe lookup: it returns the value when the label
# exists, and a default (None unless one is supplied) when there is no such
# label, instead of raising KeyError.
s.get('f')

11.0

In [27]:
# Pass np.nan as the default so a missing label yields NaN; a plain
# s.get('g') returns None, which the notebook displays as no output at all.
s.get('g', np.nan)

nan

In [29]:
'''
A key difference between Series and ndarray is that operations between Series
automatically align the data based on label. Thus, you can write computations
without giving consideration to whether the Series involved have the same
labels.

The result of an operation between unaligned Series will have the union of
the indexes involved. If a label is not found in one Series or the other,
the result will be marked as missing NaN. Being able to write code without
doing any explicit data alignment grants immense freedom and flexibility in
interactive data analysis and research. The integrated data alignment features
of the pandas data structures set pandas apart from the majority of related
tools for working with labeled data.

Note: in general, we chose to make the default result of operations between
differently indexed objects yield the union of the indexes in order to avoid
loss of information. Having an index label, though the data is missing, is
typically important information as part of a computation. You of course have
the option of dropping labels with missing data via the dropna function.
'''
# Keep the expression as the last statement of the cell so that the aligned
# sum is what gets displayed, not the explanatory string above.
# s[1:] lacks the first label and s[:-1] lacks the last one, so those two
# labels come out as NaN in the result.
s[1:] + s[:-1]

a          NaN
b     2.526353
c    -0.634449
d    -0.003585
e    24.000000
f          NaN
dtype: float64

In [30]:
# A Series can carry a name, supplied through the `name` keyword at creation.
s = pd.Series(data=np.random.randn(5), name='Series Name')
s

0   -1.856399
1   -0.528810
2    0.116802
3   -1.229416
4    1.634604
Name: Series Name, dtype: float64

In [31]:
# The name given at creation is exposed through the .name attribute.
s.name

'Series Name'