In [1]:
# Let's check which version of Pandas is installed
import pandas
pandas.__version__

u'0.23.4'

In [2]:
# Just as NumPy is conventionally imported under the alias np, we import Pandas under the alias pd
import pandas as pd

In [3]:
# At a basic level, Pandas objects can be thought of as enhanced versions of NumPy structured arrays:
# the rows and columns are identified with labels rather than simple integer indices.
# Let's introduce the 3 fundamental Pandas data structures: Series, DataFrame, and Index.
import numpy as np
import pandas as pd

# A Pandas Series is a one-dimensional array of indexed data.
# It can be created from a list (or an array) of values as follows:
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
)
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
# As we can see, the Series wraps both a sequence of values and a sequence of indices
# We can access them through the values and index attributes
# The values are simply a familiar NumPy array
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [5]:
# The index is an array-like object of type pd.Index (shown here as a RangeIndex):
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
# As with a NumPy array, data can be accessed by its associated index via Python's square-bracket notation
data[1]

0.5

In [7]:
# Slicing works as well and returns a Series holding just the selected entries
data[1:3]

1    0.50
2    0.75
dtype: float64

In [8]:
# As we will see, the Pandas Series is much more flexible than the 1D NumPy array that it emulates.
# Series and NumPy arrays are very similar; the essential difference is the index:
#   - a NumPy array has an implicitly defined integer index used to access values
#   - a Pandas Series has an explicitly defined index associated with the values
# This explicit index definition gives the Series object additional capabilities.
# Ex. the index doesn't have to be an integer; it can consist of values of any type
# Ex. we can use strings as an index:
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=['a', 'b', 'c', 'd'],
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [9]:
# Item access by label works as expected:
data['b']

0.5

In [10]:
# The index labels do not have to be contiguous or sequential either:
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=[2, 5, 3, 7],
)
data

2    0.25
5    0.50
3    0.75
7    1.00
dtype: float64

In [11]:
# The lookup is by index label: 5 selects the entry labelled 5, not the entry at position 5
data[5]

0.5

In [12]:
# In this way, we can think of a Pandas Series as a specialization of a Python dictionary
# A dictionary is a structure that maps arbitrary keys to a set of arbitrary values
# A Series is a structure that maps typed keys to a set of typed values
# Just as the type-specific compiled code behind a NumPy array can make it more efficient than a Python list,
# the type information of a Pandas Series can make it more efficient than a Python dictionary
# Let's make this Series-as-dictionary analogy more clear by creating a Series directly from a Python dictionary:
population_dict = {'California': 38332521,
                   'Texas': 26448193,
                   'New York': 19651127,
                   'Florida': 19552860,
                   'Illinois': 12882135}
# Sort the index explicitly: whether the constructor orders the dictionary keys depends on the
# Python/Pandas versions in use, and the label slice taken later in this notebook
# (population['California':'Illinois']) only selects the intended rows on an alphabetically sorted index
population = pd.Series(population_dict).sort_index()
population

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [13]:
# Here the index was drawn from the sorted dictionary keys
# NOTE(review): this ordering is version-dependent -- newer Pandas on Python 3.6+ keeps the dict's insertion order; confirm for your environment
# From here, typical dictionary-style item access can be performed:
population['California']

38332521

In [14]:
# Unlike a dictionary, a Series also supports array-style operations such as slicing
# Note that slicing by label includes the stop label ('Illinois') in the result
population['California':'Illinois']

California    38332521
Florida       19552860
Illinois      12882135
dtype: int64

In [15]:
# We have already seen a few ways of constructing a Pandas Series from scratch
# All of them are some version of pd.Series(data, index=index)
# Ex. data can be a list or NumPy array, in which case the index defaults to an integer sequence
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

In [16]:
# data can be a scalar, which is repeated to fill the specified index:
pd.Series(5, index=[100, 200, 300])

100    5
200    5
300    5
dtype: int64

In [17]:
# data can be a dictionary, in which case the index defaults to the dictionary keys (sorted, in the output shown here):
# NOTE(review): newer Pandas on Python 3.6+ keeps the dict's insertion order instead of sorting -- confirm for your environment
pd.Series({2:'a', 1:'b', 3:'c'})

1    b
2    a
3    c
dtype: object

In [18]:
# In each case, the index can be set explicitly if a different result is preferred:
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])

3    c
2    a
dtype: object

In [None]:
# Above, the Series is populated only with the explicitly identified keys