# Pandas DataStructure known as the Series
### What is Pandas?
#### Pandas is a library used a great deal in the "Data Science" community that encapsulates arrays and provides a lot of functionality and 
#### optimization for certain functions.

### Would I use Pandas for everything?
#### Nope.  Machine learning, see 004_sklearn_pandas_linearRegress_opticsMoorningData, likes single dimensional arrays.
#### But I would use Pandas to read, prep, and then marshal data into the structure my machine learning API wants.

In [1]:
import numpy as np
import pandas as pd

## Quick Pro-tips

In [2]:
#show all data returned from the dataset (could be HUGE, be careful)
pd.set_option('display.max_rows', None)
#or
pd.set_option('display.max_rows', 10)

#also note that it gets tiring seeing LOTS of floating points
pd.options.display.float_format = '{:,.4f}'.format

#nump equivalent
np.set_printoptions(precision=4)

In [3]:
#Series is a one-dimensional labeled array capable of holding any data type 
series = pd.Series([1,2,3,4,5,'red','green','blue',6,7,8,9])
print(series)

0        1
1        2
2        3
3        4
4        5
      ... 
7     blue
8        6
9        7
10       8
11       9
Length: 12, dtype: object


In [4]:
#If data is an ndarray, index must be the same length as data. If no index is passed, one will be created
series=pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
print(series)
print("-------------------------------------------------------------------")
print(series.index)
print("-------------------------------------------------------------------")
print(series[0])
print("-------------------------------------------------------------------")
print(series[:])

a   -0.0687
b   -0.0585
c   -0.7795
d   -1.2889
e   -0.2959
dtype: float64
-------------------------------------------------------------------
Index(['a', 'b', 'c', 'd', 'e'], dtype='object')
-------------------------------------------------------------------
-0.06867768653905743
-------------------------------------------------------------------
a   -0.0687
b   -0.0585
c   -0.7795
d   -1.2889
e   -0.2959
dtype: float64


In [5]:
#notice that a series can be created from a classic (key=value pair) dictionary
d = {'b': 1, 'a': 0, 'c': 2}
series=pd.Series(d)
print(series)
print(series["b"])

b    1
a    0
c    2
dtype: int64
1


In [6]:
#Series acts very similarly to a ndarray, and is a valid argument to most NumPy functions. However, operations such as slicing will also slice the index.
#If data is an ndarray, index must be the same length as data. If no index is passed, one will be created
series=pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
print("Full array")
print("################################################################################################################")
print(series)
print("################################################################################################################")
print("")
print ("Just the first index")
print("    When directly indexed the 'index' is not included.")
print("################################################################################################################")
print(series[0])
print("")

print(" All values up to element #3")
print("################################################################################################################")
print(series[:3])
print("")

print ("Only those values greater than the median")
print("################################################################################################################")
print(series[series > series.median()])
print("")

print("Integrate with numpy and calculate the exponent, notice Numpy integration")
print("################################################################################################################")
print(np.exp(series))

Full array
################################################################################################################
a    0.2679
b   -0.0314
c   -0.0273
d   -1.7439
e    1.5618
dtype: float64
################################################################################################################

Just the first index
    When directly indexed the 'index' is not included.
################################################################################################################
0.26794576717542573

 All values up to element #3
################################################################################################################
a    0.2679
b   -0.0314
c   -0.0273
dtype: float64

Only those values greater than the median
################################################################################################################
a   0.2679
e   1.5618
dtype: float64

Integrate with numpy and calculate the exponent, notice Numpy integration
################

In [7]:
#Series data type operations
print(series.dtype)

float64


In [8]:
#Get the actual array in a series, maybe for direct manipulation
print("Dump the contents of the Series into a single dimensional Numpy array.")
print("###############################################################################################")
print(series.values)
print("")
print("My series dimensions are: ",series.ndim)
print("My series size is:", series.size)
print("My series shpae is:", series.shape)
print("")
print("###############################################################################################")
my_array=series.values
print("My array dimensions are: ",my_array.ndim)
print("My array size is:", my_array.size)
print("My array shape is:", my_array.shape)

print("")
print("###############################################################################################")
#traditional Python for loop
for idx in range(0,my_array.size):
    print(my_array[idx]);

Dump the contents of the Series into a single dimensional Numpy array.
###############################################################################################
[ 0.2679 -0.0314 -0.0273 -1.7439  1.5618]

My series dimensions are:  1
My series size is: 5
My series shpae is: (5,)

###############################################################################################
My array dimensions are:  1
My array size is: 5
My array shape is: (5,)

###############################################################################################
0.26794576717542573
-0.031377609255666995
-0.027293919528733432
-1.7438845844349342
1.5618207799183927


In [9]:
#now actually store the series in an xarray
series.to_xarray

<bound method NDFrame.to_xarray of a    0.2679
b   -0.0314
c   -0.0273
d   -1.7439
e    1.5618
dtype: float64>

In [10]:
#dictionary type structure example
print("Key 'a' access:",series['a'])
print("")
print("Example of a bad key request for 'z' with a check:", 'z' in series)
print("")
print ("or")
print("")
print ("Key 'z' access with a .get:", series.get('z'))
print("")
print ("or perhaps more elegant")
print("")
print("Key 'z' access with a .get and return for failure:", series.get('z','Not found'))


Key 'a' access: 0.26794576717542573

Example of a bad key request for 'z' with a check: False

or

Key 'z' access with a .get: None

or perhaps more elegant

Key 'z' access with a .get and return for failure: Not found


In [11]:
#vector manipulations
add_series=series+series
print("Series added to itself:\n", add_series)
print("###############################################################################################")
multiply_series=series * 2
print("")
print("Series multiplied by 2:\n", multiply_series)


Series added to itself:
 a    0.5359
b   -0.0628
c   -0.0546
d   -3.4878
e    3.1236
dtype: float64
###############################################################################################

Series multiplied by 2:
 a    0.5359
b   -0.0628
c   -0.0546
d   -3.4878
e    3.1236
dtype: float64


In [12]:
#Series attribution
print("Name your data")
print("###############################################################################################")
print(series.name)
print("or")
series2 = series.rename("My Example Series")
print(series2.name)

Name your data
###############################################################################################
None
or
My Example Series
