# Pandas Basics

In [1]:
from pandas import Series, DataFrame

In [20]:
import pandas as pd
import numpy as np

## Series

##### A Series is a one-dimensional array-like object containing an array of data (of any NumPy data type) and an associated array of data labels, called its index. The simplest Series is formed from only an array of data:

In [4]:
# Build a Series from a plain list; no index is given, so pandas assigns the
# default integer labels.
obj = Series(data=[4, 7, -5, 3])

In [5]:
# Echo the Series: index labels print on the left, values on the right.
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [6]:
# .values exposes the underlying data as a NumPy array (no index attached).
obj.values

array([ 4,  7, -5,  3])

In [7]:
# With no index supplied, the default is a RangeIndex covering 0..len-1.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [8]:
# Same values as before, but with explicit string labels attached via `index=`.
obj2 = Series(
    data=[4, 7, -5, 3],
    index=['d', 'b', 'a', 'c'],
)

In [9]:
# Echo obj2: the left-hand column now shows the custom string labels.
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [10]:
# The labels are held in an Index object (dtype 'object' for these strings).
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [11]:
# Select a single value by its index label.
obj2['a']

-5

In [12]:
# Inspect the concrete class of the index object.
type(obj2.index)

pandas.core.indexes.base.Index

### Indexing into the index

In [13]:
# The Index itself supports positional access: element 0 is the first label.
obj2.index[0]

'd'

In [14]:
# Assign through a label: overwrites the value stored under 'd' in place
# (it was 4 when obj2 was created).
obj2['d'] = 6

In [15]:
# A list of labels selects several entries, returned in the order requested.
obj2[['c', 'a', 'd']]

c    3
a   -5
d    6
dtype: int64

In [16]:
# Boolean filtering: keep only the entries whose value is greater than zero.
obj2[obj2 > 0]

d    6
b    7
c    3
dtype: int64

In [17]:
# The filter above returned a new object; obj2 itself still has all four entries.
obj2


d    6
b    7
a   -5
c    3
dtype: int64

### basic calculation

In [18]:
# Scalar multiplication is applied element-wise; the index labels are kept.
obj2 * 2

d    12
b    14
a   -10
c     6
dtype: int64

In [21]:
# NumPy ufuncs work element-wise on a Series and keep its index; the result
# here is float64.
np.exp(obj2)

d     403.428793
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [22]:
# `in` tests membership against the index labels, not the values.
'b' in obj2

True

In [23]:
# A label that is absent from the index gives False.
'e' in obj2

False

### Create a Series from a Python dict

##### Should you have data contained in a Python dict, you can create a Series from it by passing the dict:

In [24]:
# Plain dict mapping a state name to a number; used below as Series input.
sdata = {
    'Ohio': 35000,
    'Texas': 71000,
    'Oregon': 16000,
    'Utah': 5000,
}

In [25]:
# Convert the dict to a Series: the dict keys become the index labels and the
# dict values become the data.
obj3 = Series(data=sdata)
obj3

Ohio      35000
Oregon    16000
Texas     71000
Utah       5000
dtype: int64

In [26]:
# Pass the dict together with an explicit list of index labels.
# Only the requested labels appear in the result, in the order given:
# 'California' has no entry in sdata so its value is NaN, and 'Utah' is left
# out because it is not in `states`. The NaN makes the dtype float64.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = Series(sdata, index=states)
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [30]:
# Flag missing entries: True where the value is NaN (only 'California' here).
pd.isnull(obj4)



California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [32]:
# Method form of the same check; gives the same mask as pd.isnull(obj4).
obj4.isnull()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [29]:
# Take the first element of the null mask by position. `.iloc` is used because
# the index holds string labels; plain `[0]` on a label-indexed Series relies
# on a deprecated positional fallback.
pd.isnull(obj4).iloc[0]

True

In [31]:
# Complement of isnull(): True where a value is present, False where it is NaN.
obj4.notnull()

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [39]:
# Use the boolean null mask to index the Index itself, yielding the labels
# whose values are missing.
obj4.index[obj4.isnull()]


Index(['California'], dtype='object')

##### A critical Series feature for many applications is that it automatically aligns differently-indexed data in arithmetic operations.

In [40]:
# Addition aligns on index labels. The result covers the union of labels;
# 'California' (NaN in obj4) and 'Utah' (only in obj3) come out as NaN.
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

##### Both the Series object itself and its index have a name attribute

In [43]:
# Name the Series and its index; both names show up in the printed repr
# ('state' above the labels, 'Name: population' in the footer).
obj4.name = 'population'
obj4.index.name = 'state'
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

## DataFrame

In [44]:
# Column-oriented input for a DataFrame: each key is a column name mapped to
# a list of five values.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}

In [45]:
# Build a DataFrame from the dict of equal-length lists; each key becomes a column.
frame = DataFrame(data)

In [46]:
# `columns=` fixes the column order of the resulting frame (year, state, pop).
DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


In [48]:
# Build a frame with an explicit column order and custom row labels.
# 'debt' is not a key of `data`; it still appears in the resulting columns.
frame2 = DataFrame(
    data,
    columns=['year', 'state', 'pop', 'debt'],
    index=['one', 'two', 'three', 'four', 'five'],
)
frame2.columns

Index(['year', 'state', 'pop', 'debt'], dtype='object')