In [None]:
import numpy as np
# Display the installed NumPy version so readers can see which release the notebook ran against.
np.__version__

In [None]:
import pandas as pd
# Display the installed pandas version so readers can see which release the notebook ran against.
pd.__version__

## Pandas Series
### Constructing Series objects
> pd.Series(data, index=index)

In [None]:
# Build a Series from a plain list; no index is given, so pandas supplies one.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])

# Print, one after another: the Series itself, its underlying values, and its index.
print(data, data.values, data.index, sep='\n')

In [None]:
# A scalar value is repeated for every label in the index.
data = pd.Series(1, index=list('abcd'))
print(data)

# A list of values is paired with the labels position by position.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)
print(data)

# Passing an existing Series together with an index keeps only the requested
# labels, in the requested order.
subset = pd.Series(data, index=['c', 'a'])
print(subset)


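> When creating a `pd.Series` from a list or array, the index array must have the <span style="color:Crimson">same length as the data array</span>.
>
> *data = pd.Series([0.25, 0.5, 0.75, 1.0], index =['a', 'b'])* would raise a `ValueError`.
>
> However, when the data is an already-created Series, the `index` argument instead selects (and reorders) the matching labels, as in `pd.Series(data, index=['c', 'a'])` above.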
> 

#### Data can be a dictionary, in which case the index is taken from the dictionary keys (in insertion order for pandas ≥ 0.23 on Python ≥ 3.6; older versions sorted the keys)

In [None]:
# The dictionary keys supply the index labels; the values supply the data.
print(pd.Series({2: 'a', 1: 'b', 3: 'c'}))

# Area figures keyed by state name.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(data=area_dict)
print(area)

# Population figures keyed by the same state names.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(data=population_dict)
print(population)

## Pandas DataFrame
### The DataFrame object
#### DataFrame as a generalized NumPy array

In [None]:
# Area figures keyed by state name, wrapped in a Series.
area_dict = {
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
area = pd.Series(data=area_dict)
print(area)

# Population figures keyed by the same state names, wrapped in a Series.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}
population = pd.Series(data=population_dict)
print(population)

# Combine the two Series into one table: each Series becomes a column and the
# shared state names become the row index.
states = pd.DataFrame(dict(population=population, area=area))
states

#### DataFrame as specialized dictionary

In [None]:
# Dictionary-style access: indexing the frame with a column name returns that
# column as a Series. Print the 'area' column, then the 'population' column.
print(states['area'], states['population'], sep='\n')

### Constructing DataFrame objects
#### From a single Series object.

In [None]:
# Wrap the single `population` Series in a one-column DataFrame; `columns`
# supplies the label for that column.
# NOTE(review): this relies on `population` being an unnamed Series — confirm if its construction changes.
pd.DataFrame(population, columns=['population'])

#### From a list of dicts

In [None]:
# One record (dict) per row; the dict keys 'a' and 'b' become the column names.
data = [dict(a=i, b=2 * i) for i in range(3)]
pd.DataFrame(data)

#### From a two-dimensional NumPy array

In [None]:
# Use a locally seeded generator so the "random" table is identical on every
# run (Restart & Run All reproduces the same output) without touching NumPy's
# global random state.
rng = np.random.RandomState(0)

# A 3x2 array of uniform samples in [0, 1), given explicit column names and
# row labels.
random_frame = pd.DataFrame(
    rng.rand(3, 2),
    columns=['foo', 'bar'],
    index=['a', 'b', 'c'],
)
random_frame

#### From a NumPy structured array

In [None]:
# Structured array of three zero-filled records, each with an 8-byte integer
# field 'A' and an 8-byte float field 'B'.
A = np.zeros(
    shape=3,
    dtype=np.dtype([('A', 'i8'), ('B', 'f8')]),
)
print(A)

# Each named field of the structured array becomes a DataFrame column.
Data = pd.DataFrame(data=A)
print(Data)

# The same construction, this time with explicit row labels.
pd.DataFrame(data=A, index=['a', 'b', 'c'])

### The Pandas Index Object

#### Data Selection in Series

In [None]:
# Series with string labels 'a'..'d'.
data = pd.Series(
    [0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)

# Print the whole Series, then the single value stored under label 'b'.
print(data, data['b'], sep='\n')

#### Data Selection in DataFrame

In [None]:
# Area figures keyed by state name.
area = pd.Series(data={
    'California': 423967,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
})

# Population figures keyed by the same state names.
pop = pd.Series(data={
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
})

# Two-column table with the state names as the row index.
data = pd.DataFrame(dict(area=area, pop=pop))
data

In [None]:
# Add a derived column: population divided by area, computed element-wise
# with rows matched on the shared state index.
data['density'] = data['pop'].div(data['area'])
data

In [None]:
# .values returns the frame's contents as a two-dimensional NumPy array;
# the row and column labels are not part of it.
print(data.values)

In [None]:
# First row of the underlying NumPy array (plain positional indexing on .values).
print(data.values[0])
# Label-based row slice with .loc: both endpoints are included.
print(data.loc["California":"Florida"])
# Position-based row slice with .iloc: the stop position (4) is excluded.
print(data.iloc[0:4])
# Positional selection on both axes: first three rows, first two columns.
data.iloc[:3, :2]

In [None]:
# Label-based selection on both axes: rows up to and including 'Illinois',
# columns up to and including 'pop'.
data.loc[:'Illinois', :'pop']

In [None]:
# Indexers: Series.ix and DataFrame.ix were deprecated (pandas 0.20) and later removed (pandas 1.0).
# Original example, which no longer runs on current pandas: data.ix[:3, :'pop']
# Equivalent using the supported indexers: data.iloc[:3].loc[:, :'pop']

In [None]:
# Boolean mask on the rows (density above 100) combined with an explicit list
# of column labels.
data.loc[data.density > 100, ['area', 'pop', 'density']]

In [None]:
# Rows with density above 100, restricted to three named columns. The mask is
# built with bracket access (data['density']), which selects the same column
# as the attribute form data.density.
data.loc[data['density'] > 100, ['area', 'pop', 'density']]

In [None]:
# Row mask: True for rows whose density exceeds 100.
dense_rows = data['density'] > 100
# Column mask: True for columns whose value in the 'New York' row exceeds 140.
wide_columns = data.loc['New York'] > 140

# Boolean masks applied to both axes at once.
data.loc[dense_rows, wide_columns]

In [None]:
# Show the full frame for reference.
data

In [None]:
# Slicing with [] acts on rows, not columns; with labels, both endpoints are included.
data['Florida':'Illinois']

In [None]:
# Integer slices with [] also act on rows, by position (the stop position is excluded).
data[1:3]

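> Take care: <span style="color:Crimson">selecting a single row</span> this way,
> e.g. `data[1]`, does not work: it is treated as a column lookup and raises a `KeyError`. Use a slice such as `data[1:2]`, or `data.iloc[1]`, instead.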

## Operating on Data in Pandas
### UFuncs: Index Alignment