# 1 Basic

-  Pandas objects can be thought of as enhanced versions of NumPy structured arrays in which the rows and columns are identified with **labels** rather than simple integer indices
- Three fundamental Pandas data structures: the Series, DataFrame, and Index
- One difference between Index objects and NumPy arrays is that indices are immutable

# Examples

In [6]:
import numpy as np
import pandas as pd
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"#show intermediate output

In [11]:
population_dict = {'California': 38332521,
                   'Texas': 26448193,
                   'New York': 19651127,
                   'Florida': 19552860,
                   'Illinois': 12882135}
area_dict = {'California': 423967, 'Texas': 695662, 'New York': 141297,
             'Florida': 170312, 'Illinois': 149995}

area = pd.Series(area_dict)
population = pd.Series(population_dict)
population


California    38332521
Texas         26448193
New York      19651127
Florida       19552860
Illinois      12882135
dtype: int64

In [12]:
pd.DataFrame(population, columns=['population'])

states = pd.DataFrame({'population': population,
                       'area': area})
states

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


Unnamed: 0,population,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


Unnamed: 0,foo,bar
a,0.102961,0.965254
b,0.091023,0.194905
c,0.077084,0.779211


In [18]:
states['density'] = states['population'] / states['area']
states

states.values[0]
states['area']

Unnamed: 0,population,area,density
California,38332521,423967,90.413926
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121
Illinois,12882135,149995,85.883763


array([3.83325210e+07, 4.23967000e+05, 9.04139261e+01])

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [20]:
states.ix[:3, :'pop']

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552860
Illinois,12882135


In [13]:
pd.DataFrame(np.random.rand(3, 2),
             columns=['foo', 'bar'],
             index=['a', 'b', 'c'])

Unnamed: 0,foo,bar
a,0.135759,0.34074
b,0.112939,0.071676
c,0.977186,0.962096


In [9]:
data = [{'a': i, 'b': 2 * i}
        for i in range(3)]
pd.DataFrame(data)#Any list of dictionaries can be made into a ``DataFrame``.

Unnamed: 0,a,b
0,0,0
1,1,2
2,2,4
