In [158]:
import numpy as np
import pandas as pd

<H1 style="color:CornflowerBlue;">Pandas Series</H1>

In [159]:
# Build two Series from the same four values: one keeps the default integer
# index (0..3), the other is labelled 'a'..'d'.
data = [0.25, 0.5, 0.75, 1]
series = pd.Series(data)
seriesOdd = pd.Series(data, index=list('abcd'))

print(series, seriesOdd, sep='\n')
print()
print(series.values)  # the underlying NumPy array, without the index

# Scalar lookup with []: 1 is a label of the default index, 'a' an explicit label.
print(series[1])
print(seriesOdd['a'])

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64
a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

[0.25 0.5  0.75 1.  ]
0.5
0.25


<H1 style="color:CornflowerBlue;">Pandas DataFrame </H1>

In [160]:
# Build a two-column DataFrame from two Series keyed by the same state names;
# each Series becomes one column and the state names become the row index.
data = {
    'California': 423957,
    'Texas': 695662,
    'New York': 141297,
    'Florida': 170312,
    'Illinois': 149995,
}
pop = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552760,
    'Illinois': 12882135,
}
area = pd.Series(data)
population = pd.Series(pop)

states = pd.DataFrame(dict(population=population, area=area))
states

Unnamed: 0,population,area
California,38332521,423957
Texas,26448193,695662
New York,19651127,141297
Florida,19552760,170312
Illinois,12882135,149995


In [161]:
# Promote the population Series to a one-column DataFrame, naming the column
# explicitly; the state labels are kept as the row index.
population.to_frame(name='population')

Unnamed: 0,population
California,38332521
Texas,26448193
New York,19651127
Florida,19552760
Illinois,12882135


In [162]:
# A list of dictionaries, one per row: the keys become the column names.
# range(3) yields three rows (i = 0, 1, 2).
data = [dict(a=i, b=2 ** i) for i in range(3)]
pd.DataFrame(data)

Unnamed: 0,a,b
0,0,1
1,1,2
2,2,4


In [163]:
# Wrap a 3 x 2 NumPy array of uniform random values in a DataFrame, labelling
# the rows 'a'..'c' and the columns 'foo' / 'bar'.
# No seed is set, so the displayed numbers change on every run.
pd.DataFrame(
    data=np.random.rand(3, 2),
    index=list('abc'),
    columns=['foo', 'bar'],
)

Unnamed: 0,foo,bar
a,0.707006,0.646417
b,0.014762,0.663284
c,0.902012,0.467429


<H1 style="color:CornflowerBlue;">Accessing data in a Pandas Series or DataFrame</H1>

In [164]:
# For Series data
data = pd.Series([0.25, 0.5, 0.75, 1], index=list('abcd'))

# Every result is followed by a blank line to keep the outputs apart.
print(data['b'], end='\n\n')    # look up a single value by its label
print(data.keys(), end='\n\n')  # keys() returns the index
print(data.values, end='\n\n')  # the values as a NumPy array

0.5

Index(['a', 'b', 'c', 'd'], dtype='object')

[0.25 0.5  0.75 1.  ]



In [165]:
# Four ways of selecting from the Series with []; each result is followed by
# a blank line.
print(data['a':'c'], end='\n\n')                     # label slice: the stop label 'c' is included
print(data[1:3], end='\n\n')                         # integer slice: positional, the stop position is excluded
print(data[(.3 < data) & (data < 1.1)], end='\n\n')  # masking: keep values where the Boolean condition holds
print(data[['a', 'b']], end='\n\n')                  # fancy indexing: pass a list of labels instead of a slice

a    0.25
b    0.50
c    0.75
dtype: float64

b    0.50
c    0.75
dtype: float64

b    0.50
c    0.75
d    1.00
dtype: float64

a    0.25
b    0.50
dtype: float64



In [166]:
print('New Series')
data = pd.Series(list('abc'), index=[1, 3, 5])
print(data, '-----', '', sep='\n')

# Indexer attributes.
# .loc always works on the index labels (here 1, 3, 5), and a label slice
# includes its stop: 1:3 returns the rows labelled 1 and 3. There is no
# label 2 in this index.
print(data.loc[1])
print(data.loc[1:3], end='\n\n')

# .iloc always works on positions (0, 1, 2), and a positional slice excludes
# its stop: 1:3 returns positions 1 and 2, i.e. the rows labelled 3 and 5.
print(data.iloc[1])
print(data.iloc[1:3])

New Series
1    a
3    b
5    c
dtype: object
-----

a
1    a
3    b
dtype: object

b
3    b
5    c
dtype: object


In [167]:
# For DataFrames
# Rebuild the same population/area frame as earlier so this cell runs on its own.
data = {'California': 423957, 'Texas': 695662, 'New York': 141297,
        'Florida': 170312, 'Illinois': 149995}
pop = {'California': 38332521, 'Texas': 26448193, 'New York': 19651127,
       'Florida': 19552760, 'Illinois': 12882135}
area, population = pd.Series(data), pd.Series(pop)

states = pd.DataFrame(dict(population=population, area=area))

# Each result is followed by a blank line.
print(states, end='\n\n')
print(states['area'], end='\n\n')  # dictionary-style access to the 'area' column (the second column)
print(states.area, end='\n\n')     # attribute-style access to the same column

            population    area
California    38332521  423957
Texas         26448193  695662
New York      19651127  141297
Florida       19552760  170312
Illinois      12882135  149995

California    423957
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

California    423957
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64



In [168]:
# Three views of the first row.
print(states.to_numpy()[0], end='\n\n')  # bare values of row 0, without any labels
print(states.iloc[:1], end='\n\n')       # positional slice: rows up to (not including) position 1
print(states.loc[:'California'])         # label slice: rows up to and including 'California'

[38332521   423957]

            population    area
California    38332521  423957

            population    area
California    38332521  423957


In [169]:
# .iloc slices by position and excludes the stop: the first 3 rows and the first 2 columns.
print(states.iloc[0:3, 0:2], end='\n\n')
# .loc slices by label and includes the stop: every row through 'Florida'
# (4 rows) and every column through 'area' (2 columns).
print(states.loc[:'Florida', :'area'], end='\n\n')

            population    area
California    38332521  423957
Texas         26448193  695662
New York      19651127  141297

            population    area
California    38332521  423957
Texas         26448193  695662
New York      19651127  141297
Florida       19552760  170312



In [170]:
print('add column density')
# Derived column: element-wise population / area, aligned on the state index.
states['density'] = states['population'].div(states['area'])
print(states, end='\n\n')

# Boolean row filter combined with a column list: keep only the states whose
# density exceeds 100 and show just 'population' and 'area' for them.
print(states.loc[states['density'] > 100, ['population', 'area']], end='\n\n')

# Overwrite one cell by position: row 0 (California), last column
# ('density', which was just added at the end of the frame).
states.iloc[0, -1] = 90
states

add column density
            population    area     density
California    38332521  423957   90.416059
Texas         26448193  695662   38.018740
New York      19651127  141297  139.076746
Florida       19552760  170312  114.805533
Illinois      12882135  149995   85.883763

          population    area
New York    19651127  141297
Florida     19552760  170312



Unnamed: 0,population,area,density
California,38332521,423957,90.0
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552760,170312,114.805533
Illinois,12882135,149995,85.883763
