In [1]:
import numpy as np
import pandas as pd

### The pandas Index Object

In [2]:
# Build a standalone Index from a list of integer labels and display it.
ind = pd.Index(data=[2, 3, 5, 7, 11])
ind

Int64Index([2, 3, 5, 7, 11], dtype='int64')

### Indexing a DataFrame with `[]`, `.loc`, and `.iloc`

In [3]:
# Population figure for each state, keyed by state name.
# The keys match those of the area dictionary so the two can be combined
# into a single DataFrame later on.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135
}

# Backward-compatible alias: the original (misspelled) name is still
# referenced by a later cell, so keep it bound to the same dict object.
populaction_dict = population_dict

In [4]:
# Area of each state, keyed by state name.
# NOTE(review): the unit is not stated anywhere in this notebook - confirm
# before comparing against other sources.
area_dict = {
    "California": 423967,
    "Texas": 695662,
    "New York": 141297,
    "Florida": 170312,
    "Illinois": 149995,
}

In [5]:
# Wrap each dict in a Series; the dict keys (state names) become the index.
population = pd.Series(populaction_dict)
area = pd.Series(area_dict)

# Combine the two Series column-wise. They share the same state-name index,
# so each row of the resulting frame describes one state.
data = pd.DataFrame(dict(pop=population, area=area))
data


Unnamed: 0,pop,area
California,38332521,423967
Texas,26448193,695662
New York,19651127,141297
Florida,19552860,170312
Illinois,12882135,149995


In [6]:
# Dictionary-style access: selecting a single column by name returns it as a
# Series indexed by the frame's row labels.
data['area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [7]:
# Fetch one cell by (row label, column label) in a single .loc call.
# This replaces the chained form data['area']['California'], which performs
# two separate lookups and is the pattern that breaks when used for assignment.
data.loc['California', 'area']

423967

In [8]:
# Label-based row selection: a single row label returns that row as a Series
# whose index is the frame's column names.
data.loc['California']

pop     38332521
area      423967
Name: California, dtype: int64

In [9]:
# .loc takes [rows, columns]; ':' keeps every row, so this returns the whole
# 'area' column as a Series (same result as data['area']).
data.loc[:,'area']

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [10]:
# Boolean-mask selection: keep only the rows whose 'area' value is strictly
# greater than 400000. The mask is a boolean Series aligned on the row index.
data.loc[data['area'].gt(400000)]

Unnamed: 0,pop,area
California,38332521,423967
Texas,26448193,695662


In [11]:
# Position-based row selection: .iloc ignores the labels and picks rows by
# 0-based integer position; a list of positions returns a DataFrame.
data.iloc[[1,3]]

Unnamed: 0,pop,area
Texas,26448193,695662
Florida,19552860,170312


In [12]:
# Attribute-style column access: equivalent to data['area'] here.
# Caveat: this only works when the column name is a valid Python identifier
# that does not collide with a DataFrame attribute or method. For example,
# data.pop resolves to the DataFrame.pop method, not the 'pop' column.
data.area

California    423967
Texas         695662
New York      141297
Florida       170312
Illinois      149995
Name: area, dtype: int64

In [13]:
# Derived column: population divided by area, computed element-wise with the
# two columns aligned on the shared row index. Assigning to a new key adds
# the column to `data` in place.
data['density'] = data['pop'].div(data['area'])
data

Unnamed: 0,pop,area,density
California,38332521,423967,90.413926
Texas,26448193,695662,38.01874
New York,19651127,141297,139.076746
Florida,19552860,170312,114.806121
Illinois,12882135,149995,85.883763


### Operations

In [14]:
# Seeded legacy RandomState so every draw below is reproducible.
rng = np.random.RandomState(seed=42)

# Four integers drawn from the half-open range [0, 10), wrapped in a Series
# with the default 0..3 integer index.
ser = pd.Series(rng.randint(low=0, high=10, size=4))
ser

0    6
1    3
2    7
3    4
dtype: int64

In [15]:
# 3x4 frame of integers from [0, 10), drawn from the same seeded generator
# as `ser`, with columns labelled 'A'..'D'.
df = pd.DataFrame(
    rng.randint(low=0, high=10, size=(3, 4)),
    columns=list('ABCD'),
)
df

Unnamed: 0,A,B,C,D
0,6,9,2,6
1,7,4,3,7
2,7,2,5,4


In [16]:
# NumPy ufuncs work directly on a Series: the natural log is applied
# element-wise and the result is a float Series that keeps the same index.
np.log(ser)

0    1.791759
1    1.098612
2    1.945910
3    1.386294
dtype: float64

In [17]:
# The same ufunc on a DataFrame is applied to every element; both the row
# index and the column labels are preserved in the result.
np.log(df)

Unnamed: 0,A,B,C,D
0,1.791759,2.197225,0.693147,1.791759
1,1.94591,1.386294,1.098612,1.94591
2,1.94591,0.693147,1.609438,1.386294


In [18]:
# Fresh generator with the same seed, so this cell does not depend on how
# many values earlier cells have already drawn.
rng2 = np.random.RandomState(seed=42)

# Two frames with deliberately different shapes and column orders, used to
# show how arithmetic aligns on labels. A is drawn first, then B, so the
# sequence of random draws is fixed.
A = pd.DataFrame(rng2.randint(low=0, high=20, size=(2, 2)),
                 columns=['A', 'B'])
B = pd.DataFrame(rng2.randint(low=0, high=10, size=(3, 3)),
                 columns=['B', 'A', 'C'])

# Plain-text dump of both frames, separated by one blank line.
print(A)
print()
print(B)

    A   B
0   6  19
1  14  10

   B  A  C
0  7  4  6
1  9  2  6
2  7  4  3


In [19]:
# Arithmetic between DataFrames aligns on both row and column labels, not on
# position. The result covers the union of the labels; any cell that exists
# in only one operand (row 2, column 'C') comes out as NaN, which also makes
# the result float-typed.
A + B

Unnamed: 0,A,B,C
0,10.0,26.0,
1,16.0,19.0,
2,,,
