In [1]:
# Chapter 5: Getting Started with pandas

In [3]:
import numpy as np

In [5]:
import pandas as pd

In [7]:
from pandas import Series, DataFrame

In [9]:
# Build a Series from a plain list of integers; no index is supplied here.
obj = pd.Series(data=[4, 7, -5, 3])

In [11]:
# Display the Series: index labels in the left column, values in the right.
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [13]:
# The .array attribute exposes the underlying values as an extension array.
obj.array

<NumpyExtensionArray>
[4, 7, -5, 3]
Length: 4, dtype: int64

In [15]:
# No index was passed when obj was built, so it carries a RangeIndex starting at 0.
obj.index

RangeIndex(start=0, stop=4, step=1)

In [17]:
# Same four values, this time with explicit string labels as the index.
obj2 = pd.Series(
    data=[4, 7, -5, 3],
    index=["d", "b", "a", "c"],
)

In [19]:
# Display obj2; the custom string labels appear in the left column.
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [21]:
# The index is an Index object holding the supplied string labels.
obj2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

In [23]:
# Select a single value by its index label.
obj2["a"]

-5

In [25]:
# Assign by label. This mutates obj2 in place: every later cell sees d == 6.
obj2["d"] = 6

In [27]:
# A list of labels selects several entries, returned in the order requested.
obj2[["c", "a", "d"]]

c    3
a   -5
d    6
dtype: int64

In [29]:
# Boolean mask: keep only the entries whose value is greater than zero.
# The surviving entries keep their original labels.
obj2[obj2 > 0]

d    6
b    7
c    3
dtype: int64

In [31]:
# Scalar multiplication is applied element-wise; the index is preserved.
obj2 * 2

d    12
b    14
a   -10
c     6
dtype: int64

In [33]:
# A NumPy ufunc applied to a Series returns a Series with the same index
# (the result here is float64).
np.exp(obj2)

d     403.428793
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

In [35]:
# `in` tests against the index labels, much like key lookup in a dict.
"b" in obj2

True

In [37]:
# A label that is absent from the index yields False.
"e" in obj2

False

In [41]:
# Plain dict mapping a state name to an integer value; used to build Series.
sdata = {
    "Ohio": 35000,
    "Texas": 71000,
    "Oregon": 16000,
    "Utah": 5000,
}

In [43]:
# Build a Series straight from the dict: keys become labels, values become data.
obj3 = pd.Series(data=sdata)

In [45]:
# Display obj3: the dict keys became the index, in the dict's insertion order.
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [49]:
# to_dict() converts the Series back into a plain dict of label -> value.
obj3.to_dict()

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

In [51]:
# Labels to use as an explicit index. Compared with the keys of sdata,
# "California" is extra and "Utah" is left out.
states = [
    "California",
    "Ohio",
    "Oregon",
    "Texas",
]

In [53]:
# Build from the dict again, but let `states` dictate which labels appear and in what order.
obj4 = pd.Series(data=sdata, index=states)

In [57]:
# "California" has no entry in sdata, so its value is NaN (Not a Number) and
# the dtype becomes float64; "Utah" is not in states and is therefore excluded.
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [59]:
# The terms "missing," "NA," and "null" are used interchangeably to refer to missing data.

In [61]:
# Top-level function form: returns a boolean Series, True where a value is missing.
pd.isna(obj4)

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [63]:
# Inverse check: True where a value is present, False where it is missing.
pd.notna(obj4)

California    False
Ohio           True
Oregon         True
Texas          True
dtype: bool

In [67]:
# Method form of the same check; produces the same result as pd.isna(obj4).
obj4.isna()

California     True
Ohio          False
Oregon        False
Texas         False
dtype: bool

In [69]:
# Re-display obj3 (labels: Ohio, Texas, Oregon, Utah) for comparison with obj4.
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [71]:
# Re-display obj4 (labels: California, Ohio, Oregon, Texas) for comparison with obj3.
obj4

California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
dtype: float64

In [73]:
# Addition aligns the operands on index label rather than position. A label
# that is missing from one side (Utah) or holds NaN (California) yields NaN.
obj3 + obj4

California         NaN
Ohio           70000.0
Oregon         32000.0
Texas         142000.0
Utah               NaN
dtype: float64

In [75]:
# Name the Series itself; the repr then ends with "Name: population".
obj4.name = "population"

In [77]:
# Name the index; the repr shows it as a header line above the labels.
obj4.index.name = "state"

In [79]:
# Display obj4 again to see both the index name and the Series name in the repr.
obj4

state
California        NaN
Ohio          35000.0
Oregon        16000.0
Texas         71000.0
Name: population, dtype: float64

In [81]:
# obj still carries its default integer labels at this point.
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [83]:
# Replace the index in place by assignment, one new label per element.
# This mutates obj, so re-running earlier display cells will show the new labels.
obj.index = ["Bob", "Steve", "Jeff", "Ryan"]

In [85]:
# Display obj again: same values, now labelled with the newly assigned names.
obj

Bob      4
Steve    7
Jeff    -5
Ryan     3
dtype: int64