# Chapter 5. Getting Started with pandas

## Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# No index is passed, so pandas assigns the default integer index (0, 1, ...).
s = pd.Series([1, 2])

In [3]:
s.values

array([1, 2])

In [4]:
s.index

RangeIndex(start=0, stop=2, step=1)

In [5]:
# A NumPy ufunc applied to a Series returns a new Series that keeps the same
# index labels; `s` itself is left unchanged.
s2 = np.exp(s)
s2

0    2.718282
1    7.389056
dtype: float64

In [6]:
s2.values

array([2.71828183, 7.3890561 ])

In [7]:
s

0    1
1    2
dtype: int64

In [8]:
# `in` checks the index labels (like the keys of a dict), not the stored values:
# 0 is a label of `s`, while its values are 1 and 2.
0 in s

True

In [9]:
# Building a Series from a dict: the keys become the index labels.
sdict = pd.Series({'b': 10, 'a': 2})
sdict

b    10
a     2
dtype: int64

In [10]:
# Passing `index` fixes which labels appear and in what order. A label with no
# matching dict key ('c') is filled with NaN, which makes the dtype float64.
sdict2 = pd.Series({'b': 10, 'a': 2}, index=['a','b','c'])
sdict2

a     2.0
b    10.0
c     NaN
dtype: float64

In [11]:
# Arithmetic aligns the operands on their index labels, not on position; 'c'
# exists only in sdict2 (and is NaN there), so the result is NaN for 'c'.
sdict + sdict2

a     4.0
b    20.0
c     NaN
dtype: float64

In [12]:
s

0    1
1    2
dtype: int64

In [13]:
s.name = "series name"
s

0    1
1    2
Name: series name, dtype: int64

In [14]:
s.index.name = "index name"
s

index name
0    1
1    2
Name: series name, dtype: int64

## DataFrame

In [15]:
# Dict keys become the column labels; `index` supplies the row labels.
# Column 'a' holds integers and column 'b' holds strings.
df = pd.DataFrame({'a': [1,2,3], 'b': ['1','2','3']}, index=['aa', 'bb', 'cc'])
df

Unnamed: 0,a,b
aa,1,1
bb,2,2
cc,3,3


In [16]:
# .loc selects a row by label and returns it as a Series whose index is the
# DataFrame's column labels. The row mixes an int and a str, hence dtype=object.
bb = df.loc['bb']
bb

a    2
b    2
Name: bb, dtype: object

In [17]:
bb.index

Index(['a', 'b'], dtype='object')

In [18]:
bb.values

array([2, '2'], dtype=object)

In [19]:
# `bb` holds a copy of the row, not a view: assigning to it leaves `df`
# unchanged (pandas warns about setting a value on a copy).
# .iloc makes the positional write explicit; `bb[0]` relied on the deprecated
# fallback that treats an integer key as a position on a label-indexed Series.
bb.iloc[0] = 10
bb

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


a    10
b     2
Name: bb, dtype: object

In [20]:
a = df['a']
a

aa    1
bb    2
cc    3
Name: a, dtype: int64

In [21]:
# `a` shares its data with `df` here, so this write also changes df.loc['aa', 'a'].
# NOTE(review): whether df['a'] is a view depends on the pandas version and its
# copy-on-write setting — confirm before relying on this.
a['aa'] = 10
a

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


aa    10
bb     2
cc     3
Name: a, dtype: int64

In [22]:
df

Unnamed: 0,a,b
aa,10,1
bb,2,2
cc,3,3


In [23]:
df.columns.name = 'alphabet'
df

alphabet,a,b
aa,10,1
bb,2,2
cc,3,3


In [24]:
df.index.name = 'double alphabet'
df

alphabet,a,b
double alphabet,Unnamed: 1_level_1,Unnamed: 2_level_1
aa,10,1
bb,2,2
cc,3,3


In [25]:
df.values

array([[10, '1'],
       [2, '2'],
       [3, '3']], dtype=object)

## Index Object

In [26]:
index = s.index
index

RangeIndex(start=0, stop=2, step=1, name='index name')

In [27]:
try:
    index[0] = 1  # immutable
except TypeError as e:
    print(e)

Index does not support mutable operations


In [28]:
# An Index supports set-like operations (e.g. intersection) and cannot be
# modified in place, but unlike a Python set it may contain duplicate labels.
index.intersection(index)

RangeIndex(start=0, stop=2, step=1, name='index name')

## Indexing, Selection, and Filtering

In [29]:
df

alphabet,a,b
double alphabet,Unnamed: 1_level_1,Unnamed: 2_level_1
aa,10,1
bb,2,2
cc,3,3


In [30]:
# A slice inside [] selects rows by label, and the end label is included.
# The start label 'b' is not in the index; the slice starts at the first label
# that sorts after it ('bb').
df['b':'cc']

alphabet,a,b
double alphabet,Unnamed: 1_level_1,Unnamed: 2_level_1
bb,2,2
cc,3,3


In [31]:
# 'a' sorts before every row label, so this label slice covers all rows.
df['a':'cc']

alphabet,a,b
double alphabet,Unnamed: 1_level_1,Unnamed: 2_level_1
aa,10,1
bb,2,2
cc,3,3


In [32]:
df['aa':'cc']

alphabet,a,b
double alphabet,Unnamed: 1_level_1,Unnamed: 2_level_1
aa,10,1
bb,2,2
cc,3,3


## Integer Indexes

In [33]:
ser = pd.Series(np.arange(3))
ser

0    0
1    1
2    2
dtype: int64

In [36]:
# With an integer index, ser[-1] is looked up as the label -1 (which does not
# exist), not as "the last element", so it raises KeyError.
try:
    ser[-1]
except KeyError as e:
    print(repr(e))

KeyError(-1,)


In [41]:
# Slicing with integers is positional, even on an integer index:
# this drops the last element.
ser[:-1]

0    0
1    1
dtype: int64

In [39]:
ser2 = pd.Series(np.arange(3), index=['a', 'b', 'c'])
ser2

a    0
b    1
c    2
dtype: int64

In [40]:
# Fetch the last element by position. `.iloc` is explicit and unambiguous;
# plain `ser2[-1]` on a string-labelled Series depends on a positional
# fallback that newer pandas releases deprecate.
ser2.iloc[-1]

2

In [43]:
# An integer slice is positional here as well: everything but the last element.
ser2[:-1]

a    0
b    1
dtype: int64

## Function Application and Mapping

In [44]:
# 4x3 frame of standard-normal draws, with default integer row/column labels.
# NOTE(review): no random seed is set, so the values change on every run.
df = pd.DataFrame(np.random.standard_normal(size=(4, 3)))
df

Unnamed: 0,0,1,2
0,-1.052606,-0.008027,-1.78624
1,1.423614,-0.630388,0.064663
2,0.917881,-0.33018,0.839672
3,0.691965,0.922002,-2.018186


In [45]:
np.abs(df)

Unnamed: 0,0,1,2
0,1.052606,0.008027,1.78624
1,1.423614,0.630388,0.064663
2,0.917881,0.33018,0.839672
3,0.691965,0.922002,2.018186


In [46]:
def f(x):
    """Return the spread (maximum minus minimum) of ``x``."""
    return x.max() - x.min()

In [47]:
# By default apply() calls f once per column; the result is a Series indexed
# by the column labels.
df.apply(f) # column-wise

0    2.476220
1    1.552390
2    2.857858
dtype: float64

In [49]:
# row-wise (across columns)
df.apply(f, axis='columns')

0    1.778213
1    2.054002
2    1.248060
3    2.940188
dtype: float64

In [52]:
def f(x):
    """Summarise ``x`` as a two-element Series labelled 'min' and 'max'."""
    lowest, highest = x.min(), x.max()
    return pd.Series([lowest, highest], index=['min', 'max'])

In [53]:
df.apply(f)

Unnamed: 0,0,1,2
min,-1.052606,-0.630388,-2.018186
max,1.423614,0.922002,0.839672


In [54]:
# Element-wise formatter: render a number with two decimal places.
# NOTE(review): the name shadows the built-in format(); it is kept because
# later cells refer to it.
def format(x):
    return '%.2f' % x

In [55]:
# applymap() calls the function on every individual cell of the DataFrame.
# NOTE(review): recent pandas releases deprecate DataFrame.applymap in favour
# of DataFrame.map — confirm against the pandas version in use.
df.applymap(format)

Unnamed: 0,0,1,2
0,-1.05,-0.01,-1.79
1,1.42,-0.63,0.06
2,0.92,-0.33,0.84
3,0.69,0.92,-2.02


In [56]:
# Series.map() is the element-wise counterpart for a single column.
df[0].map(format)

0    -1.05
1     1.42
2     0.92
3     0.69
Name: 0, dtype: object

In [57]:
s = pd.Series([1, 0, 2, 2])
s

0    1
1    0
2    2
3    2
dtype: int64

In [58]:
# Default tie handling: tied values share the average of their ranks
# (the two 2s would occupy ranks 3 and 4, so both get 3.5).
s.rank()

0    2.0
1    1.0
2    3.5
3    3.5
dtype: float64

In [59]:
# method='first': ties are ranked in the order they appear in the data.
s.rank(method='first')

0    2.0
1    1.0
2    3.0
3    4.0
dtype: float64

In [60]:
# ascending=False ranks the largest value first; ties are still averaged.
s.rank(ascending=False)

0    3.0
1    4.0
2    1.5
3    1.5
dtype: float64

In [61]:
# method='max': every tied value gets the highest rank of its group.
s.rank(method='max')

0    2.0
1    1.0
2    4.0
3    4.0
dtype: float64