# Configuring pandas

In [1]:
# import numpy and pandas
import numpy as np
import pandas as pd

# used for dates
# NOTE: the from-import below rebinds the name `datetime` to the class, so the
# module bound by `import datetime` is no longer reachable under that name;
# from here on `datetime` is datetime.datetime and `date` is datetime.date.
import datetime
from datetime import datetime, date

# Output-format options for pandas: plain-text reprs instead of HTML tables,
# at most 8 columns and 10 rows per display, 80 characters of display width
for _option, _setting in (
    ('display.notebook_repr_html', False),
    ('display.max_columns', 8),
    ('display.max_rows', 10),
    ('display.width', 80),
):
    pd.set_option(_option, _setting)

# bring in matplotlib for graphics; the %matplotlib inline magic makes figures
# render inside the browser page that is running the Jupyter notebook
import matplotlib.pyplot as plt
%matplotlib inline 

In [2]:
# load the CSV file into a DataFrame (the path is relative to the working directory)
df = pd.read_csv('../data/Jan_2019_ontime_1.csv')
# echo the frame; long frames are shown truncated according to display.max_rows
df

       DEST  DISTANCE
0       ATL       300
1       CVG       596
2       CVG       229
3       ATL       223
4       FSM       579
...     ...       ...
583980  HNL      3801
583981  SPN       129
583982  GUM       129
583983  GUM       129
583984  SPN       129

[583985 rows x 2 columns]

In [3]:
# get the row index of the DataFrame
df.index

RangeIndex(start=0, stop=583985, step=1)

In [4]:
# len() of a DataFrame is its number of rows
len(df)

583985

In [5]:
# .size is the total number of cells (rows * columns), not the number of rows
df.size

1167970

In [6]:
# .shape is a tuple of (number of rows, number of columns)
df.shape

(583985, 2)

In [7]:
# show the first five rows (the default for .head())
df.head()

  DEST  DISTANCE
0  ATL       300
1  CVG       596
2  CVG       229
3  ATL       223
4  FSM       579

In [8]:
# the first three rows
df.head(n = 3) # df.head(3) is equivalent

  DEST  DISTANCE
0  ATL       300
1  CVG       596
2  CVG       229

In [9]:
# the last five rows (the default for .tail())
df.tail()

       DEST  DISTANCE
583980  HNL      3801
583981  SPN       129
583982  GUM       129
583983  GUM       129
583984  SPN       129

In [10]:
# the last three rows
df.tail(3) # equivalent to df.tail(n=3)

       DEST  DISTANCE
583982  GUM       129
583983  GUM       129
583984  SPN       129

In [11]:
# only take specific rows by integer position
df.take([1, 5, 8])

  DEST  DISTANCE
1  CVG       596
5  MSP       574
8  LGA       833

In [13]:
# explicit lookup by integer position with .iloc; rows come back in the order requested
df.iloc[[3, 2]]

  DEST  DISTANCE
3  ATL       223
2  CVG       229

In [14]:
# explicit lookup via index labels with .loc
df.loc[[1, 3]]

  DEST  DISTANCE
1  CVG       596
3  ATL       223

In [16]:
# explicit lookup via integer positions with .iloc; it returns the same rows as
# .loc[[1, 3]] here because the RangeIndex labels coincide with the positions
df.iloc[[1, 3]]

  DEST  DISTANCE
1  CVG       596
3  ATL       223

In [17]:
# lookup via a list of integer positions with .iloc
df.iloc[[1, 2, 3, 4, 5]]

  DEST  DISTANCE
1  CVG       596
2  CVG       229
3  ATL       223
4  FSM       579
5  MSP       574

In [18]:
# rows at positions 1, 3, 5: slice from 1 up to (not including) 6 in steps of 2.
# .iloc states the positional intent explicitly; a bare df[...] is also used for
# column selection, which makes row slices through it easy to misread.
df.iloc[1:6:2]

  DEST  DISTANCE
1  CVG       596
3  ATL       223
5  MSP       574

In [19]:
# first five rows by positional slicing, same as .head(5);
# .iloc makes it explicit that the slice selects rows by position
df.iloc[:5]

  DEST  DISTANCE
0  ATL       300
1  CVG       596
2  CVG       229
3  ATL       223
4  FSM       579

In [20]:
# from position 4 (the fifth row, counting from zero) to the end;
# .iloc makes it explicit that the slice selects rows by position
df.iloc[4:]

       DEST  DISTANCE
4       FSM       579
5       MSP       574
6       ATL       341
7       CVG       585
8       LGA       833
...     ...       ...
583980  HNL      3801
583981  SPN       129
583982  GUM       129
583983  GUM       129
583984  SPN       129

[583981 rows x 2 columns]

In [21]:
# the row labels of the DataFrame
df.index

RangeIndex(start=0, stop=583985, step=1)

In [22]:
# the underlying data as a NumPy array; the mixed string/integer columns are
# combined into a single array of dtype=object. DataFrame.to_numpy() is the
# accessor the pandas documentation recommends over the older .values attribute.
df.to_numpy()

array([['ATL', 300],
       ['CVG', 596],
       ['CVG', 229],
       ...,
       ['GUM', 129],
       ['GUM', 129],
       ['SPN', 129]], dtype=object)