### DataFrame 인덱싱 이해하기

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample records keyed by column name. The frame fixes the column order
# explicitly and also requests a "penalty" column that `data` does not
# provide, so that column starts out with missing values only.
data = {
    "names": ["Kilho", "Kilho", "Kilho", "Charles", "Charles"],
    "year": [2014, 2015, 2016, 2015, 2016],
    "points": [1.5, 1.7, 3.6, 2.4, 2.9],
}
df = pd.DataFrame(
    data=data,
    index=["one", "two", "three", "four", "five"],
    columns=["year", "names", "points", "penalty"],
)

In [3]:
# Display the frame; "penalty" is empty because `data` has no such key.
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,
two,2015,Kilho,1.7,
three,2016,Kilho,3.6,
four,2015,Charles,2.4,
five,2016,Charles,2.9,


In [4]:
# Extract only the "year" column of the DataFrame (returned as a Series)
df["year"]

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [5]:
# Attribute access: gives the same result as df["year"]
df.year

one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [8]:
# Select several columns at once by passing a list of column names
df[["year","points"]]

Unnamed: 0,year,points
one,2014,1.5
two,2015,1.7
three,2016,3.6
four,2015,2.4
five,2016,2.9


In [9]:
# Fill the "penalty" column (declared at construction but still empty):
# a scalar is assigned to every row
df["penalty"] = 0.5

In [10]:
# Display the frame: every row of "penalty" is now 0.5
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,0.5
two,2015,Kilho,1.7,0.5
three,2016,Kilho,3.6,0.5
four,2015,Charles,2.4,0.5
five,2016,Charles,2.9,0.5


In [11]:
# Replace the column's values with a list (one value per row)
df["penalty"] = [1,2,3,4,5]

In [12]:
# Display the frame with the updated "penalty" values
df

Unnamed: 0,year,names,points,penalty
one,2014,Kilho,1.5,1
two,2015,Kilho,1.7,2
three,2016,Kilho,3.6,3
four,2015,Charles,2.4,4
five,2016,Charles,2.9,5


In [13]:
# Add a new column from a NumPy array.
# NOTE: despite the column name "zeros", np.arange(5) yields 0, 1, 2, 3, 4.
df["zeros"] = np.arange(5)

In [14]:
# Display the frame with the new "zeros" column
df

Unnamed: 0,year,names,points,penalty,zeros
one,2014,Kilho,1.5,1,0
two,2015,Kilho,1.7,2,1
three,2016,Kilho,3.6,3,2
four,2015,Charles,2.4,4,3
five,2016,Charles,2.9,5,4


In [16]:
# A Series labelled with only some of the frame's row labels.
val = pd.Series(
    data=[-1.2, -1.5, -1.7],
    index=["two", "four", "five"],
)

In [18]:
# Assign the Series as a new column; values are matched on index labels, so
# rows without a matching label ("one", "three") are left missing.
df["debt"] = val

In [19]:
# Display the frame with the new "debt" column
df

Unnamed: 0,year,names,points,penalty,zeros,debt
one,2014,Kilho,1.5,1,0,
two,2015,Kilho,1.7,2,1,-1.2
three,2016,Kilho,3.6,3,2,
four,2015,Charles,2.4,4,3,-1.5
five,2016,Charles,2.9,5,4,-1.7


In [20]:
# Derive a new column from element-wise arithmetic on two existing columns
df["net_points"] = df["points"] - df["penalty"]

In [23]:
# Store the result of an element-wise comparison as a boolean column
df["high_points"] = df["net_points"] > 2.0

In [24]:
# Display the frame with the derived "net_points" and "high_points" columns
df

Unnamed: 0,year,names,points,penalty,zeros,debt,net_points,high_points
one,2014,Kilho,1.5,1,0,,0.5,False
two,2015,Kilho,1.7,2,1,-1.2,-0.3,False
three,2016,Kilho,3.6,3,2,,0.6,False
four,2015,Charles,2.4,4,3,-1.5,-1.6,False
five,2016,Charles,2.9,5,4,-1.7,-2.1,False


In [25]:
# Delete a DataFrame column
del df["high_points"]

In [26]:
# Delete the "net_points" column
del df["net_points"]

In [27]:
# Delete the "zeros" column
del df["zeros"]

In [28]:
# Display the frame after the three columns were deleted
df

Unnamed: 0,year,names,points,penalty,debt
one,2014,Kilho,1.5,1,
two,2015,Kilho,1.7,2,-1.2
three,2016,Kilho,3.6,3,
four,2015,Charles,2.4,4,-1.5
five,2016,Charles,2.9,5,-1.7


In [29]:
# Inspect the DataFrame's column labels
df.columns

Index([u'year', u'names', u'points', u'penalty', u'debt'], dtype='object')

In [30]:
# Give a name to the row index and to the column index
df.index.name = "Order"
df.columns.name = "Info"

In [31]:
# Display the frame; the "Info" / "Order" names now appear in the header
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,Kilho,1.5,1,
two,2015,Kilho,1.7,2,-1.2
three,2016,Kilho,3.6,3,
four,2015,Charles,2.4,4,-1.5
five,2016,Charles,2.9,5,-1.7


In [32]:
# Row indexing: the integer slice [0:3] returns the first three rows
df[0:3]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014,Kilho,1.5,1,
two,2015,Kilho,1.7,2,-1.2
three,2016,Kilho,3.6,3,


In [34]:
df["two":"four"]
# Not the recommended way to select rows (label slice directly on the frame);
# note that the end label "four" is included in the result.

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,Kilho,1.7,2,-1.2
three,2016,Kilho,3.6,3,
four,2015,Charles,2.4,4,-1.5


In [35]:
# Recommended approach: the .loc[] and .iloc[] indexers
# .loc  : select rows by their actual index labels
# .iloc : select rows by integer position, NumPy-array style

df.loc["two"]

Info
year        2015
names      Kilho
points       1.7
penalty        2
debt        -1.2
Name: two, dtype: object

In [36]:
# Label slice with .loc; the end label "four" is included
df.loc["two":"four"]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015,Kilho,1.7,2,-1.2
three,2016,Kilho,3.6,3,
four,2015,Charles,2.4,4,-1.5


In [37]:
# Row label slice plus a single column label: the result is a Series
df.loc["two":"four","points"]

Order
two      1.7
three    3.6
four     2.4
Name: points, dtype: float64

In [38]:
# Same as df["year"]
df.loc[:,"year"]

Order
one      2014
two      2015
three    2016
four     2015
five     2016
Name: year, dtype: int64

In [39]:
# Select two or more columns
df.loc[:,["year","names"]]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,2014,Kilho
two,2015,Kilho
three,2016,Kilho
four,2015,Charles
five,2016,Charles


In [41]:
# Slice rows and columns by label; both end labels are included
df.loc["three":"five","year":"penalty"]

Info,year,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
three,2016,Kilho,3.6,3
four,2015,Charles,2.4,4
five,2016,Charles,2.9,5


In [42]:
# Add a row by assigning to a label that does not exist yet.
# NOTE(review): afterwards "year" and "penalty" are displayed as floats
# (2014.0, 1.0) — the integer columns appear to be upcast by this assignment.
df.loc["six",:] = [2013,"Hayoung", 4.0, 0.1, 2.1]

In [43]:
# Display the frame with the new row "six"
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,Kilho,1.5,1.0,
two,2015.0,Kilho,1.7,2.0,-1.2
three,2016.0,Kilho,3.6,3.0,
four,2015.0,Charles,2.4,4.0,-1.5
five,2016.0,Charles,2.9,5.0,-1.7
six,2013.0,Hayoung,4.0,0.1,2.1


In [44]:
# .iloc selects rows by integer position, NumPy-array style;
# position 3 is the fourth row ("four")
df.iloc[3]

Info
year          2015
names      Charles
points         2.4
penalty          4
debt          -1.5
Name: four, dtype: object

In [45]:
# Positional slices: rows 3-4 and columns 0-1 (end positions are excluded)
df.iloc[3:5,0:2]

Info,year,names
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
four,2015.0,Charles
five,2016.0,Charles


In [46]:
# Pick specific rows and columns by passing lists of integer positions
df.iloc[[0,1,3],[1,2]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,Kilho,1.5
two,Kilho,1.7
four,Charles,2.4


In [47]:
# All rows, columns at positions 1-3
df.iloc[:,1:4]

Info,names,points,penalty
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,Kilho,1.5,1.0
two,Kilho,1.7,2.0
three,Kilho,3.6,3.0
four,Charles,2.4,4.0
five,Charles,2.9,5.0
six,Hayoung,4.0,0.1


In [50]:
# A single cell: row position 1, column position 1
df.iloc[1,1]

'Kilho'

In [51]:
# Display the full frame again before the boolean-indexing examples
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,Kilho,1.5,1.0,
two,2015.0,Kilho,1.7,2.0,-1.2
three,2016.0,Kilho,3.6,3.0,
four,2015.0,Charles,2.4,4.0,-1.5
five,2016.0,Charles,2.9,5.0,-1.7
six,2013.0,Hayoung,4.0,0.1,2.1


In [52]:
# Boolean indexing: the comparison yields a boolean Series, one value per row

df["year"] > 2014

Order
one      False
two       True
three     True
four      True
five      True
six      False
Name: year, dtype: bool

In [53]:
# Use the boolean Series as a row filter; ":" keeps all columns
df.loc[df["year"] > 2014, :]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
two,2015.0,Kilho,1.7,2.0,-1.2
three,2016.0,Kilho,3.6,3.0,
four,2015.0,Charles,2.4,4.0,-1.5
five,2016.0,Charles,2.9,5.0,-1.7


In [54]:
# Filter rows by a condition and keep only the listed columns
df.loc[df["names"] == "Kilho" , ["names","points"]]

Info,names,points
Order,Unnamed: 1_level_1,Unnamed: 2_level_1
one,Kilho,1.5
two,Kilho,1.7
three,Kilho,3.6


In [55]:
# Combine conditions with & ; each comparison is parenthesized because
# & binds more tightly than > and <
df.loc[(df["points"] > 2) & (df["points"] < 3) , :]

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
four,2015.0,Charles,2.4,4.0,-1.5
five,2016.0,Charles,2.9,5.0,-1.7


In [56]:
# Conditional assignment: set "penalty" to 0 on rows where points > 3
df.loc[df["points"] > 3 , "penalty"] = 0

In [57]:
# Display the frame: "penalty" is now 0 for rows "three" and "six"
df

Info,year,names,points,penalty,debt
Order,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
one,2014.0,Kilho,1.5,1.0,
two,2015.0,Kilho,1.7,2.0,-1.2
three,2016.0,Kilho,3.6,0.0,
four,2015.0,Charles,2.4,4.0,-1.5
five,2016.0,Charles,2.9,5.0,-1.7
six,2013.0,Hayoung,4.0,0.0,2.1
