# Matplotlib 기초

## 1. Matplotlib 이란?

In [1]:
import matplotlib
import matplotlib.pyplot as plt
plt.figure.max_open_warning = False

%matplotlib notebook 
#%matplotlib inline

#위와 같은 것이 %로 시작하는 것을 jupyter notebook의 magic command라고 한다.
# %who는 변수명의 리스트를 보여주고 %magic 은 모든 매직명령어를 보여준다.

# 전체적인 magic 명령어에 대해서는 다음 블로그를 참고
# http://studymake.tistory.com/601
# matplotlib 을 jupyter notebook에서 사용할 때, plot의 이미지를 보여주기 위해 magic command를 사용하는데,
# 이때 우리가 사용하는 nbagg 이외에도 다음과 같은 요소를 사용할 수 있다.
# Available matplotlib backends: ['osx', 'qt4', 'qt5', 'gtk3', 'notebook', 'wx', 'qt', 'nbagg','gtk', 'tk', 'inline']

In [2]:
import numpy as np
import pandas as pd

## 2. Plot의 종류
### 2-1 Line plot 그리기

In [3]:
# Series를 통한 line plot 그리기
s = pd.Series(np.random.randn(10).cumsum(), index=np.arange(0, 100, 10))
s

0    -0.386868
10    1.281316
20    1.766670
30    1.205010
40    2.874979
50    3.975453
60    2.067731
70    1.709880
80    1.772440
90    0.587421
dtype: float64

In [4]:
# 위에서 정의한 s라는 시리즈에 대해서 line plot을 그리고 싶다면?
s.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899ca7af70>

- s 라는 Series에서의 index와 value를 통해 그래프가 그려졌다.
- 그래프 상단의 전원버튼을 누르기 전까지 우리는 해당 그래프를 interactive하게 조작할 수 있다.

In [5]:
# DataFrame을 통한 line plot 그리기
df = pd.DataFrame(np.random.randn(10, 4).cumsum(axis=0),
                  columns=["A", "B", "C", "D"],
                  index=np.arange(0, 100, 10))
df

Unnamed: 0,A,B,C,D
0,0.804829,0.588057,-1.092658,-1.368475
10,-0.199017,2.085821,-2.124019,-3.023152
20,0.972363,2.632495,-2.139273,-3.634158
30,2.813946,2.720609,-3.1263,-3.684469
40,2.880259,3.408905,-3.216323,-4.082483
50,3.341717,4.061333,-5.782365,-6.137923
60,3.796291,3.084734,-5.200536,-8.98226
70,4.743215,2.145581,-4.139332,-8.310723
80,2.896368,1.321888,-5.894181,-8.792576
90,3.477333,2.507379,-6.354616,-8.954094


In [6]:
df.plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899d1df550>

In [7]:
plt.close('all')
# 하나의 열에 대해서만 보고 싶다면?
df['B'].plot()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899d4cde80>

### 2-2 Bar plot 그리기

In [8]:
plt.close('all')
s2 = pd.DataFrame(np.random.rand(16), index=list("abcdefghijklmnop"))

In [9]:
s2.plot(kind='bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899d7cadc0>

In [10]:
# 가로방향의 bar plot그리기
s2.plot(kind='barh')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899dadb760>

In [11]:
df2 = pd.DataFrame(np.random.rand(6, 4), 
                   index=["one", "two", "three", "four", "five", "six"],
                   columns=pd.Index(["A", "B", "C", "D"], name="Genus"))
df2

Genus,A,B,C,D
one,0.170379,0.14445,0.045057,0.283341
two,0.983996,0.111009,0.864649,0.84466
three,0.521577,0.53297,0.783959,0.622976
four,0.526669,0.808643,0.549464,0.00855
five,0.195867,0.74207,0.798155,0.740979
six,0.60412,0.572722,0.236118,0.981792


In [12]:
df2.plot(kind='bar')

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899d7b74f0>

In [13]:
df2.plot(kind='barh', stacked=True)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899e0d88b0>

위와 같이 Stacked 속성을 True로 설정하면, 하나의 인덱스에 대한 각 열의 값을 한줄로 쌓아서 나타내준다.

### 2-3 Histogram 그리기
- histogram은 index가 필요없다.

In [14]:
plt.close('all')
s3 = pd.Series(np.random.normal(0, 1, size=200))

In [15]:
s3.hist()

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899e409e20>

x 축의 구간 개수를 bin이라고 한다.

이를 직접 설정할 수도 있다.

In [16]:
plt.close('all')
s3.hist(bins=50)

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899e6fc160>

In [17]:
plt.close('all')
s3.hist(bins=100, density=True)
# normed 속성을 True로 설정하면, 각 bin에 속하는 개수를 전체 개수로 나눈 비율, 즉 정규화 한 값을 bar의 높이로 사용하게 된다.

<IPython.core.display.Javascript object>

<matplotlib.axes._subplots.AxesSubplot at 0x2899ecd0af0>

### 산점도(Scatter plot) 그리기
산점도의 경우에는 서로 다른 두 개의 독립변수에 대해 두 변수가 어떤 관계가 있는지 살펴보기 위해 사용된다.

In [18]:
plt.close('all')
x1 = np.random.normal(1, 1, size=(100, 1))
x2 = np.random.normal(-2, 4, size=(100, 1))
X = np.concatenate((x1, x2), axis=1)

In [19]:
df3 = pd.DataFrame(X, columns=["x1", "x2"])
df3

Unnamed: 0,x1,x2
0,0.632946,0.523379
1,-0.039774,-1.679426
2,1.953593,1.056012
3,-0.048100,-7.598126
4,0.973518,1.800171
...,...,...
95,0.283096,9.207727
96,1.550076,-2.256530
97,1.559306,2.749163
98,1.352860,-8.582143


In [20]:
plt.scatter(df3['x1'], df3['x2']) # x1이 x축, x2가 y축

<IPython.core.display.Javascript object>

<matplotlib.collections.PathCollection at 0x2899ed04e80>

## 3. Plot 모양 변형하기

### 3-1 서브 플롯 그리기

In [21]:
plt.close('all')
fig = plt.figure()
# 비어있는 figure가 생성된다.

<IPython.core.display.Javascript object>

In [22]:
# subplot 추가하기, add_subplot 에는 총 3가지 인자가 들어간다.

In [23]:
ax1 = fig.add_subplot(2, 2, 1)

- 첫번째 숫자와 두번째 숫자 : 우리가 figure를 어떤 크기로 나눌지에 대한 값이다. 즉 위의 같은 경우는 2,2 이므로 우리의 figure를 2x2로 나눈다는 뜻.
- 세번째 숫자 : 첫번째, 두번째 숫자로 나눈 figure에서 좌측상단으로 우측방향으로 숫자가 붙는다. 이때 우리가 add하고자 하는 subplot이 몇번째에 들어가는지를 나타낸다.
- 즉, 위와 같은 경우 figure는 다음과 같이 나누어진다.
- 1  2
- 3  4
- 이때 우리는 1위치에 subplot을 추가하고 해당 subplot을 ax1이라는 변수로 반환받는다.

In [24]:
ax2 = fig.add_subplot(2,2,2)

In [25]:
ax3 = fig.add_subplot(2,2,3)

In [26]:
plt.plot(np.random.randn(50).cumsum())
# 위치를 지정하지 않고 plot을 그리니 맨마지막에 그림이 그려진다.
# figure에 추가된 subplot상 맨 마지막에 위치한 곳에 그려지는 것이 아니라, 제일 마지막에 추가한 subplot에 그려진다.
# 2 -> 3 -> 1 순으로 subplot을 추가하여 테스트 해보면 1번 요소에 그려진다.

[<matplotlib.lines.Line2D at 0x2899f6a6130>]

In [27]:
plt.plot(np.random.randn(200).cumsum())
# 강의에서는 한번더 위치 지정 없이 그리면 그 전의 요소에 그려진다고 했는데,
# 실제로 진행해보면 그냥 위의 것과 똑같이 제일 마지막에 추가한 subplot에 중복되서 그려진다.

[<matplotlib.lines.Line2D at 0x2899f6c15e0>]

In [28]:
# 그럼 우리가 원하는 위치에 그림을 그리기 위해서는?
# 위에서 add_subplot 을 할때 변수명을 지정하여 반환값을 받았다.
# 해당 변수를 통해 plot을 그리면 된다.
ax1.hist(np.random.randn(100), bins = 20) # bins는 x축 bar의 개수

(array([ 1.,  0.,  0.,  1.,  2.,  2.,  5.,  7.,  2.,  9., 18.,  9., 12.,
        15.,  6.,  5.,  2.,  0.,  2.,  2.]),
 array([-3.35154742, -3.0502501 , -2.74895279, -2.44765547, -2.14635816,
        -1.84506085, -1.54376353, -1.24246622, -0.9411689 , -0.63987159,
        -0.33857427, -0.03727696,  0.26402036,  0.56531767,  0.86661499,
         1.1679123 ,  1.46920962,  1.77050693,  2.07180425,  2.37310156,
         2.67439888]),
 <a list of 20 Patch objects>)

In [29]:
ax2.scatter(np.arange(30), np.arange(30) + 3 * np.random.randn(30))

<matplotlib.collections.PathCollection at 0x2899f6fc2e0>

#### 서브플롯의 다른 정의 방법

In [30]:
fig, axes = plt.subplots(2,3)
# 위와 같이 만들면 2x3 subplot들을 가지는 figure를 만드는 것
# 이때 반환되는 값은 2개로써, figrue 자체와, 축

<IPython.core.display.Javascript object>

In [31]:
# 반환받은 axes에는 우리가 위에서 설정한 크기와 같은 shape의 리스트로 각 요소에는 subplot 객체가 들어있다.
axes

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x000002899F7020D0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002899F7330A0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002899F7611F0>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x000002899F784100>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002899F7BE400>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x000002899F7E88E0>]],
      dtype=object)

#### 서브플롯 간 간격 조절하기

In [32]:
fig, axes = plt.subplots(2, 2, sharex=True, sharey=True)
for i in range(2):
    for j in range(2):
        axes[i, j].hist(np.random.randn(500), bins=50, color='k', alpha=0.5)
        
plt.subplots_adjust(wspace=0, hspace=0)

<IPython.core.display.Javascript object>

### 3-2. Plot 꾸미기

In [33]:
plt.close()
# 지금까지의 Plot
plt.figure()
plt.plot(np.random.randn(30))

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2899f633220>]

#### 마커 및 라인 스타일 수정
##### color : 값 색상
    - "b" blue
    - "g" green
    - "r" red
    - "c" cyan
    - "m" magenta
    - "y" yellow
    - "k" black
    - "w" white

##### marker : 값 마킹
    - "." point
    - "," pixel
    - "o" circle
    - "v" triangle_down
    - "^" triangle_up
    - "<" triangle_left
    - ">" triangle_right
    - "8" octagon
    - "s" square
    - "p" pentagon
    - "*" star
    - "h" hexagon
    - "+" plus
    - "x" x
    - "D" diamond

##### line style : 값 라인 스타일
    - "-" solid line
    - "--" dashed line
    - "-." dash-dotted line
    - ":" dotted line
    - "None" draw nothing

In [34]:
plt.close('all')

plt.figure()
plt.plot(np.random.randn(50), color = 'g', marker='o', linestyle='--')

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x2899f7f6700>]

In [35]:
fig, axes = plt.subplots(2,1)

<IPython.core.display.Javascript object>

In [36]:
data = pd.Series(np.random.rand(16), index=list('abcdefghijklmnop'))
data.plot(kind='bar', ax=axes[0], color='k', alpha=0.7) #plot 함수를 그릴때, figure에서 원하는 위치를 지정하기 위해 ax 속성을 사용

<matplotlib.axes._subplots.AxesSubplot at 0x2899f8327c0>

In [37]:
data.plot(kind='barh', ax=axes[1], color='g', alpha=0.3)

<matplotlib.axes._subplots.AxesSubplot at 0x289a1039d00>

#### plot의 제목, 눈금, 레이블 수정

In [38]:
fig = plt.figure()

<IPython.core.display.Javascript object>

In [39]:
ax = fig.add_subplot(1,1,1)
ax.plot(np.random.randn(1000).cumsum())

[<matplotlib.lines.Line2D at 0x289a197bfd0>]

In [40]:
# 이때 그래프에서 나타내는 눈금을 tick이라고 한다.
# 즉, 위의 그래프의 x tick은 200이고 y tick은 10이다.
ax.set_xticks([0, 250, 500, 750, 1000])

[<matplotlib.axis.XTick at 0x289a195ec10>,
 <matplotlib.axis.XTick at 0x289a195ebe0>,
 <matplotlib.axis.XTick at 0x289a190ebb0>,
 <matplotlib.axis.XTick at 0x289a1c0c040>,
 <matplotlib.axis.XTick at 0x289a1c0c550>]

In [41]:
# 제목 입력하기
ax.set_title('random walk plot')

Text(0.5, 1.0, 'random walk plot')

In [42]:
# 라벨 입력하기
ax.set_xlabel('Stages')
ax.set_ylabel('Values')

Text(44.222222222222214, 0.5, 'Values')

In [43]:
# 눈금을 문자로 하기 위해서는?
labels = ax.set_xticklabels(['one', 'two', 'three', 'four', 'five'], rotation = 30, fontsize='small')

#### 범례, 축 범위 수정

In [44]:
plt.close('all')
fig = plt.figure()
ax = fig.add_subplot(1,1,1)

<IPython.core.display.Javascript object>

In [45]:
ax.plot(np.random.randn(1000).cumsum(), 'k', label='one')
ax.plot(np.random.randn(1000).cumsum(), 'b--', label='two')
ax.plot(np.random.randn(1000).cumsum(), 'r.', label='three')

[<matplotlib.lines.Line2D at 0x289a1c72130>]

In [46]:
# 범례 표시하기
ax.legend(loc='best')
# loc는 범례가 위치할 곳을 의미한다. best를 주게 되면 현재 그래프에서 최적의 위치를 자동으로 찾는다.

<matplotlib.legend.Legend at 0x289a1c6e580>

In [47]:
ax.get_xlim()
# 현재 그래프의 x축 범위를 가져온다.

(-49.95, 1048.95)

In [48]:
# 이를 변경하려면,
ax.set_xlim([100,900])

(100.0, 900.0)

In [49]:
ax.set_ylim([-100,100])

(-100.0, 100.0)

## 4. Matplotlib을 이용한 데이터시각화 맛보기

In [50]:
battles = pd.read_csv('game-of-thrones/battles.csv', sep=',')
deaths = pd.read_csv('game-of-thrones/character-deaths.csv', sep=',')
battles.head()

Unnamed: 0,name,year,battle_number,attacker_king,defender_king,attacker_1,attacker_2,attacker_3,attacker_4,defender_1,...,major_death,major_capture,attacker_size,defender_size,attacker_commander,defender_commander,summer,location,region,note
0,Battle of the Golden Tooth,298,1,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,...,1.0,0.0,15000.0,4000.0,Jaime Lannister,"Clement Piper, Vance",1.0,Golden Tooth,The Westerlands,
1,Battle at the Mummer's Ford,298,2,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Baratheon,...,1.0,0.0,,120.0,Gregor Clegane,Beric Dondarrion,1.0,Mummer's Ford,The Riverlands,
2,Battle of Riverrun,298,3,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,...,0.0,1.0,15000.0,10000.0,"Jaime Lannister, Andros Brax","Edmure Tully, Tytos Blackwood",1.0,Riverrun,The Riverlands,
3,Battle of the Green Fork,298,4,Robb Stark,Joffrey/Tommen Baratheon,Stark,,,,Lannister,...,1.0,1.0,18000.0,20000.0,"Roose Bolton, Wylis Manderly, Medger Cerwyn, H...","Tywin Lannister, Gregor Clegane, Kevan Lannist...",1.0,Green Fork,The Riverlands,
4,Battle of the Whispering Wood,298,5,Robb Stark,Joffrey/Tommen Baratheon,Stark,Tully,,,Lannister,...,1.0,1.0,1875.0,6000.0,"Robb Stark, Brynden Tully",Jaime Lannister,1.0,Whispering Wood,The Riverlands,


In [51]:
battles.columns

Index(['name', 'year', 'battle_number', 'attacker_king', 'defender_king',
       'attacker_1', 'attacker_2', 'attacker_3', 'attacker_4', 'defender_1',
       'defender_2', 'defender_3', 'defender_4', 'attacker_outcome',
       'battle_type', 'major_death', 'major_capture', 'attacker_size',
       'defender_size', 'attacker_commander', 'defender_commander', 'summer',
       'location', 'region', 'note'],
      dtype='object')

In [52]:
battles.shape

(38, 25)

In [53]:
deaths.shape

(917, 13)

In [54]:
deaths.columns

Index(['Name', 'Allegiances', 'Death Year', 'Book of Death', 'Death Chapter',
       'Book Intro Chapter', 'Gender', 'Nobility', 'GoT', 'CoK', 'SoS', 'FfC',
       'DwD'],
      dtype='object')

In [55]:
deaths.head()

Unnamed: 0,Name,Allegiances,Death Year,Book of Death,Death Chapter,Book Intro Chapter,Gender,Nobility,GoT,CoK,SoS,FfC,DwD
0,Addam Marbrand,Lannister,,,,56.0,1,1,1,1,1,1,0
1,Aegon Frey (Jinglebell),,299.0,3.0,51.0,49.0,1,1,0,0,1,0,0
2,Aegon Targaryen,House Targaryen,,,,5.0,1,1,0,0,0,0,1
3,Adrack Humble,House Greyjoy,300.0,5.0,20.0,20.0,1,1,0,0,0,0,1
4,Aemon Costayne,Lannister,,,,,1,1,0,0,1,0,0


In [56]:
book_nums_to_death_count = deaths['Book of Death'].value_counts().sort_index()
book_nums_to_death_count

1.0    49
2.0    73
3.0    97
4.0    27
5.0    61
Name: Book of Death, dtype: int64

In [57]:
ax1 = book_nums_to_death_count.plot(color = 'k', marker='o', linestyle='--')

# 모양 가다듬기
ax1.set_xticks(np.arange(1,6))

[<matplotlib.axis.XTick at 0x289a1c3ec70>,
 <matplotlib.axis.XTick at 0x289a1c3ec40>,
 <matplotlib.axis.XTick at 0x289a1948eb0>,
 <matplotlib.axis.XTick at 0x289a161d580>,
 <matplotlib.axis.XTick at 0x289a161da90>]

In [58]:
ax1.set_xlim([0,6])
ax1.set_ylim([0,120])

(0.0, 120.0)

In [59]:
battles = battles.set_index(['name'])
battles.head()

Unnamed: 0_level_0,year,battle_number,attacker_king,defender_king,attacker_1,attacker_2,attacker_3,attacker_4,defender_1,defender_2,...,major_death,major_capture,attacker_size,defender_size,attacker_commander,defender_commander,summer,location,region,note
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Battle of the Golden Tooth,298,1,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,,...,1.0,0.0,15000.0,4000.0,Jaime Lannister,"Clement Piper, Vance",1.0,Golden Tooth,The Westerlands,
Battle at the Mummer's Ford,298,2,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Baratheon,,...,1.0,0.0,,120.0,Gregor Clegane,Beric Dondarrion,1.0,Mummer's Ford,The Riverlands,
Battle of Riverrun,298,3,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,,...,0.0,1.0,15000.0,10000.0,"Jaime Lannister, Andros Brax","Edmure Tully, Tytos Blackwood",1.0,Riverrun,The Riverlands,
Battle of the Green Fork,298,4,Robb Stark,Joffrey/Tommen Baratheon,Stark,,,,Lannister,,...,1.0,1.0,18000.0,20000.0,"Roose Bolton, Wylis Manderly, Medger Cerwyn, H...","Tywin Lannister, Gregor Clegane, Kevan Lannist...",1.0,Green Fork,The Riverlands,
Battle of the Whispering Wood,298,5,Robb Stark,Joffrey/Tommen Baratheon,Stark,Tully,,,Lannister,,...,1.0,1.0,1875.0,6000.0,"Robb Stark, Brynden Tully",Jaime Lannister,1.0,Whispering Wood,The Riverlands,


In [60]:
large_battle_mask = battles['attacker_size'] + battles['defender_size'] > 10000
large_battles = battles.loc[large_battle_mask, ['attacker_size', 'defender_size']]
large_battles.shape

(10, 2)

In [61]:
large_battles

Unnamed: 0_level_0,attacker_size,defender_size
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Battle of the Golden Tooth,15000.0,4000.0
Battle of Riverrun,15000.0,10000.0
Battle of the Green Fork,18000.0,20000.0
Battle of the Camps,6000.0,12625.0
Battle of Oxcross,6000.0,10000.0
Siege of Storm's End,5000.0,20000.0
Battle of the Fords,20000.0,10000.0
Battle of the Blackwater,21000.0,7250.0
Battle of Castle Black,100000.0,1240.0
Siege of Winterfell,5000.0,8000.0


In [62]:
ax2 = large_battles.plot(kind='barh', stacked=True, fontsize=8)

<IPython.core.display.Javascript object>

In [63]:
large_battles['attacker_pcts'] = large_battles['attacker_size'] / (large_battles['attacker_size'] + large_battles['defender_size'])
large_battles['defender_pcts'] = large_battles['defender_size'] / (large_battles['attacker_size'] + large_battles['defender_size'])
large_battles

Unnamed: 0_level_0,attacker_size,defender_size,attacker_pcts,defender_pcts
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Battle of the Golden Tooth,15000.0,4000.0,0.789474,0.210526
Battle of Riverrun,15000.0,10000.0,0.6,0.4
Battle of the Green Fork,18000.0,20000.0,0.473684,0.526316
Battle of the Camps,6000.0,12625.0,0.322148,0.677852
Battle of Oxcross,6000.0,10000.0,0.375,0.625
Siege of Storm's End,5000.0,20000.0,0.2,0.8
Battle of the Fords,20000.0,10000.0,0.666667,0.333333
Battle of the Blackwater,21000.0,7250.0,0.743363,0.256637
Battle of Castle Black,100000.0,1240.0,0.987752,0.012248
Siege of Winterfell,5000.0,8000.0,0.384615,0.615385


In [64]:
ax3 = large_battles[['attacker_pcts', 'defender_pcts']].plot(kind='barh', stacked=True, fontsize=8)

<IPython.core.display.Javascript object>

In [65]:
battles

Unnamed: 0_level_0,year,battle_number,attacker_king,defender_king,attacker_1,attacker_2,attacker_3,attacker_4,defender_1,defender_2,...,major_death,major_capture,attacker_size,defender_size,attacker_commander,defender_commander,summer,location,region,note
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Battle of the Golden Tooth,298,1,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,,...,1.0,0.0,15000.0,4000.0,Jaime Lannister,"Clement Piper, Vance",1.0,Golden Tooth,The Westerlands,
Battle at the Mummer's Ford,298,2,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Baratheon,,...,1.0,0.0,,120.0,Gregor Clegane,Beric Dondarrion,1.0,Mummer's Ford,The Riverlands,
Battle of Riverrun,298,3,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Tully,,...,0.0,1.0,15000.0,10000.0,"Jaime Lannister, Andros Brax","Edmure Tully, Tytos Blackwood",1.0,Riverrun,The Riverlands,
Battle of the Green Fork,298,4,Robb Stark,Joffrey/Tommen Baratheon,Stark,,,,Lannister,,...,1.0,1.0,18000.0,20000.0,"Roose Bolton, Wylis Manderly, Medger Cerwyn, H...","Tywin Lannister, Gregor Clegane, Kevan Lannist...",1.0,Green Fork,The Riverlands,
Battle of the Whispering Wood,298,5,Robb Stark,Joffrey/Tommen Baratheon,Stark,Tully,,,Lannister,,...,1.0,1.0,1875.0,6000.0,"Robb Stark, Brynden Tully",Jaime Lannister,1.0,Whispering Wood,The Riverlands,
Battle of the Camps,298,6,Robb Stark,Joffrey/Tommen Baratheon,Stark,Tully,,,Lannister,,...,0.0,0.0,6000.0,12625.0,"Robb Stark, Tytos Blackwood, Brynden Tully","Lord Andros Brax, Forley Prester",1.0,Riverrun,The Riverlands,
Sack of Darry,298,7,Joffrey/Tommen Baratheon,Robb Stark,Lannister,,,,Darry,,...,0.0,0.0,,,Gregor Clegane,Lyman Darry,1.0,Darry,The Riverlands,
Battle of Moat Cailin,299,8,Balon/Euron Greyjoy,Robb Stark,Greyjoy,,,,Stark,,...,0.0,0.0,,,Victarion Greyjoy,,1.0,Moat Cailin,The North,
Battle of Deepwood Motte,299,9,Balon/Euron Greyjoy,Robb Stark,Greyjoy,,,,Stark,,...,0.0,0.0,1000.0,,Asha Greyjoy,,1.0,Deepwood Motte,The North,
Battle of the Stony Shore,299,10,Balon/Euron Greyjoy,Robb Stark,Greyjoy,,,,Stark,,...,0.0,0.0,264.0,,Theon Greyjoy,,1.0,Stony Shore,The North,Greyjoy's troop number based on the Battle of ...


In [66]:
col_names = battles.columns[4:12]
col_names

Index(['attacker_1', 'attacker_2', 'attacker_3', 'attacker_4', 'defender_1',
       'defender_2', 'defender_3', 'defender_4'],
      dtype='object')

In [67]:
# 각 가문이 얼마나 전투에 참여했는지 보기위해 먼저 col_names를 가져왔다.
# 이에 대해서 unique 처리를 하려고하는데, 이때 NaN값이 있으면 오류가 발생하므로 이를 먼저 처리한다.
# NaN을 None이라는 문자열로 대체할 것
house_names = battles[col_names].fillna("None").values
house_names[:5]

array([['Lannister', 'None', 'None', 'None', 'Tully', 'None', 'None',
        'None'],
       ['Lannister', 'None', 'None', 'None', 'Baratheon', 'None', 'None',
        'None'],
       ['Lannister', 'None', 'None', 'None', 'Tully', 'None', 'None',
        'None'],
       ['Stark', 'None', 'None', 'None', 'Lannister', 'None', 'None',
        'None'],
       ['Stark', 'Tully', 'None', 'None', 'Lannister', 'None', 'None',
        'None']], dtype=object)

In [68]:
house_names = np.unique(house_names)
house_names

array(['Baratheon', 'Blackwood', 'Bolton', 'Bracken', 'Brave Companions',
       'Brotherhood without Banners', 'Darry', 'Free folk', 'Frey',
       'Giants', 'Glover', 'Greyjoy', 'Karstark', 'Lannister',
       'Mallister', 'Mormont', "Night's Watch", 'None', 'Stark', 'Thenns',
       'Tully', 'Tyrell'], dtype=object)

In [69]:
house_names = house_names[house_names != 'None']
house_names

array(['Baratheon', 'Blackwood', 'Bolton', 'Bracken', 'Brave Companions',
       'Brotherhood without Banners', 'Darry', 'Free folk', 'Frey',
       'Giants', 'Glover', 'Greyjoy', 'Karstark', 'Lannister',
       'Mallister', 'Mormont', "Night's Watch", 'Stark', 'Thenns',
       'Tully', 'Tyrell'], dtype=object)

In [70]:
houses_to_battle_counts = pd.Series(0, index=house_names)
houses_to_battle_counts

Baratheon                      0
Blackwood                      0
Bolton                         0
Bracken                        0
Brave Companions               0
Brotherhood without Banners    0
Darry                          0
Free folk                      0
Frey                           0
Giants                         0
Glover                         0
Greyjoy                        0
Karstark                       0
Lannister                      0
Mallister                      0
Mormont                        0
Night's Watch                  0
Stark                          0
Thenns                         0
Tully                          0
Tyrell                         0
dtype: int64

In [71]:
for col in col_names:
    houses_to_battle_counts = houses_to_battle_counts.add(battles[col].value_counts(), fill_value = 0)
houses_to_battle_counts

Baratheon                      11.0
Blackwood                       1.0
Bolton                          4.0
Bracken                         1.0
Brave Companions                3.0
Brotherhood without Banners     1.0
Darry                           2.0
Free folk                       1.0
Frey                            4.0
Giants                          1.0
Glover                          2.0
Greyjoy                        11.0
Karstark                        2.0
Lannister                      18.0
Mallister                       1.0
Mormont                         2.0
Night's Watch                   1.0
Stark                          16.0
Thenns                          1.0
Tully                           7.0
Tyrell                          2.0
dtype: float64

In [72]:
plt.figure()
ax4 = houses_to_battle_counts.hist(bins=10)

<IPython.core.display.Javascript object>