# 실전 인터랙티브 데이터 시각화

분석할 데이터를 읽고, Bokeh 라이브러리를 사용하여 다양한 인터랙티브 시각화 만들기

## 데이터 파일 읽기

In [60]:
# Load packages
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# NOTE(review): `show` is imported from both bokeh.io and bokeh.plotting;
# the second import simply rebinds the same name.
from bokeh.io import output_notebook, show
from bokeh.plotting import figure, show
# Send Bokeh output to the notebook so show() renders plots inline.
output_notebook()

# Mount Google Drive at /content/gdrive so files stored on it can be read
# from this Colab session.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [61]:
# Path to the video-game sales CSV; it has no leading slash, so it is
# resolved relative to the notebook's current working directory.
fname = 'gdrive/My Drive/Colab Notebooks/data/vgsales.csv'

# Read the whole CSV into a DataFrame.
data = pd.read_csv(filepath_or_buffer=fname)

# Bare expression as the cell's last statement so the notebook displays it.
data

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,259,Asteroids,2600,1980,Shooter,Atari,4.00,0.26,0.00,0.05,4.31
1,545,Missile Command,2600,1980,Shooter,Atari,2.56,0.17,0.00,0.03,2.76
2,1768,Kaboom!,2600,1980,Misc,Activision,1.07,0.07,0.00,0.01,1.15
3,1971,Defender,2600,1980,Misc,Atari,0.99,0.05,0.00,0.01,1.05
4,2671,Boxing,2600,1980,Fighting,Activision,0.72,0.04,0.00,0.01,0.77
...,...,...,...,...,...,...,...,...,...,...,...
16319,16565,Mighty No. 9,XOne,2016,Platform,Deep Silver,0.01,0.00,0.00,0.00,0.01
16320,16572,Resident Evil 4 HD,XOne,2016,Shooter,Capcom,0.01,0.00,0.00,0.00,0.01
16321,16573,Farming 2017 - The Simulation,PS4,2016,Simulation,UIG Entertainment,0.00,0.01,0.00,0.00,0.01
16322,16579,Rugby Challenge 3,XOne,2016,Sports,Alternative Software,0.00,0.01,0.00,0.00,0.01


## 인터랙티브 데이터 시각화

In [121]:
from bokeh.transform import factor_cmap
from bokeh.palettes import Blues8, Blues256, linear_palette
from bokeh.models import ColumnDataSource, CategoricalColorMapper

# Horizontal bar chart of the average global sales per platform.
#
# Aggregate instead of dropping duplicates: drop_duplicates(keep='last')
# would leave one arbitrary game row per platform, so each bar would show a
# single game's sales rather than a per-platform figure. The result goes
# into its own name so the full `data` frame stays available to other cells.
platform_sales = (
    data.groupby('Platform', as_index=False)['Global_Sales']
    .mean()
    # Ascending order puts the best-selling platform at the top of the chart,
    # because a categorical y-range is laid out from bottom to top.
    .sort_values('Global_Sales')
)
data_cds = ColumnDataSource(platform_sales)
p_list = platform_sales['Platform'].tolist()

# One colour per platform. A fixed 8-colour palette is shorter than the list
# of platforms, and Bokeh paints the unmatched factors with `nan_color`.
platform_palette = linear_palette(Blues256, len(p_list))

# NOTE(review): plot_width/plot_height were removed in Bokeh 3.0 in favour of
# width/height; rename them if this notebook is run on Bokeh >= 3.
p = figure(
    y_range=p_list,
    plot_width=800,
    plot_height=600,
    title='Platform With Top Sales',
    x_axis_label='Sales',
)

p.hbar(
    y='Platform',
    right='Global_Sales',
    left=0,
    height=.4,
    fill_color=factor_cmap(
        'Platform',
        palette=platform_palette,
        factors=p_list,
    ),
    fill_alpha=.9,
    source=data_cds,
    legend_label='Sales',
)
show(p)



사용한 데이터는 비디오 게임 판매량 데이터이며, 시각화의 목적은 어떤 게임이 가장 많이 팔렸는지를 플랫폼별로 비교하는 것입니다.

플랫폼 기준으로 중복된 데이터가 많았기 때문에 이를 플랫폼별로 정리하였고, 정리한 데이터를 ColumnDataSource에 담아 그래프의 데이터 객체로 사용하였습니다. 여기에 레이블과 색상을 추가하여, 위에 보이는 바와 같이 각 플랫폼의 판매 실적 평균을 출력해 보았습니다.