## &#128205; Basic Setup

In [1]:
# Install pip packages in the current Jupyter kernel.
# `{sys.executable} -m pip` runs pip with the same Python interpreter the kernel
# is running, so the packages are installed where this notebook can import them.
# https://jakevdp.github.io/blog/2017/12/05/installing-python-packages-from-jupyter/ 
import sys 
!{sys.executable} -m pip install numpy
!{sys.executable} -m pip install pandas
# matplotlib is the only package pinned to a specific version here.
!{sys.executable} -m pip install matplotlib==3.0.3
!{sys.executable} -m pip install seaborn



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt 
import seaborn as sns 

# Notebooks often show red warning boxes that are not actual errors.
# Hide them to keep the output readable.
# NOTE(review): this filter has no category, so it silences every warning,
# including deprecation notices that may be worth reading.
import warnings
warnings.filterwarnings('ignore')

# Render figures directly in the browser page that runs the notebook.
%matplotlib inline 

In [3]:
# The os package can change the current working directory, which makes
# read_csv calls more convenient.
import os
os.getcwd() # check the current working directory
# os.chdir(r"______") # replace ______ with the folder that holds the file you want to load

'C:\\Users\\BokyungChoi\\Desktop\\GrowthHackers\\교육\\bonnie 세션\\시각화'

**주피터 노트북에서 시각화할 때 matplotlib 기본 폰트는 한글을 지원하지 않아 글자가 깨지므로, 한글 폰트를 따로 설정해 줘야 함**

In [4]:
import platform

import matplotlib.pyplot as plt
from matplotlib import font_manager, rc

# Recent matplotlib versions may well render Korean text without any of this.
# Disable the Unicode minus sign for tick labels; this is commonly paired with
# a Korean font so that negative numbers still display.
plt.rcParams['axes.unicode_minus'] = False

current_os = platform.system()

if current_os == 'Windows':
    # Resolve the family name from the Malgun Gothic font file.
    path = 'c:/Windows/Fonts/malgun.ttf'
    font_name = font_manager.FontProperties(fname=path).get_name()
    # Style sheet gallery: https://python-graph-gallery.com/199-matplotlib-style-sheets/
    # Kept ahead of rc() because a style sheet may reset font settings.
    plt.style.use('seaborn-darkgrid')
    rc('font', family=font_name)
elif current_os == 'Darwin':
    # macOS users
    rc('font', family='AppleGothic')

## &#128205; Read Data

In [5]:
# Load the Citi Bike trips from the working directory.
# For CSV files that contain Korean text it is a good idea to pass `encoding` explicitly.
df=pd.read_csv("nyc_citibike.csv",encoding='euc-kr')
# Preview the first rows.
df.head()

Unnamed: 0,start_date,end_date,start_hour,end_hour,trip_duration,start_station_id,start_station_name,start_station_latitude,start_station_longitude,end_station_id,end_station_name,end_station_latitude,end_station_longitude,bike_id,usertype,birth_year,gender,day_since_register
0,2018-05-01,2018-05-01,4,4,390,3002,South End Ave & Liberty St,40.711512,-74.015756,79,Franklin St & W Broadway,40.719116,-74.006667,21105,Subscriber,1945,male,36
1,2018-05-01,2018-05-01,1,1,854,3611,Vernon Blvd & 47 Rd,40.744907,-73.953457,3572,34 Ave & 38 St,40.756913,-73.921631,32219,Subscriber,1998,male,29
2,2018-05-01,2018-05-01,4,4,526,515,W 43 St & 10 Ave,40.760094,-73.994618,388,W 26 St & 10 Ave,40.749718,-74.00295,33433,Subscriber,1954,male,55
3,2018-05-01,2018-05-01,3,3,885,3641,Broadway & W 25 St,40.742869,-73.989186,336,Sullivan St & Washington Sq,40.730477,-73.999061,32028,Subscriber,1963,male,32
4,2018-05-01,2018-05-01,1,1,324,3440,Fulton St & Adams St,40.692418,-73.989495,3308,Kane St & Clinton St,40.686176,-73.996453,26425,Subscriber,1965,male,34


In [6]:
# Station and bike ids are labels, not quantities: store them as strings.
for id_column in ('start_station_id', 'end_station_id', 'bike_id'):
    df[id_column] = df[id_column].astype(str)

In [7]:
# Units can be changed to make the analysis easier to interpret.
# trip_duration is recorded in seconds, which is not very intuitive,
# so derive a trip_duration_min column expressed in minutes.
SECONDS_PER_MINUTE = 60
df['trip_duration_min'] = df['trip_duration'].div(SECONDS_PER_MINUTE)

In [8]:
# 99th percentile of the trip length in minutes.
cut_point = df['trip_duration_min'].quantile(0.99)

# Keep only trips strictly shorter than that threshold.
is_below_cut = df['trip_duration_min'] < cut_point
df_cut = df[is_below_cut]

## &#128205; 좌표 기반 Folium 활용 시각화

In [9]:
# Install folium with the kernel's own interpreter (relies on `sys` imported
# in the first cell), then import it for the map cells below.
!{sys.executable} -m pip install folium
import folium



### 1. 빈 캔버스 역할을 하는 지도를 그린다

In [10]:
def generateBaseMap(default_location=(40.746176, -73.996453),
                    default_zoom_start=11):
    """Create an empty folium map to use as the canvas for later layers.

    Args:
        default_location: (latitude, longitude) pair for the initial map
            centre. Any two-item sequence is accepted; ``None`` is passed
            through to ``folium.Map`` unchanged.
        default_zoom_start: Initial zoom level handed to ``folium.Map``.

    Returns:
        A new ``folium.Map`` with the scale control enabled.
    """
    # The default used to be a list literal, i.e. one shared mutable object
    # handed to every map. Give folium a fresh list on each call instead so
    # neither the default nor the caller's sequence can be aliased.
    location = None if default_location is None else list(default_location)
    return folium.Map(location=location,
                      control_scale=True,
                      zoom_start=default_zoom_start)

In [11]:
# Build the empty map with the default centre and zoom, then display it.
base_map=generateBaseMap()
base_map

### 2. 데이터프레임의 위도, 경도를 넣어 빈 캔버스에 히트맵을 칠한다

기반 데이터는 위에서 만든 `df_cut`입니다.

In [12]:
from folium.plugins import HeatMap

# Count trips per start-station coordinate so the heat map reflects usage.
# Grouping only the two coordinate columns by themselves and summing leaves no
# value column, which made every station a single unweighted point.
coord_cols = ['start_station_latitude', 'start_station_longitude']
station_trips = df_cut.groupby(coord_cols).size().reset_index(name='trip_count')

# Scale the counts so the busiest coordinate has weight 1.
# NOTE(review): assumes the heat layer's intensity scale tops out at 1.0 by
# default - confirm against the installed folium version.
station_trips['weight'] = station_trips['trip_count'] / station_trips['trip_count'].max()

# Each entry is [latitude, longitude, weight].
heat_data = station_trips[coord_cols + ['weight']].values.tolist()

HeatMap(data=heat_data, radius=8, max_zoom=11).add_to(base_map)

<folium.plugins.heat_map.HeatMap at 0x22e7e0fb710>

In [13]:
# Display the map again, now that the heat layer has been added to it.
base_map

### 3. 대여 기록이 가장 많은 상위 10개 Station의 좌표를 마커로 찍는다

In [14]:
# The 10 start stations with the most rental records.
start_station_counts = df_cut['start_station_name'].value_counts()
top_list = start_station_counts.nlargest(10).index.to_list()
top_list

['Pershing Square North',
 'West St & Chambers St',
 'Broadway & E 22 St',
 'W 21 St & 6 Ave',
 '8 Ave & W 33 St',
 'E 17 St & Broadway',
 'E 47 St & Park Ave',
 'W 41 St & 8 Ave',
 'W 22 St & 10 Ave',
 'W 38 St & 8 Ave']

In [15]:
# Reduce the trips to the distinct (name, latitude, longitude) rows per start station.
station_columns = ['start_station_name',
                   'start_station_latitude',
                   'start_station_longitude']
station_unique = df_cut[station_columns].drop_duplicates()
station_unique.head()

Unnamed: 0,start_station_name,start_station_latitude,start_station_longitude
0,South End Ave & Liberty St,40.711512,-74.015756
1,Vernon Blvd & 47 Rd,40.744907,-73.953457
2,W 43 St & 10 Ave,40.760094,-73.994618
3,Broadway & W 25 St,40.742869,-73.989186
4,Fulton St & Adams St,40.692418,-73.989495


In [16]:
# Keep only the rows whose station name is in the top-10 list.
is_top_station = station_unique['start_station_name'].isin(top_list)
station_unique_10 = station_unique[is_top_station]

In [17]:
# Display the coordinates of the selected stations.
station_unique_10

Unnamed: 0,start_station_name,start_station_latitude,start_station_longitude
6,8 Ave & W 33 St,40.751551,-73.993934
33,W 38 St & 8 Ave,40.754666,-73.991382
50,Pershing Square North,40.751873,-73.977706
114,W 41 St & 8 Ave,40.756405,-73.990026
116,Broadway & E 22 St,40.740343,-73.989551
139,W 21 St & 6 Ave,40.74174,-73.994156
257,W 22 St & 10 Ave,40.74692,-74.004519
303,West St & Chambers St,40.717548,-74.013221
363,E 47 St & Park Ave,40.755103,-73.974987
390,E 17 St & Broadway,40.73705,-73.990093


In [18]:
# Shorten the column labels used by the marker loop.
data = station_unique_10.rename(columns={
    'start_station_name': 'name',
    'start_station_latitude': 'lat',
    'start_station_longitude': 'lon',
})

# Add one marker per row to the map, with the station name as its popup.
for lat, lon, name in zip(data['lat'], data['lon'], data['name']):
    folium.Marker([lat, lon], popup=name).add_to(base_map)

In [19]:
# Display the map again, now that the station markers have been added to it.
base_map

### 4. 개인 로컬에 html로 저장

In [20]:
# Write the map to an HTML file; the relative path resolves against the
# current working directory.
base_map.save('map_nyc_citibike.html')

로컬에서 map_nyc_citibike.html을 검색하고 열어보세요!