In [1]:
import pandas as pd
import numpy as np
import platform
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

#한글 깨짐 방지
from matplotlib import font_manager, rc
plt.rcParams['axes.unicode_minus'] = False


if platform.system() =='Darwin':
    rc('font', family = 'AppleGothic')
elif platform.system() == 'Windows':
    path = "c:/Windows/Fonts/malgun.ttf"
    font_name = font_manager.FontProperties(fname = path).\
    get_name()
    rc('font', family = font_name)
else : 
    print('Unknown system...sorry~~~~')

In [2]:
from chart_studio.plotly import plot
from chart_studio.plotly import iplot
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.tools as tls
from plotly.offline import *
import plotly.io as pio
import colorlover as cl
from IPython.display import HTML

In [3]:
import plotly
plotly.offline.init_notebook_mode(connected=True)

# 구디역 인근 8개 정류장 시각화

In [4]:
gudi_bike = pd.read_csv('../따릉따릉/visual/따릉이_역8개합_날씨없음.csv')

In [5]:
gudi_bike['date'] = pd.to_datetime(gudi_bike['date'], format = "%Y-%m-%d")

In [6]:
gudi_bike.groupby('day')['count'].sum()

day
Fri    536
Mon    421
Sat    208
Sun    117
Thu    518
Tue    526
Wed    444
Name: count, dtype: int64

In [7]:
gudi_bike['place'] = gudi_bike['place'].astype("category")

In [8]:
gudi_bike.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2920 entries, 0 to 2919
Data columns (total 5 columns):
date       2920 non-null datetime64[ns]
place      2920 non-null category
count      2920 non-null int64
holiday    2920 non-null int64
day        2920 non-null object
dtypes: category(1), datetime64[ns](1), int64(2), object(1)
memory usage: 94.6+ KB


In [71]:
station_sum= bike_metro.groupby('station_name')['count'].sum().sort_values(ascending = False)
station_sum

station_name
1911. 구로디지털단지역 앞     766
1924. 삼부르네상스파크빌      510
2115. 관악농협농산물백화점     379
1955. 디지털입구 교차로      337
1828. 한양수자인아파트 앞     246
2012. 녹십초요양병원앞       233
2113. 관악동작견인차량보관소    157
2110. 조원동 미성아파트      142
Name: count, dtype: int64

In [72]:
fig = go.Figure()
fig.add_bar(x= bike_metro['station_name'].unique(), y= station_sum)
fig.update_traces(marker_color='rgb(158,202,225)', marker_line_color='rgb(8,48,107)',
                  marker_line_width=1.5, opacity=0.6)
fig.show()

In [42]:
station_mean = bike_metro.groupby('station_name')['count'].mean().sort_values(ascending = False)

In [43]:
station_mean

station_name
1911. 구로디지털단지역 앞     2.098630
1924. 삼부르네상스파크빌      1.397260
2115. 관악농협농산물백화점     1.038356
1955. 디지털입구 교차로      0.923288
1828. 한양수자인아파트 앞     0.673973
2012. 녹십초요양병원앞       0.638356
2113. 관악동작견인차량보관소    0.430137
2110. 조원동 미성아파트      0.389041
Name: count, dtype: float64

In [44]:
fig = go.Figure()
fig.add_trace(go.Bar(y= gudi_bike.groupby('day')['count'].sum(),
                x = gudi_bike['day'].unique()))
fig.update_traces(marker_color = 'pink',
                  marker_line_width=1.5, opacity=0.6)
fig.update_layout(title_text="구디역 인근 8개 따릉이 정류장 총 대여건수(8~9시)")
fig.show()

In [45]:
fig = make_subplots(
    rows=3, cols=1,
    row_width=[0.67, 0.67, 0.67],
    start_cell="top-left",
    subplot_titles=("정류장별", "요일별", "평일/휴일별"))

fig.add_bar(x= bike_metro['station_name'].unique(), y= station_mean, marker_color = 'skyblue',
            row=1, col=1, opacity=0.6)

fig.add_bar(y=gudi_bike.groupby('day')['count'].mean(), 
            x = gudi_bike['day'].unique(), marker_color='lightsalmon', row=2, col=1,opacity=0.6)

fig.add_bar(y=gudi_bike.groupby('holiday')['count'].mean(), 
            x = ['평일', '휴일'], row=3, col=1, opacity=0.6)

fig.update_layout(showlegend= False, 
                  title_text="구디역 인근 8개 따릉이 정류장 평균 대여건수(8~9시)",
                  height=800, width=800)



fig.show()

In [46]:
gudi_1828 = pd.read_csv('../따릉따릉/visual/p_1828.csv')
gudi_1911 = pd.read_csv('../따릉따릉/visual/p_1911.csv')
gudi_1924 = pd.read_csv('../따릉따릉/visual/p_1924.csv')
gudi_1955 = pd.read_csv('../따릉따릉/visual/p_1955.csv')
gudi_2012 = pd.read_csv('../따릉따릉/visual/p_2012.csv')
gudi_2110 = pd.read_csv('../따릉따릉/visual/p_2110.csv')
gudi_2113 = pd.read_csv('../따릉따릉/visual/p_2113.csv')
gudi_2115 = pd.read_csv('../따릉따릉/visual/p_2115.csv')

In [75]:
gudi_bike[['place', 'count']]

Unnamed: 0,place,count
0,1911,2
1,1911,1
2,1911,1
3,1911,1
4,1911,2
5,1911,1
6,1911,0
7,1911,2
8,1911,1
9,1911,0


In [48]:
fig = go.Figure()

fig.add_scatter(x= gudi_1911['date'], y= gudi_bike.groupby('date')['count'].sum(), name='대여건수',
                         line=dict(color='royalblue', width=3))
fig.add_scatter(x = gudi_1911['date'], y = gudi_1911['mean_t'], name = '일평균기온')

fig.add_bar(x = gudi_1911['date'], y = gudi_1911['fine_dust'], name = '미세먼지')

fig.update_layout(height=500, width=1000,
                  title_text="일일 대여건수와 날씨의 상관관계")

fig.update_layout(
    xaxis=go.layout.XAxis(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label="1m",
                     step="month",
                     stepmode="backward"),
                dict(count=6,
                     label="6m",
                     step="month",
                     stepmode="backward"),
                dict(count=1,
                     label="YTD",
                     step="year",
                     stepmode="todate"),
                dict(count=1,
                     label="1y",
                     step="year",
                     stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)
fig.show()

In [49]:
plotly.offline.plot(fig, filename='fig_line.html')

'fig_line.html'

In [78]:
fig1 = go.Figure()
fig1.add_scatter(x= gudi_1911['date'], y= gudi_1911['count'], 
                mode='markers', name='1911. 구로디지털단지역 앞',marker=dict(color='SkyBlue', size= gudi_1911['count']*5))
fig1.add_scatter(x = gudi_1828['date'], y = gudi_1828['count'], 
                mode='markers', name = '1828. 한양수자인아파트 앞',marker=dict(color='Red', size= gudi_1828['count']*5))
fig1.add_scatter(x = gudi_1924['date'], y = gudi_1924['count'], 
                mode='markers', name = '1924. 삼부르네상스파크빌',marker=dict(color = 'Pink', size= gudi_1924['count']*5))
fig1.add_scatter(x = gudi_1955['date'], y = gudi_1955['count'],
                mode='markers', name = '1955. 디지털입구 교차로',marker=dict(size= gudi_1955['count']*5))
fig1.add_scatter(x = gudi_2012['date'], y = gudi_2012['count'],
                mode='markers', name = '2012. 녹십초요양병원앞',marker=dict(size= gudi_2012['count']*5))
fig1.add_scatter(x = gudi_2110['date'], y = gudi_2110['count'],
                mode='markers', name = '2110. 조원동 미성아파트',marker=dict(size= gudi_2110['count']*5))
fig1.add_scatter(x = gudi_2113['date'], y = gudi_2113['count'],
                mode='markers', name = '2113. 관악동작견인차량보관소', marker=dict(size= gudi_2113['count']*5))
fig1.add_scatter(x = gudi_2115['date'], y = gudi_2115['count'],
                mode='markers', name = '2115. 관악농협농산물백화점', marker=dict(size= gudi_2115['count']*5))

fig1.update_layout(height=600, width=1100,
                  title_text="구디 인근 8개 정류소 대여건수",)

fig1.update_layout(
    xaxis=go.layout.XAxis(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                     label="1m",
                     step="month",
                     stepmode="backward"),
                dict(count=6,
                     label="6m",
                     step="month",
                     stepmode="backward"),
                dict(count=1,
                     label="YTD",
                     step="year",
                     stepmode="todate"),
                dict(count=1,
                     label="1y",
                     step="year",
                     stepmode="backward"),
                dict(step="all")
            ])
        ),
        rangeslider=dict(
            visible=True
        ),
        type="date"
    )
)

fig1.show()

In [177]:
plotly.offline.plot(fig1, filename='fig1_bubble.html')

'fig1_bubble.html'

In [51]:
#정류소별 대여건수

fig = make_subplots(
    rows=4, cols=2,
    subplot_titles=('1911. 구로디지털단지역 앞', "1828. 한양수자인아파트 앞",'1924. 삼부르네상스파크빌', 
                    "1955. 디지털입구 교차로", "2012. 녹십초요양병원앞",
                   '2110. 조원동 미성아파트', '2113. 관악동작견인차량보관소','2115. 관악농협농산물백화점'))

fig.add_scatter(x= gudi_1911['date'], y= gudi_1911['count'], 
                name='1911. 구로디지털단지역 앞', row = 1, col = 1)
fig.add_scatter(x = gudi_1828['date'], y = gudi_1828['count'], 
                name = '1828. 한양수자인아파트 앞', row = 1, col = 2)
fig.add_scatter(x = gudi_1924['date'], y = gudi_1924['count'], 
                name = '1924. 삼부르네상스파크빌', row = 2, col = 1)
fig.add_scatter(x = gudi_1955['date'], y = gudi_1955['count'], 
                name = '1955. 디지털입구 교차로', row = 2, col = 2)
fig.add_scatter(x = gudi_2012['date'], y = gudi_2012['count'], 
                name = '2012. 녹십초요양병원앞',row = 3, col = 1)
fig.add_scatter(x = gudi_2110['date'], y = gudi_2110['count'], 
                name = '2110. 조원동 미성아파트', row = 3, col = 2)
fig.add_scatter(x = gudi_2113['date'], y = gudi_2113['count'], 
                name = '2113. 관악동작견인차량보관소', row = 4, col = 1)
fig.add_scatter(x = gudi_2115['date'], y = gudi_2115['count'], 
                name = '2115. 관악농협농산물백화점',row = 4, col = 2)


fig.update_layout(height=600, width=1100,
                  title_text="대여소 별 일일 대여건수",
                 showlegend = False)
fig.show()

In [11]:
gudi_metro = pd.read_csv('../따릉따릉/gudi.csv')

In [12]:
gudi_hacha = gudi_metro[gudi_metro['in out'] == 1]

In [13]:
gudi_hacha.sample(5)

Unnamed: 0,date,in out,08 ~ 09
429,2018-04-03,1.0,13536.0
251,2018-01-04,1.0,13112.0
569,2018-06-12,1.0,13128.0
627,2018-07-11,1.0,13207.0
335,2018-02-15,1.0,613.0


In [52]:
#지하철 이용객수와 대여건수의 상관관계
fig = make_subplots(
    rows=2, cols=1)

fig.add_scatter(x= gudi_1911['date'], y= gudi_bike.groupby('date')['count'].sum(), 
            name='따릉이 대여건수',
                marker_color = 'Green', row = 1, col = 1)

fig.add_scatter(x = gudi_hacha['date'], y = gudi_hacha["08 ~ 09"],
            name = '지하철 하차승객수', 
                marker_color = 'orange', row = 2, col = 1)

fig.update_layout(height=700, width=1200,
                  title_text="구디역 지하철 하차승객수와 따릉이 대여건수의 상관관계")


fig.show()

In [77]:
gudi_bike['month'] = 0
for i in range(len(gudi_bike)-1):
    gudi_bike['month'][i] = gudi_bike['date'][i].month



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy



KeyboardInterrupt: 

In [None]:
gudi_bike.sample

In [54]:
gudi_mean = gudi_bike.groupby('month')['count'].mean()
gudi_mean

month
0     0.698610
1     0.634409
2     0.455696
3     0.967742
4     1.733333
5     2.274194
6     4.033333
7     3.580645
8     4.483871
9     0.688889
10    0.698925
11    0.655556
12    0.376344
Name: count, dtype: float64

In [None]:
import plotly.express as px
fig2 = px.scatter_3d(gudi_bike, x='', y='place', z='count', size='count', color='place',
                    hover_data=['place'])
fig2.update_layout(scene_zaxis_type="log")
fig2.show()

In [56]:
plotly.offline.plot(fig2, filename='fig2_3d.html')

'fig2_3d.html'

In [57]:
bike_loc_metro = pd.read_csv('../따릉따릉/bike_loc_metro.csv')
bike_loc_metro.columns =  ['?', 'gu', 'place', 'station_name', 'lat', 'lng',
       'num_rack', 'nearest_metro', 'distance(km)']

In [69]:
bike_metro = pd.merge(gudi_bike, bike_loc_metro, on = 'place')

In [59]:
del bike_metro['?']

In [60]:
bike_metro.sample(5)

Unnamed: 0,date,place,count,holiday,day,month,gu,station_name,lat,lng,num_rack,nearest_metro,distance(km)
591,2018-04-15,1924,0,1,Sun,4,구로구,1924. 삼부르네상스파크빌,37.478741,126.895096,10,구로디지털단지,0.91429
1129,2017-10-05,2113,0,1,Thu,0,관악구,2113. 관악동작견인차량보관소,37.484661,126.9039,10,구로디지털단지,0.23053
827,2017-12-07,2115,0,0,Thu,12,관악구,2115. 관악농협농산물백화점,37.479916,126.90284,15,구로디지털단지,0.60829
2206,2017-09-17,2012,1,1,Sun,0,동작구,2012. 녹십초요양병원앞,37.488361,126.906227,10,구로디지털단지,0.5475
1167,2017-11-12,2113,0,1,Sun,0,관악구,2113. 관악동작견인차량보관소,37.484661,126.9039,10,구로디지털단지,0.23053


In [21]:
bike_metro['place'].unique()

array([1911, 1924, 2115, 2113, 2110, 1955, 2012, 1828], dtype=int64)

In [70]:
distance = bike_metro.groupby('station_name')['distance(km)'].mean().sort_values(ascending = False)
distance.index

Index(['1828. 한양수자인아파트 앞', '1924. 삼부르네상스파크빌', '2115. 관악농협농산물백화점',
       '2012. 녹십초요양병원앞', '1955. 디지털입구 교차로', '2110. 조원동 미성아파트',
       '2113. 관악동작견인차량보관소', '1911. 구로디지털단지역 앞'],
      dtype='object', name='station_name')

In [23]:
bike_loc = pd.read_excel('../따릉따릉/공공자전거 대여소 정보_201905.xlsx')
bike_loc.columns

Index(['구분', '대여소번호', '대여소명', '위도', '경도', '거치대수'], dtype='object')

In [24]:
import folium
import os
import leaflet
from folium import plugins

In [27]:
icon_subway = plugins.BeautifyIcon( icon='subway', 
                                 iconSize = [40,40], 
                                 backgroundColor = 'red', text_color='white', 
                                 icon_shape= 'marker', border_color = 'white')

In [28]:
#지도 시각화

gudi_map = folium.Map(location = [37.485266, 126.901401], zoom_start = 15)
folium.Marker([37.485266, 126.901401], popup = '구로디지털단지역', 
              tooltip = '구로디지털단지역',
              icon = icon_subway).add_to(gudi_map)

for n in bike_loc.index : 
    if bike_loc['대여소번호'][n] in bike_metro['place'].unique() :
        folium.Marker([bike_loc['위도'][n], bike_loc['경도'][n]], 
                      popup = bike_loc['대여소명'][n],
                      tooltip = bike_loc['대여소명'][n]).add_to(gudi_map)

gudi_map

In [403]:
icon_bike = plugins.BeautifyIcon( icon='bicycle', 
                                 iconSize = [40,40], 
                                 backgroundColor = 'green', text_color='white', 
                                 icon_shape= 'marker', border_color = 'white')

In [408]:
gudi_map.save('../따릉따릉/gudi_map.html')

In [61]:
fig = go.Figure()

fig.add_bar(x= bike_metro['station_name'].unique(), y= station_mean, 
            name='평균 대여건수', opacity=0.6, marker_color = 'yellow')
fig.add_bar(x = distance.index, y = distance.values, 
            name = '지하철역까지의 직선거리(km)', opacity=0.6, marker_color = 'green')

fig.update_layout(height=500, width=800,
                  title_text="정류소별 따릉이 대여건수와 지하철역 거리의 상관관계")
fig.show()

# 따릉이 일년치 모든 정류장 데이터로 시각화

In [30]:
seoul_bike = pd.read_csv('../따릉따릉/raw_data_201709_201808.csv')

In [31]:
seoul_bike.sample(5)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,bike_n,date,place,start_now,using_t,dist
7412227,7412227,7412227,SPB05949,2018-08-12 01:13:03,1117.0,1,4,890
7420785,7420785,7420785,SPB14004,2018-08-12 13:48:14,726.0,4,3,940
4831431,4831431,4831431,SPB10456,2018-06-01 11:26:15,332.0,3,8,920
6566362,6566362,6566362,SPB13746,2018-07-17 22:43:29,115.0,15,46,7500
582701,582701,582701,SPB10273,2017-09-21 18:03:00,202.0,8,17,1050


In [32]:
del seoul_bike['Unnamed: 0']
del seoul_bike['Unnamed: 0.1']

In [46]:
seoul_bike.shape

(8088851, 8)

In [59]:
seoul_bike['date'] = pd.to_datetime(seoul_bike['date'], format = "%Y-%m-%d")

In [60]:
bike_loc_metro = pd.read_csv('../따릉따릉/bike_loc_metro.csv')

In [61]:
df = pd.merge(seoul_bike, bike_loc_metro, on = 'place' )

In [62]:
df.head()

Unnamed: 0,Unnamed: 0_x,Unnamed: 0.1,bike_n,date,place,start_now,using_t,dist,Unnamed: 0_y,gu,station_name,lat,lng,num_rack,nearest_metro,distance(km)
0,0,0,SPB11045,2017-09-01 00:00:02,198.0,13,6,540,91,서대문구,198. 충정2교,37.562138,126.963776,15,충정로,0.24091
1,324,324,SPB02089,2017-09-01 00:21:56,198.0,6,5,700,91,서대문구,198. 충정2교,37.562138,126.963776,15,충정로,0.24091
2,584,584,SPB04570,2017-09-01 00:41:39,198.0,4,4,740,91,서대문구,198. 충정2교,37.562138,126.963776,15,충정로,0.24091
3,2594,2594,SPB01504,2017-09-01 06:27:44,198.0,14,5,1300,91,서대문구,198. 충정2교,37.562138,126.963776,15,충정로,0.24091
4,2602,2602,SPB06637,2017-09-01 06:28:13,198.0,2,8,2590,91,서대문구,198. 충정2교,37.562138,126.963776,15,충정로,0.24091


In [63]:
del df['Unnamed: 0_x']
del df['Unnamed: 0.1']

In [47]:
gudi_metro.columns

Index(['date', 'in out', '08 ~ 09'], dtype='object')

In [67]:
df3 = pd.read_csv("../따릉따릉/all_u_want_4sum.csv")
df3.sample(10)

Unnamed: 0,gu_name,year_month,count,dist,using_t
283,중구,201804,659.314286,2377185.0,15931.371429
94,금천구,201807,412.0,2099807.0,10579.675676
107,노원구,201808,785.224138,3285792.0,18520.310345
266,종로구,201711,810.423077,1481864.0,13199.769231
62,광진구,201711,669.658537,1544219.0,13007.97561
241,용산구,201710,1010.857143,4400278.0,37103.666667
247,용산구,201804,507.16129,2907671.0,17611.806452
2,강남구,201711,335.118644,1152358.0,8829.186441
85,금천구,201710,343.2,1675919.0,12729.32
202,성북구,201807,729.304348,2998507.0,17027.26087


In [35]:
df3['year_month'] = pd.to_datetime(df3['year_month'], format = "%Y%m")

In [36]:
fig3 = px.scatter_3d(df3, x='gu_name', y='year_month', z='count', size='count', color='gu_name',
                    hover_data=['gu_name'])
fig3.update_layout(scene_zaxis_type="log")
fig3.show()

In [37]:
plotly.offline.plot(fig3, filename='fig3.html')

'fig3.html'

In [63]:
df3['year_month'].unique()

array(['2017-09-01T00:00:00.000000000', '2017-10-01T00:00:00.000000000',
       '2017-11-01T00:00:00.000000000', '2017-12-01T00:00:00.000000000',
       '2018-01-01T00:00:00.000000000', '2018-02-01T00:00:00.000000000',
       '2018-03-01T00:00:00.000000000', '2018-04-01T00:00:00.000000000',
       '2018-05-01T00:00:00.000000000', '2018-06-01T00:00:00.000000000',
       '2018-07-01T00:00:00.000000000', '2018-08-01T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [68]:
import plotly.express as px
#"year_month" 컬럼 날짜형으로 안바꿔야 에러가 안남

fig_anim = px.scatter(df3, x= "dist", y= "using_t", animation_frame= 'year_month', animation_group="gu_name",
           size="count", color="gu_name", hover_name="gu_name",
           log_x=True, size_max=45) #range_x=[1,100], range_y=[100,100000])
fig_anim.show()

In [172]:
plotly.offline.plot(fig_anim, filename='fig_anim.html')

'fig_anim.html'

In [216]:
from PIL import Image

im1 = Image.open('../따릉따릉/gif/캡처1.png')
im2 = Image.open('../따릉따릉/gif/캡처2.png')
im3 = Image.open('../따릉따릉/gif/캡처3.png')
im4 = Image.open('../따릉따릉/gif/캡처4.png')
im5 = Image.open('../따릉따릉/gif/캡처5.png')
im6 = Image.open('../따릉따릉/gif/캡처6.png')
im7 = Image.open('../따릉따릉/gif/캡처7.png')
im8 = Image.open('../따릉따릉/gif/캡처8.png')
im9 = Image.open('../따릉따릉/gif/캡처9.png')
im10 = Image.open('../따릉따릉/gif/캡처10.png')
im11 = Image.open('../따릉따릉/gif/캡처11.png')
im12 = Image.open('../따릉따릉/gif/캡처12.png')
im1.save("fig_anim.gif", save_all=True, append_images=[im1, im2, im3, im4, im5, im6, im7, im8, im9, im10, im11], 
          duration=300, loop=0)