# 主題：政治新聞資料分析
- 組員：王冠人、萬俊彥、廖品琪
- 時間：2019/01/29

## 研究目標
考量臺灣社會政治冷感的現狀，提供一個較為客觀的資料統整，藉由日常生活中的事件觀察政治人物的網路聲量變化，作為一種參與政治的方式。

## 研究方法
1. 爬蟲搜尋網路新聞
2. 進行資料清理及資料分析
3. 視覺化呈現研究結果

## 研究內容及結果

### Part I 爬蟲

- [新頭殼新聞爬蟲](/notebooks/political/crawler/new_talk_crawler.ipynb)
- [自由時報新聞爬蟲](/notebooks/political/crawler/liberty_times_crawler.ipynb)

### Part II 資料清洗及分析

- []()

### Part III 視覺化呈現研究結果

In [18]:
# import packages
import math
import plotly.plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets
from IPython.display import clear_output

In [2]:
# import data
import json

#### Section I 政治事件網路聲量分析

In [3]:
def get_scatter_trace(points_x, points_y, points_size = 50, trace_name = ''):
    """Build a markers-only ``go.Scatter`` trace.

    ``points_size`` drives the marker size and is also reused as the trace's
    ``text`` and ``customdata``; hover info is set to show the text together
    with the trace name.

    Args:
        points_x: x values of the points.
        points_y: y values of the points.
        points_size: marker size(s); callers in this file pass one size per
            point.
        trace_name: name given to the trace.

    Returns:
        The ``go.Scatter`` object.

    NOTE(review): with the scalar default (50) the same scalar is also passed
    as ``customdata`` -- confirm plotly accepts a non-array value there.
    """
    marker_style = dict(size=points_size)
    return go.Scatter(
        name=trace_name,
        mode='markers',
        x=points_x,
        y=points_y,
        marker=marker_style,
        text=points_size,
        customdata=points_size,
        hoverinfo='text+name',
    )

In [14]:
# Load the per-event data: {event name: {"YYYY-MM-DD": value}}.
# The context manager closes the file handle (the bare open() left it open),
# and the explicit UTF-8 encoding keeps the non-ASCII event names readable
# regardless of the platform's default encoding.
with open('event_tfdf_dict.json', encoding='utf-8') as events_file:
    events = json.load(events_file)
events

{'九二共識': {'2018-11-27': 0.00047779497618490654,
  '2018-12-11': 0.0011198556630478739,
  '2019-01-02': 0.000594875366356171,
  '2019-01-03': 0.0009785214540828807,
  '2019-01-04': 0.0010009700759377913,
  '2019-01-05': 0.0021587700031377474,
  '2019-01-06': 0.0024804303707760206,
  '2019-01-07': 0.002142680548306459,
  '2019-01-08': 0.0007637399759128161,
  '2019-01-09': 0.0009532888465204956,
  '2019-01-10': 0.000895789787996417,
  '2019-01-17': 0.0005603799611260894},
 '轉型正義': {'2018-07-21': 0.001509933774834437,
  '2018-09-12': 0.0011924252346649203,
  '2018-09-13': 0.0003632805404676943,
  '2018-09-15': 0.0005937008341496719,
  '2018-09-21': 0.0006298149351794468,
  '2018-10-06': 0.0005189296511890257,
  '2018-10-28': 0.000254968128983877},
 '立委補選': {'2018-12-01': 0.001678610991793457,
  '2018-12-04': 0.00085056853791745,
  '2018-12-05': 0.0006364180892482777,
  '2018-12-06': 0.000275930032027593,
  '2018-12-12': 0.000531512988197285,
  '2018-12-14': 0.0003341444811680529,
  '2018-

In [24]:
def sectionI_plot(ori_data, title=''):
    """Draw a bubble chart with one row of bubbles per event.

    Args:
        ori_data: mapping of event name -> {"YYYY-MM-DD" date string: value}.
            Each (date, value) pair becomes one bubble on that event's row.
        title: chart title.

    Bubble size is ``log(round(value * 10000, 2)) * 10`` clamped to be
    non-negative. The first event in ``ori_data`` is placed on the highest
    row (y = number of events), the last one on y = 1.
    """
    # Imported locally because the file-level ``datetime`` import only appears
    # in a later cell; without it a top-to-bottom run fails with NameError.
    from datetime import datetime

    # data
    fig_data = []
    row_count = len(ori_data)
    for index, name in enumerate(ori_data):
        x, y, size = [], [], []
        for date, tfdf_val in ori_data[name].items():
            x.append(datetime.strptime(date, "%Y-%m-%d"))
            y.append(row_count - index)
            scaled = round(tfdf_val * 10000, 2)  # constants need change
            # math.log raises ValueError for zero/negative input, so values
            # that round down to 0 get the same size-0 clamp as small ones.
            size.append(max(math.log(scaled) * 10, 0) if scaled > 0 else 0)
        fig_data.append(get_scatter_trace(x, y, size, trace_name=name))

    # layout
    fig_layout = go.Layout(
        title=title,
        xaxis=dict(
            title='Date'
        ),
        yaxis=dict(
            title='Event'
        )
    )

    fig = go.Figure(data=fig_data, layout=fig_layout)
    display(py.iplot(fig, filename='bubble-chart'))

# Render the Section I bubble chart for every event in `events`.
sectionI_plot(events)

#### Section II 事件相關人聲量分析

In [5]:
from datetime import datetime, timedelta
# Load the event -> people data used by Section II.
# The context manager closes the file handle (the bare open() left it open),
# and the explicit UTF-8 encoding avoids depending on the platform default.
with open('event_people_dict.json', encoding='utf-8') as event_people_file:
    event_people_relation = json.load(event_people_file)

In [29]:
def sectionII_plot(ori_data, selector_widget, start_date='2018-06-07', title=''):
    """Clear the cell output, then show the selector and a per-person bubble chart.

    Args:
        ori_data: mapping of person name -> sequence of numeric values.
            Element ``i`` of a sequence is plotted at ``start_date + i`` days;
            elements equal to 0 are skipped.
        selector_widget: object displayed again above the chart after the
            output has been cleared.
        start_date: "%Y-%m-%d" string giving the date of index 0.
        title: chart title.

    Bubble size is ``log(round(value, 2)) * 10`` clamped to be non-negative.
    The first person in ``ori_data`` is placed on the highest row.
    """
    start_date = datetime.strptime(start_date, "%Y-%m-%d")

    # data
    fig_data = []
    row_count = len(ori_data)
    for index, name in enumerate(ori_data):
        x, y, size = [], [], []
        for day_index, tfdf_val in enumerate(ori_data[name]):
            if tfdf_val != 0:
                x.append(start_date + timedelta(days=day_index))
                y.append(row_count - index)
                rounded = round(tfdf_val, 2)  # constants need change
                # A non-zero value can still round to 0 (or be negative), and
                # math.log raises ValueError for such input; clamp to size 0.
                size.append(max(math.log(rounded) * 10, 0) if rounded > 0 else 0)
        fig_data.append(get_scatter_trace(x, y, size, trace_name=name))

    # layout
    fig_layout = go.Layout(
        title=title,
        xaxis=dict(
            title='Date',
            titlefont=dict(
                size=18
            )
        ),
        yaxis=dict(
            title='People',
            titlefont=dict(
                size=18
            )
        )
    )

    fig = go.Figure(data=fig_data, layout=fig_layout)

    # Replace the previous output so repeated selections do not stack charts.
    clear_output()
    display(selector_widget)
    display(py.iplot(fig, filename='bubble-chart'))


In [30]:
# Dropdown listing every event name found in `events`.
event_selector = widgets.Dropdown(
    description='政治事件：',
    options=list(events),
)
display(event_selector)

def event_change(change):
    """Redraw the Section II chart for the event named in ``change['new']``.

    Keeps the ten people whose value lists have the largest sums for that
    event (largest first) and passes them to ``sectionII_plot`` with the
    event name as the chart title.
    """
    event_name = change['new']
    people_values = event_people_relation[event_name]
    ranked = sorted(
        people_values.items(),
        key=lambda entry: sum(entry[1]),
        reverse=True,
    )
    top_10_related_people = dict(ranked[:10])
    sectionII_plot(top_10_related_people, event_selector, title=event_name)

# Draw the chart once for the first event so output exists before any selection.
event_change({'new': list(events)[0]})

# Call event_change again whenever the dropdown's `value` changes.
event_selector.observe(event_change, names='value')

Dropdown(description='政治事件：', options=('九二共識', '轉型正義', '立委補選', '九合一選舉', '兩岸一家親', '深澳電廠', '造勢晚會', '台大校長', '雙城論壇…

#### Section III 人物聲量與事件疊圖分析