## Configure environment and install dependencies

In [0]:
# Install the `transformers` library (quietly, via -q); the next cell imports
# `pipeline` and `BertConfig` from it.
# NOTE(review): the version is not pinned. A later cell passes the keyword
# `pad_to_max_length` to the pipeline, which presumably depends on the installed
# transformers release — consider pinning the version this notebook was run with.
!pip install -q transformers

In [0]:
from __future__ import print_function
from transformers import pipeline
from google.colab import files
from transformers import BertConfig
import numpy as np
import tensorflow as tf
from tensorflow.keras import datasets, layers, models
import json
import math
import random

In [0]:
# Upload the trained classifier into the Colab runtime. A later cell loads it
# with models.load_model('model.h5'), so the uploaded file must be named model.h5.
files.upload() # upload model.h5

In [0]:
# BERT configuration for the 'bert-base-cased' checkpoint with num_labels=5
# (there are five category names in result_strings below).
config = BertConfig.from_pretrained('bert-base-cased', num_labels=5)
# Feature-extraction pipeline; predict_news_type calls it on a passage and feeds
# the result to the Keras model.
get_features = pipeline('feature-extraction','bert-base-cased',config=config)
# Keras classifier read from the model.h5 file uploaded in the previous cell.
model = models.load_model('model.h5')
# Print the layer-by-layer architecture as a sanity check.
model.summary()
# Category names; predict_news_type pairs entry i with the i-th value of the
# model's output vector, so the order here must match the model's output order.
result_strings = ['politics','society','technology','economy', 'sports']
def predict_news_type(passage):
    """Rank the news categories for a passage of text.

    The passage is run through the `get_features` pipeline (with
    `pad_to_max_length=True`), the resulting array is handed to
    `model.predict`, and the first row of that prediction is paired
    position-by-position with the labels in `result_strings`.

    Args:
        passage: The text to classify.

    Returns:
        A list of (label, score) tuples ordered from the highest score to the
        lowest. Scores are whatever the model's output layer emits; the example
        outputs in this notebook are not normalised probabilities.
    """
    features = np.array(get_features(passage, pad_to_max_length=True))
    # Keep only the first row of the prediction batch.
    scores = model.predict(features)[0]
    labelled = [(result_strings[idx], scores[idx]) for idx in range(len(scores))]
    # sorted() is stable, just like list.sort(), so tied scores keep label order.
    return sorted(labelled, key=lambda pair: pair[1], reverse=True)

HBox(children=(IntProgress(value=0, description='Downloading', max=230, style=ProgressStyle(description_width=…


Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_4 (Dropout)          (None, 512, 768, 1)       0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 511, 1, 32)        49184     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 10, 1, 32)         0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 320)               0         
_________________________________________________________________
dropout_5 (Dropout)          (None, 320)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 64)                20544     
_________________________________________________________________
dense_5 (Dense)              (None, 5)               

## Showcases

In [0]:
# Technology news example: an article about the University of Oxford
# coronavirus vaccine trial. The bare call on the last line makes the notebook
# display the ranked (label, score) list returned by predict_news_type.
technology_news = """
A coronavirus vaccine trial by University of Oxford researchers aims to get efficacy results by September, and manufacturing is already underway.

A team led by Sarah Gilbert, a professor of vaccinology, has recruited 500 volunteers from the ages of 18 to 55 for the early- and mid-stage randomized controlled trial. It will be extended to older adults and then to a final stage trial of 5,000 people. Gilbert said that the timing is ambitious but achievable.

“We would hope to have at least some doses that are ready to be used by September,” she said in an interview. “There won’t be enough for everywhere by then, but the more manufacturing we can do starting from now, then the more doses there will be.”
"""
predict_news_type(technology_news)

[('technology', 9.080535),
 ('society', 2.8555765),
 ('sports', 0.47508347),
 ('politics', 0.0),
 ('economy', 0.0)]

In [0]:
# Sports news example: an article about rumours that the Tokyo Olympics may be
# cancelled because of the coronavirus outbreak. The bare call on the last line
# makes the notebook display the ranked (label, score) list.
sports_news = """
Twenty-two people in Japan are being treated for the illness, some of whom are Chinese tourists who were taken ill after arriving in the country, and with little sign that health experts are bringing the outbreak under control, there have already been rumors that the Japanese government is considering cancelling the Olympics.

Read more: Coronavirus: Everything you need to know

The subject "Tokyo Olympics will be cancelled due to coronavirus" has been trending on Japanese Twitter feeds, with one message declaring that the Games "need to be cancelled to limit the spread of the coronavirus. There is a good chance that the virus will break out if millions of Chinese come to see the event."

Another Twitter message said, "The Tokyo Olympics should be cancelled or postponed until next year," while another message pointed out that the World Athletics Indoor Championship, due to be held in the Chinese city of Nanjing in March, has been put off until next year.
"""
predict_news_type(sports_news)

[('sports', 3.5492618),
 ('technology', 1.7592037),
 ('politics', 0.0),
 ('society', 0.0),
 ('economy', 0.0)]

In [0]:
# Economy news example: an article about the possible impact of the coronavirus
# outbreak on the global economy. The bare call on the last line makes the
# notebook display the ranked (label, score) list.
economy_news = """
The outbreak of the coronavirus could deal a blow to the global economy, threatening consumption and tourism. It also risks undermining business sentiment.

History offers a guide to how developments might unfold.

Most economic studies of the impact of pandemics such as the SARS virus in 2003 have been undertaken by epidemiologists who tend to focus on the cost-benefit of vaccination.

There are only a handful that analyse the broader economic impact. Still, a 2006 study by the World Bank provides some useful insight.

While the World Bank model is based upon a flu-style epidemic similar to that in 1918 – which killed 50 million people and affected up to a fifth of the global population - its assumptions are relatively conservative: a fatality rate of 2.5 per cent and a 20 per cent decline in tourism and services output (restaurants, air travel and other non-essential consumer spending).1

 Its epidemiological assumptions are more or less in keeping with what is currently estimated for the coronavirus, known as 2019-nCoV.

According to the model,  world output would shrink by as much as 3 per cent were 2019-nCoV to spread as broadly as the 1918 epidemic, with differences between regions fairly negligible. For comparison, in the aftermath of the US subprime mortgage crisis, global GDP contracted by 0.1 per cent in 2009, with advanced economies shrinking 3.4 per cent and developing markets expanding by 2.9 per cent.
"""
predict_news_type(economy_news)

[('economy', 8.898969),
 ('technology', 6.087762),
 ('society', 2.6641047),
 ('sports', 0.28453285),
 ('politics', 0.0)]

In [0]:
# Politics news example: a statement on the second meeting of the WHO Emergency
# Committee about the novel coronavirus outbreak. The bare call on the last line
# makes the notebook display the ranked (label, score) list.
politics_news = """
The second meeting of the Emergency Committee convened by the WHO Director-General under the International Health Regulations (IHR) (2005) regarding the outbreak of novel coronavirus 2019 in the People’s Republic of China, with exportations to other countries, took place on Thursday, 30 January 2020, from 13:30 to 18:35 Geneva time (CEST). The Committee’s role is to give advice to the Director-General, who makes the final decision on the determination of a Public Health Emergency of International Concern (PHEIC). The Committee also provides public health advice or suggests formal Temporary Recommendations as appropriate. 

Proceedings of the meeting
Members and advisors of the Emergency Committee were convened by teleconference

The Director-General welcomed the Committee and thanked them for their support. He turned the meeting over to the Chair, Professor Didier Houssin. 

Professor Houssin also welcomed the Committee and gave the floor to the Secretariat. 

A representative of the department of compliance, risk management, and ethics briefed the Committee members on their roles and responsibilities.

Committee members were reminded of their duty of confidentiality and their responsibility to disclose personal, financial, or professional connections that might be seen to constitute a conflict of interest. Each member who was present was surveyed and no conflicts of interest were judged to be relevant to the meeting. There were no changes since the previous meeting.  

The Chair then reviewed the agenda for the meeting and introduced the presenters. 
"""
predict_news_type(politics_news)

[('politics', 3.864341),
 ('sports', 0.34437746),
 ('society', 0.28189865),
 ('technology', 0.0),
 ('economy', 0.0)]

## Playground

In [0]:
#@title English news classification
# Form cell: `passage` is declared as a string form field via the #@param
# annotation; edit it and re-run the cell to see the ranked (label, score) list.
passage = "And a still more precise approximation tells us the asymptotic relative error: Stirling's formula undershoots n! by a factor of about 1/(12n). Even for fairly small n this more precise estimate is pretty good. For example, Stirling's approximation (4.23) gives a value near 3598696 when n = 10, and this is about 0.83% \u2248 1/120 too small. Good stuff, asymptotics." #@param {type:"string"}

predict_news_type(passage)


[('technology', 5.6692886),
 ('economy', 3.4878316),
 ('society', 2.7998297),
 ('sports', 0.5698351),
 ('politics', 0.0)]