In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
!pip install pysummarization

Collecting pysummarization
[?25l  Downloading https://files.pythonhosted.org/packages/7b/38/131f8574e0e12f27fa2d35b11a91055a67c8e55b205a505669c6df7881cb/pysummarization-1.1.4.tar.gz (57kB)
[K     |█████▊                          | 10kB 15.3MB/s eta 0:00:01[K     |███████████▍                    | 20kB 1.8MB/s eta 0:00:01[K     |█████████████████               | 30kB 2.4MB/s eta 0:00:01[K     |██████████████████████▉         | 40kB 1.7MB/s eta 0:00:01[K     |████████████████████████████▌   | 51kB 2.0MB/s eta 0:00:01[K     |████████████████████████████████| 61kB 1.9MB/s 
Building wheels for collected packages: pysummarization
  Building wheel for pysummarization (setup.py) ... [?25l[?25hdone
  Created wheel for pysummarization: filename=pysummarization-1.1.4-cp36-none-any.whl size=58337 sha256=caaa7b20d54a703b2a98fe5e1eb3211cde24b6cfe9b5327d29d1fbe3775c23b1
  Stored in directory: /root/.cache/pip/wheels/84/43/1c/575498c1b472967ba3f395edd26826fa095b52cee9553e52ce
Successful

In [6]:
!pip install pyquery # the package is needed for web-scraping

Collecting pyquery
  Downloading https://files.pythonhosted.org/packages/78/43/95d42e386c61cb639d1a0b94f0c0b9f0b7d6b981ad3c043a836c8b5bc68b/pyquery-1.4.1-py2.py3-none-any.whl
Collecting cssselect>0.7.9
  Downloading https://files.pythonhosted.org/packages/3b/d4/3b5c17f00cce85b9a1e6f91096e1cc8e8ede2e1be8e96b87ce1ed09e92c5/cssselect-1.1.0-py2.py3-none-any.whl
Installing collected packages: cssselect, pyquery
Successfully installed cssselect-1.1.0 pyquery-1.4.1


In [0]:
from pysummarization.web_scraping import WebScraping

# Single scraper instance, reused by scrape_page() for every URL fetched in this notebook.
web_scraper = WebScraping()

In [0]:
def scrape_page(url):
  """Scrape a single web page with the notebook-level `web_scraper`.

  Args:
    url: Address of the page to fetch.

  Returns:
    Whatever `web_scraper.scrape(url)` returns (presumably the page's
    text content; see the pysummarization docs to confirm).
  """
  scraped_content = web_scraper.scrape(url)
  return scraped_content

In [0]:
# Source document for the summarization run below.
article_url = 'https://www.bbc.com/future/article/20180104-is-social-media-bad-for-you-the-evidence-and-the-unknowns'
article = scrape_page(article_url)

In [0]:
from pysummarization.nlp_base import NlpBase
from pysummarization.tokenizabledoc.simple_tokenizer import SimpleTokenizer

In [0]:
# Stage 1: split the article into sentences, breaking on full stops and newlines.
nlp_base = NlpBase()
nlp_base.delimiter_list = [".", "\n"]
sentences = nlp_base.listup_sentence(article)

# Stage 2: tokenize the same raw article text (independent of the sentence split).
tokenizable_doc = SimpleTokenizer()
tokens = tokenizable_doc.tokenize(article)

In [20]:
# pydbm is not imported directly in this notebook; presumably it backs the
# pysummarization vectorizer/model classes used below — TODO confirm. Keep the pin.
!pip install pydbm==1.5.1

Collecting pydbm==1.5.1
[?25l  Downloading https://files.pythonhosted.org/packages/78/cf/a19f4c62c134acb9b2660b92a9ebd35835360aae42fcc606f50cb10d9abc/pydbm-1.5.1.tar.gz (6.9MB)
[K     |████████████████████████████████| 6.9MB 2.8MB/s 
Building wheels for collected packages: pydbm
  Building wheel for pydbm (setup.py) ... [?25l[?25hdone
  Created wheel for pydbm: filename=pydbm-1.5.1-cp36-cp36m-linux_x86_64.whl size=17028617 sha256=4cd39d993b889b4aa0d5e00614e892878dab5598fba11986c51fcfbb18c64f18
  Stored in directory: /root/.cache/pip/wheels/87/b8/77/06fb2ad0ba66fc3650dc88b3efcb096d9ab7439bc4a592311e
Successfully built pydbm
Installing collected packages: pydbm
Successfully installed pydbm-1.5.1


In [0]:
from pysummarization.vectorizabletoken.skip_gram_vectorizer import SkipGramVectorizer

In [0]:
# Hyperparameters for the skip-gram vectorizer, collected in one mapping so
# they can be inspected or tuned without touching the constructor call.
# feature_dim matches the last axis (500) of the windowed array built later.
skip_gram_params = dict(
    epochs=1000,
    skip_n=2,
    batch_size=20,
    feature_dim=500,
    learning_rate=1e-06,
    scale=1e-05,
)

skip_gram_vectorizer = SkipGramVectorizer(token_list=tokens, **skip_gram_params)

In [0]:
# Run the vectorizer's learning step. No arguments are passed, so it presumably
# trains on the token_list supplied to the constructor — confirm in the pysummarization docs.
skip_gram_vectorizer.learn()

In [0]:
# Vectorize the same tokens the vectorizer was constructed with, then wrap the
# result in an ndarray so the windowing cell can slice it along axis 0.
# Presumably one 500-long vector per token, which is consistent with the
# recorded (2376, 5, 500) shape of the windowed array — TODO confirm.
vector_list = skip_gram_vectorizer.vectorize(token_list=tokens)
vector_arr = np.array(vector_list)

In [37]:
seq_len = 5

# Build overlapping windows of `seq_len` consecutive token vectors, one window
# per start offset, stepping by a single token.
# NOTE(review): start offsets stop at N - seq_len - 1, so the final row of
# vector_arr never appears in any window — confirm this is intended.
window_starts = range(vector_arr.shape[0] - seq_len)
observed_list = [
    vector_arr[start : start + seq_len]
    for start in window_starts
]
observed_arr = np.array(observed_list)
observed_arr.shape

(2376, 5, 500)

In [0]:
from pysummarization.abstractablesemantics.re_seq_2_seq import ReSeq2Seq

# Build the ReSeq2Seq summarization model over the windowed token vectors.
# The values below are this notebook's choices; their exact semantics are
# defined by pysummarization and are not visible from this file.
abstractable_semantics = ReSeq2Seq(
    margin_param=0.01,
    retrospective_lambda=0.5,
    retrospective_eta=0.5,
    # None is passed for both components; presumably the library then builds
    # its own defaults — TODO confirm.
    encoder_decoder_controller=None,
    retrospective_encoder=None,
    # Input width follows the last axis of observed_arr (500 in the recorded run).
    input_neuron_count=observed_arr.shape[-1],
    hidden_neuron_count=500,
    weight_limit=1e+10,
    dropout_rate=0.5,
    pre_learning_epochs=1000,
    epochs=1000,
    batch_size=20,
    learning_rate=1e-02,
    # NOTE(review): a rate of 1.0 looks like "no learning-rate decay" — confirm.
    learning_attenuate_rate=1.0,
    attenuate_epoch=50,
    # Both seq_len and bptt_tau are tied to the window size used to build observed_arr.
    seq_len=seq_len,
    bptt_tau=seq_len,
    test_size_rate=0.3,
    tol=0.0
)

In [0]:
# The same array is passed as both input and target, i.e. the model is asked
# to learn its own input windows (presumably an auto-encoding objective).
abstractable_semantics.learn(observed_arr=observed_arr, target_arr=observed_arr)

In [39]:
# Number of summary sentences to request and to print.
summary_limit = 5

# Ask the trained model for a summary; it receives the windowed vectors, the
# vectorizer, and the original sentence list.
abstract_list = abstractable_semantics.summarize(
    observed_arr,
    skip_gram_vectorizer,
    sentences,
    limit=summary_limit,
)

# Slice defensively in case more than `summary_limit` sentences come back.
for summary_sentence in abstract_list[:summary_limit]:
    print(summary_sentence)

Spending more time on social media, the researchers said, could displace face-to-face interaction, and can also make people feel excluded.

That said, it’s unclear if and how social media causes anxiety.

One of the worst times to use social media may be just before bed (Credit: Getty Images)

That said, social media is changing faster than scientists can keep up with, so various groups are trying to study compulsive behaviours related to its use – for example, scientists from the Netherlands have invented their own scale to identify possible addiction.

 But they couldn’t clarify whether social media causes disturbed sleep, or if those who have disturbed sleep spend more time on social media.

