In [1]:
import pandas as pd
import numpy as np

from pytrends.request import TrendReq
from pytrends.exceptions import ResponseError

import time
import os

In [2]:
!ls ./Data

bbc_news.csv                          description-dbscan-clusters.csv
bbc_news_new.csv                      description-kmeans-clusters.csv
combined-agg-complete-clusters.csv    description-optics-clusters.csv
combined-agg-ward-clusters.csv        title-agg-complete-clusters.csv
combined-dbscan-clusters.csv          title-agg-ward-clusters.csv
combined-kmeans-clusters.csv          title-dbscan-clusters.csv
combined-optics-clusters.csv          title-kmeans-clusters.csv
description-agg-complete-clusters.csv title-optics-clusters.csv
description-agg-ward-clusters.csv     [34mtopics[m[m


In [3]:
file_names = list(filter(lambda x: x.endswith('-clusters.csv') and x not in os.listdir('./Data/topics/'), os.listdir('./Data')))
file_names

['description-agg-complete-clusters.csv',
 'title-agg-complete-clusters.csv',
 'description-dbscan-clusters.csv',
 'combined-optics-clusters.csv',
 'description-kmeans-clusters.csv',
 'title-optics-clusters.csv',
 'description-agg-ward-clusters.csv',
 'title-kmeans-clusters.csv',
 'title-dbscan-clusters.csv',
 'combined-kmeans-clusters.csv',
 'combined-dbscan-clusters.csv',
 'combined-agg-ward-clusters.csv',
 'title-agg-ward-clusters.csv',
 'description-optics-clusters.csv']

In [4]:
datas = {k: None for k in file_names}
datas

{'description-agg-complete-clusters.csv': None,
 'title-agg-complete-clusters.csv': None,
 'description-dbscan-clusters.csv': None,
 'combined-optics-clusters.csv': None,
 'description-kmeans-clusters.csv': None,
 'title-optics-clusters.csv': None,
 'description-agg-ward-clusters.csv': None,
 'title-kmeans-clusters.csv': None,
 'title-dbscan-clusters.csv': None,
 'combined-kmeans-clusters.csv': None,
 'combined-dbscan-clusters.csv': None,
 'combined-agg-ward-clusters.csv': None,
 'title-agg-ward-clusters.csv': None,
 'description-optics-clusters.csv': None}

In [5]:
for k in file_names:
    datas[k] = pd.read_csv(f'./Data/{k}')
datas

{'description-agg-complete-clusters.csv':             word     tfidf  label
 0         forget  0.572116      8
 1        worried  0.584743      8
 2        extreme  0.533802      8
 3        helping  1.000000      8
 4           done  0.602631      8
 ...          ...       ...    ...
 1372  vulnerable  0.593579      9
 1373      launch  0.658241      9
 1374        book  0.812209      9
 1375     decider  0.479084      9
 1376        wild  0.742275      9
 
 [1377 rows x 3 columns],
 'title-agg-complete-clusters.csv':            word     tfidf  label
 0        punish  0.547101      6
 1        global  0.709860      6
 2         arena  0.614520      6
 3       highest  0.577350      6
 4         grant  0.568683      6
 ...         ...       ...    ...
 1561       year  0.918845      7
 1562     esteem  0.506666      7
 1563  childhood  0.597876      7
 1564    slavery  0.521664      7
 1565       epic  0.791599      7
 
 [1566 rows x 3 columns],
 'description-dbscan-clusters.csv':     

In [6]:
trends = TrendReq(hl='en-US', tz=360)
trends

<pytrends.request.TrendReq at 0x7fee6a238550>

In [7]:
def infer_topic(data):
    topics_data = pd.DataFrame()
    for kw in data['word']:
        print('Starting: ', kw)
        
        backoff = 1.0001
        while True:
            try:
                trends.build_payload(kw_list=[kw])
                related_topics = trends.related_topics()[kw]['top'][['topic_title', 'topic_type']][:3].T
                break
            except ResponseError as e:
                print('Retrying:', e)
                time.sleep(1 * backoff)
                if backoff < 100:
                    backoff = pow(backoff, 2)
            except Exception as e:
                print('Bad situation...')
                time.sleep(10)
            print('Using backoff: ', backoff)
        n = min(len(related_topics.loc['topic_title', :]), len(related_topics.loc['topic_type', :]))
        tmp = pd.DataFrame(pd.concat([related_topics.loc['topic_title', :], related_topics.loc['topic_type', :]])).T
        tmp.columns = [f'topic_title_{c}' for c in tmp.columns[:n]] + [f'topic_type_{c}' for c in tmp.columns[n:]]

        topics_data = topics_data.append(tmp)
        
        print('Finished: ', kw)
        print('*' * 20)
        time.sleep(1)
    return topics_data

In [8]:
topics = {k: None for k in file_names}
topics

{'description-agg-complete-clusters.csv': None,
 'title-agg-complete-clusters.csv': None,
 'description-dbscan-clusters.csv': None,
 'combined-optics-clusters.csv': None,
 'description-kmeans-clusters.csv': None,
 'title-optics-clusters.csv': None,
 'description-agg-ward-clusters.csv': None,
 'title-kmeans-clusters.csv': None,
 'title-dbscan-clusters.csv': None,
 'combined-kmeans-clusters.csv': None,
 'combined-dbscan-clusters.csv': None,
 'combined-agg-ward-clusters.csv': None,
 'title-agg-ward-clusters.csv': None,
 'description-optics-clusters.csv': None}

In [9]:
for k, d in datas.items():
    topics[k] = infer_topic(d)
    topics[k].to_csv(f'./Data/topics/{k}', index=False)

Starting:  forget
Finished:  forget
********************
Starting:  worried
Finished:  worried
********************
Starting:  extreme
Finished:  extreme
********************
Starting:  helping
Finished:  helping
********************
Starting:  done
Finished:  done
********************
Starting:  harder
Finished:  harder
********************
Starting:  operation
Finished:  operation
********************
Starting:  conflict
Finished:  conflict
********************
Starting:  crisis
Finished:  crisis
********************
Starting:  anthem
Finished:  anthem
********************
Starting:  default
Finished:  default
********************
Starting:  accused
Finished:  accused
********************
Starting:  female
Finished:  female
********************
Starting:  trend
Finished:  trend
********************
Starting:  opposition
Finished:  opposition
********************
Starting:  mayor
Finished:  mayor
********************
Starting:  string
Finished:  string
********************
Starting:  

Using backoff:  1.000400060004
Retrying: The request failed: Google returned a response with code 429.
Using backoff:  1.0008002800560067
Finished:  sound
********************
Starting:  correspondent
Finished:  correspondent
********************
Starting:  ad
Retrying: The request failed: Google returned a response with code 429.
Using backoff:  1.00020001
Finished:  ad
********************
Starting:  ground
Finished:  ground
********************
Starting:  happen
Retrying: The request failed: Google returned a response with code 429.
Using backoff:  1.00020001
Retrying: The request failed: Google returned a response with code 429.
Using backoff:  1.000400060004
Retrying: The request failed: Google returned a response with code 429.
Using backoff:  1.0008002800560067
Finished:  happen
********************
Starting:  screen
Finished:  screen
********************
Starting:  increasing
Finished:  increasing
********************
Starting:  writer
Finished:  writer
********************
Sta

Starting:  financial
Finished:  financial
********************
Starting:  borrow
Finished:  borrow
********************
Starting:  rain
Finished:  rain
********************
Starting:  window
Finished:  window
********************
Starting:  superb
Finished:  superb
********************
Starting:  freedom
Finished:  freedom
********************
Starting:  older
Finished:  older
********************
Starting:  appear
Finished:  appear
********************
Starting:  department
Finished:  department
********************
Starting:  escape
Finished:  escape
********************
Starting:  pregnant
Finished:  pregnant
********************
Starting:  remarkable
Finished:  remarkable
********************
Starting:  part
Finished:  part
********************
Starting:  unlikely
Finished:  unlikely
********************
Starting:  transport
Finished:  transport
********************
Starting:  regret
Finished:  regret
********************
Starting:  film
Finished:  film
********************
Startin

Finished:  producer
********************
Starting:  payment
Finished:  payment
********************
Starting:  leader
Finished:  leader
********************
Starting:  causing
Finished:  causing
********************
Starting:  process
Finished:  process
********************
Starting:  decided
Finished:  decided
********************
Starting:  flee
Finished:  flee
********************
Starting:  half
Finished:  half
********************
Starting:  shift
Finished:  shift
********************
Starting:  heading
Finished:  heading
********************
Starting:  emergency
Finished:  emergency
********************
Starting:  pub
Finished:  pub
********************
Starting:  ancient
Finished:  ancient
********************
Starting:  home
Finished:  home
********************
Starting:  suffer
Finished:  suffer
********************
Starting:  taking
Finished:  taking
********************
Starting:  retired
Finished:  retired
********************
Starting:  home
Finished:  home
***************

Finished:  majority
********************
Starting:  majority
Finished:  majority
********************
Starting:  restriction
Finished:  restriction
********************
Starting:  swimming
Finished:  swimming
********************
Starting:  landmark
Finished:  landmark
********************
Starting:  inside
Finished:  inside
********************
Starting:  direct
Finished:  direct
********************
Starting:  curb
Finished:  curb
********************
Starting:  state
Finished:  state
********************
Starting:  equipment
Finished:  equipment
********************
Starting:  offensive
Finished:  offensive
********************
Starting:  slump
Finished:  slump
********************
Starting:  resign
Finished:  resign
********************
Starting:  limit
Finished:  limit
********************
Starting:  apart
Finished:  apart
********************
Starting:  amazing
Finished:  amazing
********************
Starting:  middle
Finished:  middle
********************
Starting:  acting
Finis

Starting:  crown
Finished:  crown
********************
Starting:  lost
Finished:  lost
********************
Starting:  fallen
Finished:  fallen
********************
Starting:  selection
Finished:  selection
********************
Starting:  appointment
Finished:  appointment
********************
Starting:  survivor
Finished:  survivor
********************
Starting:  recall
Finished:  recall
********************
Starting:  village
Finished:  village
********************
Starting:  survivor
Finished:  survivor
********************
Starting:  railway
Finished:  railway
********************
Starting:  agreement
Finished:  agreement
********************
Starting:  importance
Finished:  importance
********************
Starting:  orchestra
Finished:  orchestra
********************
Starting:  tournament
Finished:  tournament
********************
Starting:  chase
Finished:  chase
********************
Starting:  crowned
Finished:  crowned
********************
Starting:  defined
Finished:  defined


Starting:  surviving
Finished:  surviving
********************
Starting:  miss
Finished:  miss
********************
Starting:  describe
Finished:  describe
********************
Starting:  roger
Finished:  roger
********************
Starting:  musical
Finished:  musical
********************
Starting:  year
Finished:  year
********************
Starting:  presenter
Finished:  presenter
********************
Starting:  inspired
Finished:  inspired
********************
Starting:  based
Finished:  based
********************
Starting:  building
Finished:  building
********************
Starting:  electricity
Finished:  electricity
********************
Starting:  undisputed
Finished:  undisputed
********************
Starting:  time
Finished:  time
********************
Starting:  producer
Finished:  producer
********************
Starting:  jockey
Finished:  jockey
********************
Starting:  fallen
Finished:  fallen
********************
Starting:  year
Finished:  year
********************
Sta

Starting:  fourth
Finished:  fourth
********************
Starting:  nature
Finished:  nature
********************
Starting:  ownership
Finished:  ownership
********************
Starting:  globe
Finished:  globe
********************
Starting:  listen
Finished:  listen
********************
Starting:  spotted
Finished:  spotted
********************
Starting:  dance
Finished:  dance
********************
Starting:  conte
Finished:  conte
********************
Starting:  collapse
Finished:  collapse
********************
Starting:  shed
Finished:  shed
********************
Starting:  culture
Finished:  culture
********************
Starting:  pair
Finished:  pair
********************
Starting:  arrest
Finished:  arrest
********************
Starting:  kelly
Finished:  kelly
********************
Starting:  sunk
Finished:  sunk
********************
Starting:  electricity
Finished:  electricity
********************
Starting:  fringe
Finished:  fringe
********************
Starting:  track
Finished: 

Starting:  pilot
Finished:  pilot
********************
Starting:  affect
Finished:  affect
********************
Starting:  coping
Finished:  coping
********************
Starting:  jet
Finished:  jet
********************
Starting:  rising
Finished:  rising
********************
Starting:  light
Finished:  light
********************
Starting:  decorated
Finished:  decorated
********************
Starting:  relation
Finished:  relation
********************
Starting:  crucial
Finished:  crucial
********************
Starting:  budget
Finished:  budget
********************
Starting:  martin
Finished:  martin
********************
Starting:  theyll
Finished:  theyll
********************
Starting:  ongoing
Finished:  ongoing
********************
Starting:  joining
Finished:  joining
********************
Starting:  handling
Finished:  handling
********************
Starting:  landmark
Finished:  landmark
********************
Starting:  popularity
Finished:  popularity
********************
Starting:

Finished:  flew
********************
Starting:  shelling
Finished:  shelling
********************
Starting:  wave
Finished:  wave
********************
Starting:  victim
Finished:  victim
********************
Starting:  van
Finished:  van
********************
Starting:  tackling
Finished:  tackling
********************
Starting:  bailey
Finished:  bailey
********************
Starting:  antiwar
Finished:  antiwar
********************
Starting:  wearing
Finished:  wearing
********************
Starting:  prison
Finished:  prison
********************
Starting:  prison
Finished:  prison
********************
Starting:  rescuer
Finished:  rescuer
********************
Starting:  driving
Finished:  driving
********************
Starting:  suspect
Finished:  suspect
********************
Starting:  worried
Finished:  worried
********************
Starting:  choose
Finished:  choose
********************
Starting:  german
Finished:  german
********************
Starting:  failing
Finished:  failing
***

Starting:  card
Finished:  card
********************
Starting:  man
Finished:  man
********************
Starting:  bar
Finished:  bar
********************
Starting:  vehicle
Finished:  vehicle
********************
Starting:  suspicion
Finished:  suspicion
********************
Starting:  moon
Finished:  moon
********************
Starting:  training
Finished:  training
********************
Starting:  metropolitan
Finished:  metropolitan
********************
Starting:  looking
Finished:  looking
********************
Starting:  stabbing
Finished:  stabbing
********************
Starting:  canada
Finished:  canada
********************
Starting:  car
Finished:  car
********************
Starting:  reveal
Finished:  reveal
********************
Starting:  tried
Finished:  tried
********************
Starting:  field
Finished:  field
********************
Starting:  incoming
Finished:  incoming
********************
Starting:  investigating
Finished:  investigating
********************
Starting:  su

Starting:  free
Finished:  free
********************
Starting:  abroad
Finished:  abroad
********************
Starting:  vulnerable
Finished:  vulnerable
********************
Starting:  dead
Finished:  dead
********************
Starting:  danger
Finished:  danger
********************
Starting:  caught
Finished:  caught
********************
Starting:  natural
Finished:  natural
********************
Starting:  small
Finished:  small
********************
Starting:  fled
Finished:  fled
********************
Starting:  panther
Finished:  panther
********************
Starting:  copyright
Finished:  copyright
********************
Starting:  grime
Finished:  grime
********************
Starting:  scan
Finished:  scan
********************
Starting:  corrie
Finished:  corrie
********************
Starting:  pick
Finished:  pick
********************
Starting:  rest
Finished:  rest
********************
Starting:  added
Finished:  added
********************
Starting:  standing
Finished:  standing
***

KeyboardInterrupt: 

In [None]:
!mkdir ./Data/topics

In [None]:
for k, v in topics.items():
    v.to_csv(f'./Data/topics/{k}')