# Note:
Google Trends blocks scraping when too many requests are sent from a single loop, so the keyword categories have been split into batches of at most 3 (i.e. 3 DataFrame columns) that are scraped one batch at a time.

In [1]:
import pandas as pd
import datetime      
from pytrends.request import TrendReq
import time

In [2]:
# Folder that receives the scraped CSV files. The trailing slash matters:
# file names are appended to this string without any separator.
output_path = '../../../data/google_trends/'

# Keyword table; each column holds the search terms of one category.
df = pd.read_csv(filepath_or_buffer='google_trend_keyword.csv')

In [3]:
# Preview up to the first 50 rows of the keyword table.
df.head(50)

Unnamed: 0,general_terms,diagnosis,emotions,ngos,general terms,measures,impacts
0,mental illness,depression,unalive,Institute of Mental Health,covid-19,restriction,uncertainty
1,mental health,neurosis,help,IMH,epidemic,social distancing,death
2,self-harm,post traumatic stress disorder,dead,Samaritans Of Singapore,pandemic,circuit breaker,community cases
3,suicide,mood disorder,no motivation,SOS,pre-covid,quarantine,fatality rate
4,death,melancholia,loneliness,National Care Hotline,coronavirus,lockdown,cluster
5,therapy,dysthymia,lonely,Fei Yue’s Online Counselling Service,vaccine,wfh,retrenchment
6,counsellor,bipolar,isolated,Silver Ribbon Singapore,clinical trial,tighter measures,variant
7,counselling,insomnia,stress,Community Psychology Hub’s Online Counselling ...,contact tracing,phase 2,Stay Home Notice
8,trauma,,angst,Big Love Child Protection Specialist Centre,contactless,screening,shn
9,phobia,,emotion,HEART @ Fei Yue Child Protection Specialist Ce...,mask,testing,home based learning


In [4]:
# Report how many non-null keywords each column contributes, i.e. how many
# Google Trends requests that column will trigger.
for column_name in df.columns:
    non_null_shape = df[column_name].dropna().shape
    print(f'Projected shape of the {column_name} column is {non_null_shape}')

Projected shape of the general_terms column is (26,)
Projected shape of the diagnosis column is (8,)
Projected shape of the emotions column is (18,)
Projected shape of the ngos column is (16,)
Projected shape of the general terms column is (24,)
Projected shape of the measures column is (16,)
Projected shape of the impacts column is (13,)


In [5]:
# Scrape at most 3 columns per call: larger batches tend to get the session
# timed out by Google Trends.
startTime = time.time()  # wall-clock start, for optional timing of the run
pytrend = TrendReq(hl='en-US')  # hl: language of the query

# Batches of column names from the keyword table, one scraper call per batch.
category_list_1 = ['general_terms', 'diagnosis', 'emotions']
category_list_2 = ['ngos', 'general terms', 'measures']
category_list_3 = ['impacts']

def google_trends_scraper(category_list, df, *,
                          timeframe='2019-01-01 2021-07-07',
                          geo='SG',
                          pause_seconds=0):
    """Download Google Trends interest-over-time data, one CSV per category.

    For every column name in ``category_list`` the non-null cells of that
    column in ``df`` are used as keywords. Each keyword is requested on its
    own through the module-level ``pytrend`` client, the per-keyword results
    are joined side by side, and the combined table is written to
    ``f'{output_path}{category}.csv'`` (``output_path`` is module-level).
    Keywords that did not come back as a column are printed.

    Args:
        category_list: Column names of ``df`` to scrape. Keep it short
            (max 3) to avoid being timed out by Google Trends.
        df: The original keyword DataFrame, one column per category.
        timeframe: Date range passed to ``build_payload``.
        geo: Country code passed to ``build_payload``.
        pause_seconds: Seconds to sleep after each request. ``0`` (the
            default) disables throttling.

    Returns:
        None. Results are written to disk and a summary is printed.
    """
    filtered_df = df.loc[:, category_list]

    for idx, category in enumerate(category_list):
        keywords = filtered_df.iloc[:, idx].dropna().to_list()

        # pd.concat raises ValueError on an empty list, so a column without
        # any keyword is reported and skipped instead of aborting the batch.
        if not keywords:
            print(f"No keywords found under the '{category}' category; skipping")
            continue

        extracted_data = []
        for keyword in keywords:
            # One keyword per request.
            pytrend.build_payload(
                kw_list=[keyword],
                cat=0,  # category filter; 0 = no restriction (see pytrends docs)
                timeframe=timeframe,  # date range to pull
                geo=geo)  # country to pull the data from
            data = pytrend.interest_over_time()
            if not data.empty:
                # 'isPartial' is metadata, not a keyword series.
                data = data.drop(labels=['isPartial'], axis='columns')
            extracted_data.append(data)

            if pause_seconds > 0:
                time.sleep(pause_seconds)

        result = pd.concat(extracted_data, axis=1)

        # Keywords with an empty response contribute no column to `result`.
        missing_keywords = set(keywords).difference(result.columns.to_list())

        if not missing_keywords:
            print(f"All keywords under the '{category}' category has valid data")
        else:
            print(missing_keywords)

        result.to_csv(f'{output_path}{category}.csv')

#         executionTime = (time.time() - startTime)
#         print('Execution time in sec.: ' + str(executionTime))

# Run one cell at a time and wait a couple of seconds after the previous cell has finished
Do not run all cells at once; Google may deny your requests.

In [6]:
# Batch 1: scrape the 'general_terms', 'diagnosis' and 'emotions' columns.
google_trends_scraper(category_list_1, df)

{'psychological support'}
All keywords under the 'diagnosis'category has valid data
{'weighed down', 'unalive'}


In [7]:
# Batch 2: scrape the 'ngos', 'general terms' and 'measures' columns.
google_trends_scraper(category_list_2, df)

{'TOUCHline (Counselling)', 'Big Love Child Protection Specialist Centre', 'PAVE Integrated Services for Individual and Family Protection', 'Youth Mental Well-Being Network', 'HEART @ Fei Yue Child Protection Specialist Centre', 'Fei Yue’s Online Counselling Service', 'Community Psychology Hub’s Online Counselling platform'}
All keywords under the 'general terms'category has valid data
All keywords under the 'measures'category has valid data


In [8]:
# Batch 3: scrape the remaining 'impacts' column.
google_trends_scraper(category_list_3, df)

{'job insecurity'}
