## Google Trends related queries 
Objective: get the related queries for every date on which interest in coronavirus reached a peak (for every country).

A peak of interest is reached when the interest value approaches 100.

In [None]:
import pandas as pd
from pytrends.request import TrendReq

In [None]:
# Load the interest-over-time data and preview 10 randomly sampled rows
interest = pd.read_csv("data/interest_over_time.csv")
interest.sample(n=10)

In [None]:
# Keep only the rows whose coronavirus interest reached a peak (value >= 90).
#
# A threshold of 90 appears to keep enough relevant peak dates while staying
# selective enough to avoid picking up runs of consecutive days.
peak_filtered = interest.loc[interest["coronavirus"] >= 90]
peak_filtered

In [None]:
# Create the Google Trends client used by the requests below.
# No Google account credentials are passed; language and timezone offset
# are spelled out with the library's default values.
pytrends = TrendReq(hl="en-US", tz=360)

In [None]:
# Try a single request for one fixed date and one location
pytrends.build_payload(
    kw_list=["coronavirus"],
    cat=0,
    timeframe="2020-03-22 2020-03-22",
    geo="AU",
    gprop="",
)
related_queries = pytrends.related_queries()
rising_queries = related_queries["coronavirus"]["rising"]

In [None]:
# Display the rising related queries obtained from the test request
rising_queries

In [None]:
# Prototype: turn the first 5 rising queries into a dictionary
rising_dict = rising_queries["query"].iloc[:5].to_dict()
rising_dict

In [None]:
# Function that calls the related_queries method for one row of the filtered dataframe

# Maps the country names used in the data to the geo codes passed to Google Trends
_COUNTRY_GEO = {
    'Australia': 'AU',
    'Canada': 'CA',
    'Ireland': 'IE',
    'South_Africa': 'ZA',
    'United_Kingdom': 'GB',
    'United_States_of_America': 'US',
}


def get_related_queries(date, country):
    """Return the top 5 rising queries related to 'coronavirus' for one day and country.

    The request is made through the module-level ``pytrends`` client, with a
    timeframe that starts and ends on ``date``.

    Args:
        date: Day to query; it is interpolated as ``f'{date} {date}'`` into
            the timeframe sent to Google Trends.
        country: Country name; must be one of the keys of ``_COUNTRY_GEO``.

    Returns:
        A dict built from the first 5 entries of the ``query`` column of the
        rising-queries frame (keyed by that frame's index). An empty dict is
        returned when the country is unknown, when the request fails, or when
        no rising queries are available, so that one bad row does not abort
        a bulk run over many rows.
    """
    print(date, country)

    geo = _COUNTRY_GEO.get(country)
    if geo is None:
        print("failed for:", date, country)
        return {}

    try:
        pytrends.build_payload(['coronavirus'], cat=0, timeframe=f'{date} {date}', geo=geo, gprop='')
        # Using the related query method of pytrends based on payload information
        related_queries = pytrends.related_queries()
    except Exception as error:
        # Best effort per row: report the failure and keep going instead of
        # falling through to code that needs the (missing) response.
        print("failed for:", date, country, error)
        return {}

    rising_queries = related_queries['coronavirus']['rising']
    if rising_queries is None:
        # pytrends reports "no rising queries" as None rather than an empty frame
        return {}

    # Transform the top 5 queries into a dictionary to be put into a new cell
    return rising_queries['query'].head(5).to_dict()

# Try the function on the fifth row of the filtered dataframe
get_related_queries(
    peak_filtered["date"].iloc[4],
    peak_filtered["Country"].iloc[4],
)


In [None]:
# Call the function for every row of the filtered dataframe and store the
# results in a new 'related_queries' column.
#
# `peak_filtered` is a filtered slice of `interest`, so assigning a column on
# it directly triggers pandas' SettingWithCopyWarning. `assign` returns a new
# dataframe instead, which avoids the chained assignment. Iterating over the
# two columns also works when the filtered dataframe is empty.
peak_filtered = peak_filtered.assign(
    related_queries=[
        get_related_queries(date, country)
        for date, country in zip(peak_filtered["date"], peak_filtered["Country"])
    ]
)

In [None]:
# Write the enriched dataframe to a comma-separated file, without the index
peak_filtered.to_csv("data/related_queries.csv", index=False, sep=",")