In [1]:
from pytrends.request import TrendReq
import pandas as pd
from googletrans import Translator
import time
# Shared googletrans client; the analysis cells below call translator.translate()
# to turn each English keyword into the target country's language.
translator = Translator()

In [2]:
# Load the country lookup table and build an ISO code -> country name mapping
# straight from its 'iso' and 'Country' columns.
country_code_df = pd.read_csv("country_code.csv")

iso2name = dict(zip(country_code_df['iso'], country_code_df['Country']))

In [3]:

# Country-level analysis
#
# For every country in `country_iso`, translate each English keyword into the
# country's language, query Google Trends for that translated keyword over the
# study window, and write one row per (date, country) to
# 'google_trend_country.csv'.

country_iso = ['US', 'ES', 'IT', 'DE', 'FR', 'IR', 'GB', 'TR', 'CH']

# ISO country code -> googletrans destination language code.
country2lan = {
    "US": "en",
    "ES": "es",
    "IT": "it",
    "DE": "de",
    "FR": "fr",
    "CN": "zh-CN",
    "IR": "fa",
    "GB": "en",
    "TR": "tr",
    "CH": "de",
}

# tz: Timezone Offset (in minutes) US CST is '360'
pytrends = TrendReq(hl='en-US', tz=360, timeout=(10,25))

# key word lists (virus, symptoms, policy)
word_list = ["coronavirus", "covid", "dry cough", "fever", "shortness of breath", "face mask"]

# Single definition of the study window, so the 'date' column and the
# Google Trends timeframe cannot drift apart.
start_date = "2020-01-01"
end_date = "2020-05-04"
timeframe = start_date + ' ' + end_date
date_index = pd.date_range(start=start_date, end=end_date)
date_list = date_index.tolist()

output_columns = ['date', 'iso', 'country'] + word_list

# Collect one frame per country and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration.
country_frames = []

for iso in country_iso:

    frame = pd.DataFrame({
        'date': date_list,
        'iso': [iso] * len(date_list),
        'country': [iso2name[iso]] * len(date_list),
    })

    for word in word_list:
        translated_word = translator.translate(word, src="en", dest=country2lan[iso]).text
        print(translated_word)
        # Pause between requests to stay gentle on the remote services.
        time.sleep(1)
        pytrends.build_payload([translated_word],
                               cat=0,
                               timeframe=timeframe,
                               geo=iso,
                               gprop='')
        temp = pytrends.interest_over_time()
        # Only keep the keyword if Google Trends returned data for it; an empty
        # result has no column named after the keyword. Missing keywords become
        # NaN columns in the final reindex below.
        if translated_word in temp.columns:
            # Align on the date index rather than by position so each value
            # lands on its own date even if some dates are absent in the
            # response (those dates become NaN instead of raising).
            frame[word] = temp[translated_word].reindex(date_index).to_numpy()

    country_frames.append(frame)

country_df = pd.concat(country_frames, ignore_index=True)
country_df = country_df.reindex(columns=output_columns)

country_df.to_csv('google_trend_country.csv', index=False, header=True)


coronavirus
covid
dry cough
fever
shortness of breath
face mask
coronavirus
COVID-19
tos seca
fiebre
dificultad para respirar
mascarilla
coronavirus
covid
tosse secca
febbre
fiato corto
mascherina
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Schutzmaske
coronavirus
covid
toux sèche
fièvre
essoufflement
masque
ویروس کرونا
کووید
سرفه خشک
تب
تنگی نفس
ماسک صورت
coronavirus
covid
dry cough
fever
shortness of breath
face mask
koronavirüs
kovid
Kuru öksürük
ateş
nefes darlığı
yüz maskesi
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Schutzmaske


In [193]:
# Region-level analysis
#
# For every country in `country_iso`, list its sub-regions via Google Trends,
# then query each translated keyword per sub-region over the study window and
# write one row per (date, region) to 'google_trend_region.csv'.

country_iso = ['US', 'ES', 'IT', 'DE', 'FR', 'IR', 'GB']

# ISO country code -> googletrans destination language code.
country2lan = {
    "US": "en",
    "ES": "es",
    "IT": "it",
    "DE": "de",
    "FR": "fr",
    "CN": "zh-CN",
    "IR": "fa",
    "GB": "en",
    "TR": "tr",
    "CH": "de",
}

# tz: Timezone Offset (in minutes) US CST is '360'
pytrends = TrendReq(hl='en-US', tz=360, timeout=(10,25))

# key word lists (virus, symptoms, policy)
word_list = ["coronavirus", "covid", "dry cough", "fever", "shortness of breath", "face mask"]

# Single definition of the study window, so the 'date' column and the
# Google Trends timeframe cannot drift apart.
start_date = "2020-01-01"
end_date = "2020-04-05"
timeframe = start_date + ' ' + end_date
date_index = pd.date_range(start=start_date, end=end_date)
date_list = date_index.tolist()

output_columns = ['date', 'iso', 'country', 'geoName', 'geoCode'] + word_list

# Collect one frame per region and concatenate once at the end.
# DataFrame.append was removed in pandas 2.0 and re-copied the whole frame on
# every iteration.
region_frames = []

for iso in country_iso:
    print(iso)
    # Short probe query whose only purpose is to enumerate the country's
    # sub-regions together with their geo codes.
    pytrends.build_payload(["coronavirus"],
                           cat=0,
                           timeframe='2020-04-01 2020-04-02',
                           geo=iso,
                           gprop='')
    regions = pytrends.interest_by_region(resolution='COUNTRY', inc_low_vol=True, inc_geo_code=True)
    regions = regions.reset_index()
    region_list = list(zip(regions['geoName'], regions['geoCode']))
    print(region_list)

    # The translation depends only on the country, not on the region, so do it
    # once here instead of once per region (saves one remote call per
    # region/keyword pair).
    translated_words = {}
    if region_list:
        for word in word_list:
            translated_words[word] = translator.translate(word, src="en", dest=country2lan[iso]).text

    for name, code in region_list:
        frame = pd.DataFrame({
            'date': date_list,
            'iso': [iso] * len(date_list),
            'country': [iso2name[iso]] * len(date_list),
            'geoName': [name] * len(date_list),
            'geoCode': [code] * len(date_list),
        })

        for word in word_list:
            translated_word = translated_words[word]
            print(translated_word)
            # Pause between requests to stay gentle on the remote service.
            time.sleep(1)
            pytrends.build_payload([translated_word],
                                   cat=0,
                                   timeframe=timeframe,
                                   geo=code,
                                   gprop='')
            temp = pytrends.interest_over_time()
            # Only keep the keyword if Google Trends returned data for it; an
            # empty result has no column named after the keyword. Missing
            # keywords become NaN columns in the final reindex below.
            if translated_word in temp.columns:
                # Align on the date index rather than by position so each
                # value lands on its own date even if some dates are absent in
                # the response (those dates become NaN instead of raising).
                frame[word] = temp[translated_word].reindex(date_index).to_numpy()

        region_frames.append(frame)

region_df = pd.concat(region_frames, ignore_index=True)
region_df = region_df.reindex(columns=output_columns)

region_df.to_csv('google_trend_region.csv', index=False, header=True)


US
[('Alabama', 'US-AL'), ('Alaska', 'US-AK'), ('Arizona', 'US-AZ'), ('Arkansas', 'US-AR'), ('California', 'US-CA'), ('Colorado', 'US-CO'), ('Connecticut', 'US-CT'), ('Delaware', 'US-DE'), ('District of Columbia', 'US-DC'), ('Florida', 'US-FL'), ('Georgia', 'US-GA'), ('Hawaii', 'US-HI'), ('Idaho', 'US-ID'), ('Illinois', 'US-IL'), ('Indiana', 'US-IN'), ('Iowa', 'US-IA'), ('Kansas', 'US-KS'), ('Kentucky', 'US-KY'), ('Louisiana', 'US-LA'), ('Maine', 'US-ME'), ('Maryland', 'US-MD'), ('Massachusetts', 'US-MA'), ('Michigan', 'US-MI'), ('Minnesota', 'US-MN'), ('Mississippi', 'US-MS'), ('Missouri', 'US-MO'), ('Montana', 'US-MT'), ('Nebraska', 'US-NE'), ('Nevada', 'US-NV'), ('New Hampshire', 'US-NH'), ('New Jersey', 'US-NJ'), ('New Mexico', 'US-NM'), ('New York', 'US-NY'), ('North Carolina', 'US-NC'), ('North Dakota', 'US-ND'), ('Ohio', 'US-OH'), ('Oklahoma', 'US-OK'), ('Oregon', 'US-OR'), ('Pennsylvania', 'US-PA'), ('Rhode Island', 'US-RI'), ('South Carolina', 'US-SC'), ('South Dakota', 'US-SD

Maschera viso
coronavirus
covid
tosse secca
febbre
mancanza di respiro
Maschera viso
coronavirus
covid
tosse secca
febbre
mancanza di respiro
Maschera viso
DE
[('Baden-Württemberg', 'DE-BW'), ('Bavaria', 'DE-BY'), ('Berlin', 'DE-BE'), ('Brandenburg', 'DE-BB'), ('Bremen', 'DE-HB'), ('Hamburg', 'DE-HH'), ('Hesse', 'DE-HE'), ('Lower Saxony', 'DE-NI'), ('Mecklenburg-Vorpommern', 'DE-MV'), ('North Rhine-Westphalia', 'DE-NW'), ('Rhineland-Palatinate', 'DE-RP'), ('Saarland', 'DE-SL'), ('Saxony', 'DE-SN'), ('Saxony-Anhalt', 'DE-ST'), ('Schleswig-Holstein', 'DE-SH'), ('Thuringia', 'DE-TH')]
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Gesichtsmaske
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Gesichtsmaske
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Gesichtsmaske
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Gesichtsmaske
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Gesichtsmaske
Coronavirus
Covid
trockener Husten
Fieber
Kurzatmigkeit
Gesic