In [1]:
import pandas as pd

from country_abbrev import *
from country_language import *
from pytrends.request import TrendReq

import os
import pycountry
import itertools

from googletrans import LANGCODES
import swifter

import trends_helpers
import numpy as np

In [2]:
currencies = pd.read_excel("countries-codes-and-currencies-(2020).xlsx")

In [3]:
keywords_list= list(set(currencies["Currency Name"]))
len(keywords_list)

141

In [4]:
countries_iso3 = currencies["Country Code"]
countries = [pycountry.countries.get(alpha_3=code).alpha_2 for code in countries_iso3]

In [5]:
# list of all unique languages:
unique_languages = pd.Series(list(set(list(itertools.chain(*country_language_dict.values())))), name='language')

# list of language codes from googletrans
langcodes = pd.DataFrame.from_dict(LANGCODES, orient='index', columns=['code'])
langcodes.index = langcodes.index.str.capitalize()

refugee_lang = unique_languages.to_frame().merge(langcodes, left_on='language', right_index=True, how='left')

refugee_lang.dropna(inplace=True)

refugee_lang_not_en = refugee_lang[refugee_lang['code'] != 'en']

In [6]:
translated_keyword_currency = refugee_lang_not_en['code'].swifter.apply(lambda x: trends_helpers.translate_keywords_list(lst = keywords_list, lang= x))

Pandas Apply:   0%|          | 0/83 [00:00<?, ?it/s]

In [7]:
refugee_lang_not_en

Unnamed: 0,language,code
0,Amharic,am
3,Belarusian,be
6,Samoan,sm
11,Sesotho,st
12,French,fr
...,...,...
187,Afrikaans,af
188,Romanian,ro
189,Danish,da
190,Croatian,hr


In [8]:
columns = list(refugee_lang_not_en['code'])

df = pd.concat([pd.DataFrame(sublist, columns=[col]) for sublist, col in zip(translated_keyword_currency, columns)], axis=1)

df['en']=keywords_list

df.head()

Unnamed: 0,am,be,sm,st,fr,ca,ar,uk,de,km,...,ko,uz,ceb,kk,af,ro,da,hr,th,en
0,ጋና ሲዲ,ганскі сядзі,ghana cedi,ghana cedi,cedi ghanéen,cedi de ghana,غانا سيدي,ганський седі,ghana-cedi,ហ្កាណា សេឌី,...,가나 세디,gana sedi,ghana cedi,гана седиі,ghana cedi,ghana cedi,ghana cedi,ganski cedi,เซดีกานา,Ghana Cedi
1,የኮንጐ ፍራንክ,кангалезскі франк,congolese franc,franc ea congo,franc congolais,franc congolès,فرنك كونغولي,конголезький франк,kongolesischer franc,ហ្វ្រង់កុងហ្គោ,...,콩고 프랑,kongo franki,congolese franc,конголық франк,kongolese frank,franc congolez,congolesiske franc,kongoanski franak,ฟรังก์คองโก,Congolese Franc
2,ሩብል,рубель,ruble,ruble,rouble,ruble,روبل,рубль,rubel,រូប្លិ,...,루블,rubl,ruble,рубль,roebel,rublă,rubler,rublja,รูเบิล,Rouble
3,የሊባኖስ ፓውንድ,ліванскі фунт,pauna a lepanona,lebanese ponto,livre libanaise,lliura libanesa,ليرة لبنانية,ліванський фунт,libanesisches pfund,ផោនលីបង់,...,레바논 파운드,livan funti,lebanese pound,ливан фунты,libanese pond,lira libaneză,libanesisk pund,libanonska funta,ปอนด์เลบานอน,Lebanese Pound
4,ሰኞ,панядзелак,aso gafua,mantaha,lundi,dilluns,الاثنين,понеділок,montag,ថ្ងៃច័ន្ទ,...,월요일,dushanba,lunes,дүйсенбі,maandag,luni,mandag,ponedjeljak,วันจันทร์,Som


In [9]:
country_language_dict
max_length = max(map(len, country_language_dict.values()))

data_padded = {key: arr + [np.nan] * (max_length - len(arr)) for key, arr in country_language_dict.items()}


In [10]:
langs = pd.DataFrame(data_padded)
langs = langs.T
langs = langs.reset_index()

In [11]:
max_length = max(map(len, country_language_dict.values()))

data_padded = {key: arr + [np.nan] * (max_length - len(arr)) for key, arr in country_language_dict.items()}

langs = pd.DataFrame(data_padded)
langs = langs.T
langs = langs.reset_index()
langs = langs.rename(columns={'index': 'Country', 0:'lang1', 1:'lang2', 2:'lang3'})

# Apply the function to the 'Country' column
langs['ISO2'] = langs['Country'].apply(trends_helpers.get_iso2_country_code)

langs = langs.drop(columns=["Country"])
langs_long = pd.melt(langs, id_vars=['ISO2'], var_name='numlang', value_name='lang')
langs_long = langs_long.dropna()

In [12]:
langs_long = pd.merge(langs_long, refugee_lang_not_en, left_on="lang", right_on="language")

In [13]:
count_files=trends_helpers.file_counter("currency_partial results/original_lang")
print("{:.1f}% done.".format((count_files+8)/len(countries)*100))

38.7% done.


In [15]:
from pytrends.request import TrendReq
import time


# Set up the Google Trends API object
# pytrends = TrendReq()

results = pd.DataFrame() 

# Loop through regions and keywords

count_files=trends_helpers.file_counter("currency_partial results/original_lang")


for i, country in enumerate(langs_long["ISO2"][count_files+8:], start=count_files+8): # added +x a posteriori - empty dfs
    
    languagecode= langs_long["code"][i]

    for keyword in df[languagecode]:    
        print("Searching: " + str(keyword) + " in " + str(country))
        time.sleep(4)
        
        pytrends = TrendReq(tz=360, timeout=(10, 25), retries=2, backoff_factor=0.5,
            requests_args={'headers': {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)'}})
        
        # Build the payload with the selected region, keyword and date range
        pytrends.build_payload(kw_list=[keyword], timeframe='all', geo=str(country))
        # Get the interest over time data for the selected keyword
        interest_over_time_df = pytrends.interest_over_time()

        # Check if the DataFrame is not empty
        if not interest_over_time_df.empty:
            interest_over_time_df= interest_over_time_df.reset_index()
            interest_over_time_df['trends_index'] = interest_over_time_df.iloc[:, 1] 
            interest_over_time_df = interest_over_time_df.drop(interest_over_time_df.columns[1], axis=1)

            interest_over_time_df = interest_over_time_df.drop(columns=["isPartial"], axis=1)
            interest_over_time_df["keyword"]=keyword
            interest_over_time_df["region"] = country
            
            # Append the results to the main dataframe
            results = pd.concat([results, interest_over_time_df])
    
    if not results.empty:
        results_country = results[results["region"]==country]
        if not results_country.empty:
            translation = {'keyword': df[langs_long["code"][i]], 'keyword_en': df["en"]}
            translation = pd.DataFrame(translation)
            results_country = pd.merge(results_country, translation, left_on="keyword", right_on="keyword", how="left")
            file_path = 'currency_partial results/original_lang/' + str(country) + '_' + languagecode + '.csv'
            results_country.to_csv(file_path, index=False)
        else:
            count_files = count_files+1 # because the loop interrups, should find a way to store this to memory
    else:
        count_files = count_files+1
    time.sleep(300) # Wait a few minutes until starting with next country 


Searching: ghana-cedi in RO
Searching: kongolesischer franc in RO
Searching: rubel in RO
Searching: libanesisches pfund in RO
Searching: montag in RO
Searching: kapgrüner schild in RO
Searching: griwna in RO
Searching: barbados-dollar in RO
Searching: isländische krone in RO
Searching: china in RO
Searching: niederschlag in botswana in RO
Searching: wandelbare marke in RO
Searching: norwegische krone in RO
Searching: nehmen in RO
Searching: algerischer dinar in RO
Searching: brasilianischer real in RO
Searching: guarani in RO
Searching: zloty in RO
Searching: es gibt in RO
Searching: irakischer dinar in RO
Searching: summe in RO
Searching: kubanischen peso in RO
Searching: kyat in RO
Searching: tansania-schilling in RO
Searching: neuer schekel in RO
Searching: sie in RO
Searching: kenianischer schilling in RO
Searching: pa'anga in RO
Searching: salomon-dollar in RO
Searching: mosambikanische metical in RO
Searching: fidschi-dollar in RO
Searching: drama in RO
Searching: pennen in RO
Se

RetryError: HTTPSConnectionPool(host='trends.google.com', port=443): Max retries exceeded with url: /trends/api/explore?hl=en-US&tz=360&req=%7B%22comparisonItem%22%3A+%5B%7B%22keyword%22%3A+%22%5Cu09ac%5Cu09cd%5Cu09b0%5Cu09be%5Cu099c%5Cu09bf%5Cu09b2%5Cu09bf%5Cu09af%5Cu09bc%5Cu09be%5Cu09a8+%5Cu09b0%5Cu09bf%5Cu09af%5Cu09bc%5Cu09be%5Cu09b2%22%2C+%22time%22%3A+%22all%22%2C+%22geo%22%3A+%22BD%22%7D%5D%2C+%22category%22%3A+0%2C+%22property%22%3A+%22%22%7D (Caused by ResponseError('too many 429 error responses'))

In [24]:
results