In [18]:
import pandas as pd
from countryinfo import CountryInfo
import pycountry
import en_vocabulary_anki as eva

## Functions

In [19]:
# Generate country code by alpha-2
def generate_country_code(country):
    """Return the alpha-2 country code for a country name.

    The name is first looked up with ``CountryInfo``; if that lookup fails for
    any reason, the first result of ``pycountry.countries.search_fuzzy`` is
    used instead.

    Args:
        country: Country name to resolve.

    Returns:
        The alpha-2 code as returned by the library that resolved the name.

    Raises:
        Exception: If neither lookup yields a code. The failure of the
            fallback lookup is attached as the exception's cause.
    """
    try:
        return CountryInfo(country).info()["ISO"]["alpha2"]
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit propagate so the notebook can still be interrupted.
        pass

    try:
        return pycountry.countries.search_fuzzy(country)[0].alpha_2
    except Exception as err:
        raise Exception(f"Could not determine the country code for '{country}'") from err

# Create flashcards for countries and nationalities
def create_flashcards(df):
    """Add one Anki note per row of ``df`` for a country and its nationality.

    For each row the function creates pronunciation audio for the English
    country and nationality, looks up the IPA of both words, resolves the
    country code used to build the flag image URL, and calls ``eva.add_note``.

    IPA and country-code lookups are best effort: a failure is printed and the
    affected value stays an empty string, and the note is still added. Errors
    raised by ``eva.create_word_pronunciation`` or ``eva.add_note`` are not
    caught here.

    Args:
        df: DataFrame providing the columns ``Spanish_Country``,
            ``English_Country`` and ``English_Nationality``.
    """
    for row in df.itertuples():
        country_ipa = ""
        nationality_ipa = ""
        country_code = ""
        country_file_name = eva.create_word_pronunciation(row.English_Country)
        nationality_file_name = eva.create_word_pronunciation(row.English_Nationality)

        try:
            country_ipa = eva.get_word_ipa(row.English_Country, slash=True)
        except Exception as e:
            print(f"Error getting IPA for country '{row.English_Country}': {e}")

        try:
            nationality_ipa = eva.get_word_ipa(row.English_Nationality, slash=True)
        except Exception as e:
            print(f"Error getting IPA for nationality '{row.English_Nationality}': {e}")

        try:
            country_code = generate_country_code(row.English_Country).lower()
        except Exception as e:
            # country_code is still empty when the lookup fails, so report the
            # country name to make the message actionable.
            print(f"Error getting country code for '{row.English_Country}': {e}")

        # NOTE(review): if the lookup above failed, country_code is "" and the
        # picture URL/filename below are built from an empty code; confirm how
        # eva.add_note behaves when the flag image cannot be downloaded.
        eva.add_note(deckName="Idiomas::Ingles::Ingles-Pais-Nacionalidad",
            modelName="Ingles-Verbos-Irregulares",
            fields = {
                "Word": row.Spanish_Country,
                # The quadruple braces render as literal "{{c1::...}}" markup.
                "Text": f"Country<br>{{{{c1::{row.English_Country.lower()}}}}}<br>Nationality<br>{{{{c1::{row.English_Nationality.lower()}}}}}",
                "Fonética": f"{country_ipa}<br>{nationality_ipa}",
                "Imagen": "",
                "Pronunciación": f"[sound:{country_file_name}]<br>[sound:{nationality_file_name}]",
            },
            options = {
                "allowDuplicate": False
            },
            picture={
                "url": f"https://flagcdn.com/w320/{country_code}.png",
                "filename": f"{eva.generate_file_name(country_code)}.png",
                "fields": [
                    "Imagen"
                ]
            })
        

## Dataset Analysis

In [20]:
# Read the country/nationality dataset from CSV and preview its first rows
df = pd.read_csv("paises_gpt.csv")
df.head(n=5)

Unnamed: 0,Spanish_Country,English_Country,Spanish_Nationality,English_Nationality
0,Afganistán,Afghanistan,afgano/a,Afghan
1,Albania,Albania,albanés/albanesa,Albanian
2,Alemania,Germany,alemán/alemana,German
3,Andorra,Andorra,andorrano/a,Andorran
4,Angola,Angola,angoleño/a,Angolan


In [21]:
# Summarize the column names, non-null counts and dtypes of the loaded data
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 194 entries, 0 to 193
Data columns (total 4 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   Spanish_Country      194 non-null    object
 1   English_Country      194 non-null    object
 2   Spanish_Nationality  194 non-null    object
 3   English_Nationality  194 non-null    object
dtypes: object(4)
memory usage: 6.2+ KB


In [22]:
# List rows whose English country or nationality contains at least one character
# that is not an ASCII letter, a digit or whitespace (accents, hyphens, parentheses, ...)
df.loc[
    df['English_Country'].str.contains(r'[^a-zA-Z0-9\s]', regex=True)
    | df['English_Nationality'].str.contains(r'[^a-zA-Z0-9\s]', regex=True)
]

Unnamed: 0,Spanish_Country,English_Country,Spanish_Nationality,English_Nationality
28,Burkina Faso,Burkina Faso,burkinés/burkinabé,Burkinabé
50,Dominica,Dominica,dominiqués/dominiqueña,Dominican (from Dominica)
76,Guinea-Bisáu,Guinea-Bissau,bisauguineano/a,Bissau-Guinean
97,Kiribati,Kiribati,kiribatiano/a,I-Kiribati
156,Santo Tomé y Príncipe,São Tomé and Príncipe,santomense,São Toméan
174,Timor Oriental,Timor-Leste,timorense,Timorese
186,Vanuatu,Vanuatu,vanuatuense,Ni-Vanuatu


## Create flashcards for countries and nationalities

In [23]:
# Add one Anki note per dataset row; IPA lookups that fail are reported in the
# output below and leave that IPA empty on the note
create_flashcards(df)

Error getting IPA for nationality 'Kazakhstani': The word doesn't exist
Error getting IPA for nationality 'Kyrgyzstani': The word doesn't exist
Error getting IPA for nationality 'I-Kiribati': The word doesn't exist
Error getting IPA for nationality 'Palauan': The word doesn't exist
Error getting IPA for nationality 'Sammarinese': The word doesn't exist
Error getting IPA for nationality 'Tajikistani': The word doesn't exist
Error getting IPA for country 'Timor-Leste': The word doesn't exist
Error getting IPA for nationality 'Uzbekistani': The word doesn't exist
Error getting IPA for nationality 'Ni-Vanuatu': The word doesn't exist
