In [None]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup

In [None]:
# Lookup from country name (lower case) to the value that
# get_ratings_from_countryeconomy passes on as the last path segment of
# https://countryeconomy.com/ratings/<segment>.
# NOTE(review): every country except "usa" maps to 0, which looks like a
# "not filled in yet" placeholder rather than a usable URL segment - confirm.
COUNTRY_URL_FOR_RATINGS = dict.fromkeys(
    (
        "colombia",
        "costa rica",
        "netherlands",
        "chile",
        "latvia",
        "united kingdom",
        "denmark",
        "luxembourg",
        "austria",
        "new zealand",
        "australia",
        "israel",
        "italy",
        "finland",
        "norway",
        "slovenia",
        "lithuania",
        "india",
        "slovak republic",
        "greece",
        "hungary",
        "portugal",
        "iceland",
        "spain",
        "germany",
        "switzerland",
        "japan",
        "belgium",
        "sweden",
        "canada",
        "ireland",
        "usa",
        "mexico",
        "france",
        "russia",
        "czech republic",
        "south korea",
        "south africa",
        "poland",
    ),
    0,
)
# The only entry with a real URL segment so far (keeps its position in the dict).
COUNTRY_URL_FOR_RATINGS["usa"] = "usa"

In [None]:
# Lookup from country name (lower case) to the HTML id of the <table> that
# get_ratings_from_countryeconomy reads when the rating provider is "Moodys".
# NOTE(review): every country except "usa" maps to 0, which looks like a
# "not filled in yet" placeholder rather than a real table id - confirm.
COUNTRY_TABLE_ID_FOR_MOODY_RATING = dict.fromkeys(
    (
        "colombia",
        "costa rica",
        "netherlands",
        "chile",
        "latvia",
        "united kingdom",
        "denmark",
        "luxembourg",
        "austria",
        "new zealand",
        "australia",
        "israel",
        "italy",
        "finland",
        "norway",
        "slovenia",
        "lithuania",
        "india",
        "slovak republic",
        "greece",
        "hungary",
        "portugal",
        "iceland",
        "spain",
        "germany",
        "switzerland",
        "japan",
        "belgium",
        "sweden",
        "canada",
        "ireland",
        "usa",
        "mexico",
        "france",
        "russia",
        "czech republic",
        "south korea",
        "south africa",
        "poland",
    ),
    0,
)
# The only entry with a real table id so far (keeps its position in the dict).
COUNTRY_TABLE_ID_FOR_MOODY_RATING["usa"] = "tb0_853"

In [None]:
def get_raw_data_from_countryeconomy(country, table_id, timeout=30):
    """Download one ratings table from countryeconomy.com as a raw DataFrame.

    Args:
        country (str): The country to get the data from. It is inserted
            verbatim into ``https://countryeconomy.com/ratings/<country>``,
            so it has to be spelled exactly as in the URL.
        table_id (str): The HTML ``id`` of the ``<table>`` element to read.
        timeout (float): Seconds to wait for the server before ``requests``
            gives up. Defaults to 30.

    Returns:
        pd.DataFrame | None: The first table that pandas parses out of the
        matching ``<table>`` element, or ``None`` (after printing a message)
        when the response status is not 200.

    Raises:
        ValueError: If the page contains no ``<table>`` with id ``table_id``.
    """
    country_url = f"https://countryeconomy.com/ratings/{country}"

    # requests never times out by default; an explicit timeout keeps a
    # stalled server from hanging the notebook forever.
    page = requests.get(country_url, timeout=timeout)

    if page.status_code != 200:
        print(
            f"Failed to retrieve the webpage (HTTP {page.status_code}): {country_url}",
        )
        return None

    soup = BeautifulSoup(page.text, "html.parser")
    table = soup.find("table", {"id": table_id})

    # find() returns None when nothing matches; str(None) would otherwise be
    # handed to pandas and fail with an unrelated-looking error.
    if table is None:
        msg = f"No table with id {table_id!r} found at {country_url}"
        raise ValueError(msg)

    # pandas deprecates passing literal HTML strings to read_html, so the
    # markup is wrapped in a file-like object.
    return pd.read_html(StringIO(str(table)))[0]

In [None]:
def clean_scraped_table(raw_data, country, rating_provider="Moodys"):
    """Reduce a raw ratings table to the long-term foreign-currency rating history.

    The input is left untouched, so the function can be called repeatedly on
    the same frame (e.g. when a notebook cell is re-run).

    Args:
        raw_data (pd.DataFrame): The raw data retrieved by
            ``get_raw_data_from_countryeconomy``. Column labels may be
            MultiIndex tuples or already-flattened strings; tuple levels are
            joined with single spaces before matching.
        country: The name of the country; stored in the "Country" column.
        rating_provider: The rating provider; stored in the
            "Rating Provider" column. Defaults to "Moodys".

    Returns:
        pd.DataFrame: The cleaned data with the columns
            "Date": the "Long term Rating Foreign currency Date" column
                parsed with ``pd.to_datetime``,
            "Rating": the "Long term Rating Foreign currency Rating(Outlook)"
                column with any parenthesised part removed,
            "Country": ``country`` repeated on every row,
            "Rating Provider": ``rating_provider`` repeated on every row.

    Raises:
        ValueError: If ``raw_data`` is ``None``.
        KeyError: If one of the two required columns is missing.
    """
    if raw_data is None:
        msg = "raw_data is None; there is no table to clean."
        raise ValueError(msg)

    date_column = "Long term Rating Foreign currency Date"
    rating_column = "Long term Rating Foreign currency Rating(Outlook)"

    # Flatten the column labels into plain strings WITHOUT assigning them back
    # to raw_data: overwriting the caller's columns made a second call join the
    # characters of the already-flat names and fail with a KeyError.
    flat_names = [
        " ".join(str(level) for level in label).strip()
        if isinstance(label, tuple)
        else str(label).strip()
        for label in raw_data.columns
    ]

    missing = [name for name in (date_column, rating_column) if name not in flat_names]
    if missing:
        msg = f"Column(s) {missing} not found; available columns: {flat_names}"
        raise KeyError(msg)

    # Select by position so the lookup works for both tuple and string labels.
    dates = raw_data.iloc[:, flat_names.index(date_column)]
    ratings = raw_data.iloc[:, flat_names.index(rating_column)]

    df_clean = pd.DataFrame()
    df_clean["Date"] = pd.to_datetime(dates)
    # Drop the parenthesised suffix, e.g. "Aaa (Negative)" -> "Aaa".
    df_clean["Rating"] = ratings.replace(r"\s*\([^)]*\)", "", regex=True)
    df_clean["Country"] = country
    df_clean["Rating Provider"] = rating_provider

    return df_clean

In [None]:
def get_ratings_from_countryeconomy(country, rating_provider):
    """Download and clean the rating history of one country from countryeconomy.com.

    Args:
        country (str): A key of ``COUNTRY_URL_FOR_RATINGS`` and
            ``COUNTRY_TABLE_ID_FOR_MOODY_RATING``, e.g. ``"usa"``.
        rating_provider (str): The rating provider. Only ``"Moodys"`` is
            supported.

    Returns:
        pd.DataFrame: The result of ``clean_scraped_table`` for the
        downloaded table.

    Raises:
        KeyError: If ``country`` is not a key of the lookup tables.
        ValueError: If ``rating_provider`` is not supported, or if the URL
            segment / table id stored for ``country`` is not a string.
        RuntimeError: If the download returned no data.
    """
    country_name_for_url_link = COUNTRY_URL_FOR_RATINGS[country]

    # Any other provider used to leave table_id unassigned and crash with an
    # UnboundLocalError further down; fail early with a clear message instead.
    if rating_provider != "Moodys":
        msg = f"Unsupported rating provider {rating_provider!r}; only 'Moodys' is available."
        raise ValueError(msg)
    table_id = COUNTRY_TABLE_ID_FOR_MOODY_RATING[country]

    # The lookup tables hold the integer 0 for most countries. Both values must
    # be strings to form a URL and match a table id, so refuse anything else
    # before making a request.
    if not isinstance(country_name_for_url_link, str) or not isinstance(table_id, str):
        msg = (
            f"No URL segment and/or Moody's table id configured for {country!r} "
            f"(found {country_name_for_url_link!r} and {table_id!r})."
        )
        raise ValueError(msg)

    raw_data = get_raw_data_from_countryeconomy(country_name_for_url_link, table_id)

    # The downloader signals a failed request by returning None.
    if raw_data is None:
        msg = f"Could not download the ratings page for {country!r}."
        raise RuntimeError(msg)

    return clean_scraped_table(raw_data, country, rating_provider)

In [None]:
# Fetch and clean the "Moodys" ratings table for the "usa" entry.
usa_rating = get_ratings_from_countryeconomy(country="usa", rating_provider="Moodys")

In [None]:
# Show the first five rows of the cleaned ratings.
usa_rating.head(n=5)