# <b>Chapter 9</b>

### For the next chapter, I'll be using public data on the revenue of the Brazilian state of Goiás, plus historical BTC and S&P 500 prices.

Sources:

- https://dadosabertos.go.gov.br/dataset/receitas-detalhadas
- https://fred.stlouisfed.org/series/SP500
- https://fred.stlouisfed.org/series/CBBTCUSD

In [29]:
from datetime import datetime
import pandas as pd
import pytz
import os
import re

## Getting and Treating the Data - Receitas Detalhadas

In [30]:
# Loading the data

# Portuguese month names (as they appear in the MES column) -> zero-padded month number.
MONTH_NUMBERS = {
    "Janeiro": "01",
    "Fevereiro": "02",
    "Março": "03",
    "Abril": "04",
    "Maio": "05",
    "Junho": "06",
    "Julho": "07",
    "Agosto": "08",
    "Setembro": "09",
    "Outubro": "10",
    "Novembro": "11",
    "Dezembro": "12",
}


def find_revenue_files(directory):
    """Return the full paths of the ``ReceitasDetalhadas*.csv`` files in ``directory``.

    The dot before ``csv`` is escaped and the pattern is anchored at the end, so
    only real ``.csv`` files are picked up. Names are sorted so the order in
    which files are concatenated does not depend on ``os.listdir`` ordering.

    Parameters
    ----------
    directory : str
        Folder to scan (not recursive).

    Returns
    -------
    list[str]
        Paths built with ``os.path.join``, hence valid on any operating system.
    """
    pattern = re.compile(r"ReceitasDetalhadas.*\.csv$")
    return [
        os.path.join(directory, name)
        for name in sorted(os.listdir(directory))
        if pattern.match(name)
    ]


def month_start_dates(years, month_names):
    """Build first-of-month timestamps localized to America/Sao_Paulo.

    Parameters
    ----------
    years : pandas.Series of str
        Four-digit years, e.g. ``"2022"``.
    month_names : pandas.Series of str
        Portuguese month names, i.e. the keys of ``MONTH_NUMBERS``.

    Returns
    -------
    pandas.Series
        Timezone-aware datetimes at midnight on the first day of each month.

    Raises
    ------
    ValueError
        If a month name is missing or not a key of ``MONTH_NUMBERS``.
    """
    months = month_names.map(MONTH_NUMBERS)
    unknown = month_names[months.isna()].unique()
    if len(unknown):
        # Fail here with the offending values instead of deep inside date parsing.
        raise ValueError(f"Unrecognised month name(s) in MES: {list(unknown)}")
    naive = pd.to_datetime(years + "/" + months, format="%Y/%m")
    # Localizing the whole column keeps a single tz-aware dtype even when the
    # UTC offset differs between rows (daylight-saving vs. standard time).
    return naive.dt.tz_localize("America/Sao_Paulo")


root = os.getcwd()
paths = find_revenue_files(root)
if paths:
    # Read each file once and concatenate in a single call; growing a frame
    # inside a loop copies all previous rows on every iteration.
    df = pd.concat(
        [pd.read_csv(path, index_col=False, encoding="utf-8") for path in paths],
        ignore_index=True,
    )

    # Formatting the dates

    # COD*/RECEITA* columns are cast to int32, every other column to "string".
    # NOTE(review): int32 tops out at 2_147_483_647 - confirm the RECEITA
    # columns are integral and fit in that range.
    dtypes = {
        col: "int32" if re.match("(COD.*|RECEITA.*)", col) else "string"
        for col in df.columns
    }
    df = df.astype(dtypes)
    df["DATA"] = month_start_dates(df["ANO"], df["MES"])
    df = df.drop(columns=["ANO", "MES", "ANO_MES"])
    df.to_csv(os.path.join(root, "ReceitasGoias2022"), sep=";", index=False)

## Getting and Treating the Data  - BTC and S&P 500

In [71]:
btc_path = "../data/Bitcoin Historical (USD).csv"
sp_path = "../data/S&P 500 Historical (USD).csv"


def load_fred_series(path, value_col, start="2015-01-01"):
    """Load one historical-price CSV and prepare it for merging.

    The file is read in read-only mode (the data is never written back), rows
    dated before ``start`` are dropped, the rest are sorted by ``DATE`` and any
    value equal to the literal ``"."`` is replaced with a missing value.

    Parameters
    ----------
    path : str
        CSV file with a ``DATE`` column and a ``value_col`` column.
    value_col : str
        Name of the price column, e.g. ``"CBBTCUSD"`` or ``"SP500"``.
    start : str, default "2015-01-01"
        Earliest date (inclusive) to keep.

    Returns
    -------
    pandas.DataFrame
        Filtered, date-sorted frame.
    """
    frame = pd.read_csv(
        path, sep=",", index_col=False, parse_dates=["DATE"], encoding="utf-8"
    )
    frame = frame[frame["DATE"] >= start].sort_values(by="DATE")
    # "." is presumably the source's missing-observation marker - TODO confirm.
    # The column is otherwise left as parsed; passing na_values="." to read_csv
    # would make it numeric but would change what later cells receive.
    frame.loc[frame[value_col] == ".", value_col] = None
    return frame


btc_df = load_fred_series(btc_path, "CBBTCUSD")
sp_df = load_fred_series(sp_path, "SP500")

# Default (inner) merge: only dates present in both series are written out.
pd.merge(btc_df, sp_df, on="DATE").to_csv("../data/btc_s&p.csv", index=False)