# Data Extraction and Preprocessing

In [1]:
#!pip install yfinance

import numpy as np
import pandas as pd
from datetime import datetime
import yfinance as yf

In [2]:
# Yahoo Finance tickers to download. Each ticker is listed exactly once
# (the original list repeated 'CL=F').
symbols = [
    '^GSPC',  # S&P 500
    '^DJI',   # Dow Jones
    '^IXIC',  # Nasdaq
    'CL=F',   # Crude oil
    'GC=F',   # Gold
    '^BVSP',  # IBOV
]

# Fetch the full available daily history for every ticker in one request.
# group_by='ticker' puts the ticker on the first column level, which the
# later cells rely on when selecting e.g. ('IBOV', 'Close').
data = yf.download(
    tickers=' '.join(symbols),
    period='max',
    interval='1d',
    auto_adjust=True,
    threads=True,
    group_by='ticker'
)

[*********************100%***********************]  6 of 6 completed


In [3]:
# Keep only the rows whose index label is 2002-01-01 or later (all columns).
data = data.loc[slice('2002-01-01', None)]

In [4]:
# Continuous calendar-day index spanning the first to the last available row.
index = pd.date_range(start=data.index.min(), end=data.index.max(), freq='D')

# Work on an independent copy: align it to the calendar-day index, fill the
# newly created gaps by linear interpolation, then drop any row that still
# contains a missing value.
data2 = (
    data
    .copy(deep=True)
    .reindex(index)
    .interpolate(method='linear')
    .dropna(axis=0)
)

In [5]:
# Number of columns that still contain at least one missing value.
data2.isnull().any().sum()

0

In [6]:
# Readable labels for each Yahoo Finance ticker.
# The original mapping listed 'CL=F' twice; in a dict literal the last entry
# wins, so 'Óleo_Bruto' was never applied. Only the effective label is kept.
ticker_labels = {
    '^GSPC': 'S&P500',
    '^DJI': 'Dow_Jones',
    '^IXIC': 'Nasdaq',
    'CL=F': 'Petróleo',
    'GC=F': 'Ouro',
    '^BVSP': 'IBOV',
}

# rename() returns a new frame, so data2 is left untouched. level=0 restricts
# the renaming to the ticker level of the column MultiIndex.
data3 = data2.rename(columns=ticker_labels, level=0)

In [7]:
# Assemble the output table: every IBOV field (prefixed with 'IBOV_') plus
# only the Close series of each remaining instrument.
to_concat = [data3['IBOV'].add_prefix('IBOV_')]

# Walk the remaining top-level labels in their order of first appearance in
# the columns. The original went through set(), whose iteration order for
# strings can differ between kernel sessions, making the column order of the
# result (and of the exported CSV) non-reproducible.
other_labels = [
    label
    for label in data3.columns.get_level_values(0).unique()
    if label != 'IBOV'
]
for label in other_labels:
    # Flatten the (label, 'Close') column into a single '<label>_Close' name.
    to_concat.append(data3[(label, 'Close')].rename(f'{label}_Close'))

data4 = pd.concat(to_concat, axis=1, join='inner')

In [8]:
# Display the column labels of the assembled frame.
data4.columns

Index(['IBOV_Open', 'IBOV_High', 'IBOV_Low', 'IBOV_Close', 'IBOV_Volume',
       'Nasdaq_Close', 'Ouro_Close', 'S&P500_Close', 'Petróleo_Close',
       'Dow_Jones_Close'],
      dtype='object')

In [9]:
# Write the assembled frame (index included) to a CSV file in the working directory.
data4.to_csv(path_or_buf='dataset2.csv')