# Edição e pré-processamento de dados do mercado físico de Boi Gordo no Brasil

## Fonte de dados: [SCOT Consultoria](https://www.scotconsultoria.com.br/cotacoes/boi-gordo/?ref=smn)

## Tratamento de arquivos '.csv'

### Gerando uma tabela de cotações diárias para as 32 praças observadas com indexação por data.

In [None]:
# Importando os pacotes 
import pandas as pd
import numpy as np
import csv
import os
from os import listdir
from os.path import isfile, join
from google.colab import drive

In [None]:
# Mount Google Drive so the datasets stored there are reachable under /content/drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Peek at one daily file to check its layout: no header row, ';' as the
# field separator, Latin-1 encoded text.
df = pd.read_csv(
    "/content/drive/My Drive/DataCattle/DadosMF/20200102 MFBG.csv",
    sep=";",
    header=None,
    encoding='Latin-1',
)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,SP Barretos,20000,,20200,,-,20250,,20450
1,SP Araçatuba,20000,,20200,,-,20250,,20450
2,MG Triângulo,19000,,19200,,"-4,95%",19250,,19450
3,MG B.Horizonte,18200,,18400,,"-8,91%",18450,,18650
4,MG Norte,18900,,19200,,"-4,95%",19150,,19450


In [None]:
# Folder on the mounted Drive that holds the daily quotation files
path = "/content/drive/My Drive/DataCattle/DadosMF/"

# Names of the regular '.csv' files in that folder, in ascending name order
ficheiros = sorted(
    nome for nome in listdir(path)
    if isfile(join(path, nome)) and nome.endswith('.csv')
)

In [None]:
# Build one row of prices per daily file; the rows are later indexed by file date.
datas = []    # quotation date of each file, as an int in YYYYMMDD form
linhas = []   # one list of prices per file, in the order the rows appear in it
Pracas = []   # column labels: region names listed in the first non-empty file

for item in ficheiros:
    # The first 8 characters of the file name are the date (e.g. "20200102 MFBG.csv").
    # int() raises ValueError for a file name that does not follow this pattern.
    datas.append(int(item[0:8]))

    # newline='' is the mode the csv module documents for files given to csv.reader.
    with open(join(path, item), encoding = 'latin-1', newline = '') as f:
        # Keep only rows that carry both a region (column 0) and a price (column 1);
        # blank or truncated lines are skipped instead of failing with IndexError.
        registros = [row for row in csv.reader(f, delimiter = ';') if len(row) > 1]

    # Always add a row, even for a file without usable lines, so that the prices
    # stay aligned with `datas` when both are joined by position afterwards.
    linhas.append([row[1] for row in registros])

    # Column names come from a single file rather than from a running,
    # repeatedly re-sliced list, so they cannot mix names from different files.
    if not Pracas:
        Pracas = [row[0] for row in registros]

# Build the frame once instead of growing it with pd.concat inside the loop.
# Prices are kept as the strings read from the files; a file with fewer rows
# than the widest one gets missing values in its trailing columns.
df_valores = pd.DataFrame(linhas)

# Fail loudly when some file lists more regions than the file that supplied
# the column names, instead of labelling columns incorrectly.
if df_valores.shape[1] != len(Pracas):
    raise ValueError(
        "Inconsistent number of rows across files: the widest file has "
        f"{df_valores.shape[1]} regions but {len(Pracas)} column names were read."
    )
df_valores.columns = Pracas

# Single-column frame of dates, on the same default positional index as df_valores.
df_datas = pd.DataFrame({'Data': datas})

In [None]:
# Attach each date to its row of quotations (rows are matched through the
# default positional index both frames share) and promote the date to the index
mfbg = df_datas.join(df_valores).set_index('Data')
mfbg.tail(20)

Unnamed: 0_level_0,SP Barretos,SP Araçatuba,MG Triângulo,MG B.Horizonte,MG Norte,MG Sul,GO Goiânia,GO Reg. Sul,MS Dourados,MS C. Grande,MS Três Lagoas,RS Oeste (kg),RS Pelotas (kg),BA Sul,BA Oeste,MT Norte,MT Sudoeste,MT Cuiabá**,MT Sudeste,PR Noroeste,SC Oeste* ***,MA Oeste,Alagoas,PA Marabá,PA Redenção,PA Paragominas,RO Sudeste,TO Sul,TO Norte,Acre,ES,RJ
Data,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1
20200821,22350,22350,22450,22650,22650,22450,21950,21950,21450,21650,21450,710,710,23450,23650,20500,20300,21000,21000,22050,21850,22350,23950,22450,22150,22150,20500,22650,22450,19000,21450,22150
20200824,22550,22550,22450,22650,22750,22450,21950,21950,21650,21650,21650,700,700,23750,23650,20500,20400,21000,21000,22050,21850,22950,23950,22550,22550,22450,20500,22650,22450,19000,21450,22250
20200825,22650,22650,22550,22650,22850,22450,22150,21950,21850,21650,21650,700,700,23950,23650,20700,20400,21000,21000,22150,22050,22950,24050,22550,22550,22450,20700,22650,22450,19000,21450,22350
20200826,22750,22750,22650,22650,22950,22350,22250,21950,21950,21750,21750,700,700,23950,23650,20800,20500,21000,21000,22150,21850,22950,24150,22550,22750,22450,20800,22650,22550,19300,21450,22350
20200827,22750,22750,22650,22650,22950,22350,22250,21950,22050,21850,21850,700,700,23950,23650,20800,20500,21000,21000,22150,22050,22950,24150,22550,22850,22450,20800,22650,22650,19500,21450,22350
20200828,22850,22850,22750,22850,23050,22550,22550,22150,22150,22150,21950,700,700,24150,23950,20800,20600,21350,21200,22150,22250,22950,24150,22650,22850,22650,21200,22650,22650,20200,21450,22550
20200831,23350,23350,22750,22850,23050,22650,22550,22250,22450,22250,22050,700,700,24350,24050,21000,21000,21450,21350,22150,22450,22950,24150,22750,22850,22950,21450,22950,22850,20300,21650,22550
20200901,23450,23450,22950,23150,23350,22750,22650,22450,22650,22450,22450,690,690,24450,24150,21100,21450,21650,21450,22650,22650,23500,24650,22950,23050,22950,21450,23150,22950,20400,21950,22950
20200902,23650,23650,22950,23450,23450,22750,22650,22450,22650,22650,22450,690,690,24450,24450,21200,21450,21650,21550,22850,22750,23500,24650,23150,23150,23150,21450,23250,23250,20400,21950,22950
20200903,23650,23650,23150,23450,23450,22950,22650,22550,22950,22850,22550,690,690,24650,24550,21200,21450,21650,21650,22850,22750,23500,24650,23150,23150,23250,21650,23250,23350,20400,22150,22950


In [None]:
# Show the dtype pandas assigned to each column of the assembled table
print(mfbg.dtypes)

In [None]:
# Save the assembled table to Drive as a ';'-separated '.csv' file
mfbg.to_csv("/content/drive/My Drive/DataCattle/BDMFBG.csv", sep = ";")

## Fim