In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

In [2]:
# Load the GOPI dataset at subsection granularity from the user's home directory.
base_dir = Path.home()
manipulated_data_directory = base_dir.joinpath("NaMinhaRua", "Ficheiros de dados Manipulados")
gopi_csv_path = manipulated_data_directory.joinpath('gopidatasetmodel1_granularidade_subseccao.csv')
gopi = pd.read_csv(gopi_csv_path)

# Show the dtype pandas inferred for every column.
print(gopi.dtypes)

dt_registo        object
Subseccao          int64
Freguesia         object
BNFDRB           float64
CA               float64
CDPC2RD          float64
CDRD             float64
CDEEP            float64
DDP              float64
EOVRDJOPANVP     float64
EA               float64
FDDDCDPC2ROSM    float64
G                float64
LDVPDDPVELDVP    float64
OIEVPER          float64
PED              float64
RNADRDDRSU       float64
RSRPDP           float64
RJPDR            float64
RMPDR            float64
RRPDR            float64
SOOLA            float64
ÁAORM            float64
Fim_de_Semana       bool
Feriado             bool
Inverno             bool
Primavera           bool
Verao               bool
Outono              bool
dtype: object


In [3]:
# Parse the 'dt_registo' column as datetimes; values that cannot be parsed
# become NaT because of errors='coerce'.
gopi['dt_registo'] = pd.to_datetime(gopi['dt_registo'], errors='coerce')

# Columns to be turned into boolean flags (True when the value is non-zero).
# NOTE(review): these look like per-category occurrence counts — confirm.
cols_to_convert = [
    'BNFDRB', 'CA', 'CDPC2RD', 'CDRD', 'CDEEP', 'DDP', 'EOVRDJOPANVP', 'RNADRDDRSU', 'RSRPDP', 
    'RJPDR', 'RMPDR', 'RRPDR', 'SOOLA', 'ÁAORM', 'EA', 'FDDDCDPC2ROSM', 'G', 'LDVPDDPVELDVP',
    'OIEVPER', 'PED'
]

# Convert every listed column that is present in the DataFrame to bool and
# report the ones that are missing instead of raising.
for col in cols_to_convert:
    if col in gopi.columns:
        # A plain astype(bool) maps NaN to True, which would mark missing data
        # as "present". Fill missing values with 0 first so they become False.
        gopi[col] = gopi[col].fillna(0).astype(bool)
        print(f"Coluna '{col}' convertida para boolean.")
    else:
        print(f"Coluna '{col}' não encontrada no DataFrame.")

# View the data types of each column to confirm the conversion
print(gopi.dtypes)


Coluna 'BNFDRB' convertida para boolean.
Coluna 'CA' convertida para boolean.
Coluna 'CDPC2RD' convertida para boolean.
Coluna 'CDRD' convertida para boolean.
Coluna 'CDEEP' convertida para boolean.
Coluna 'DDP' convertida para boolean.
Coluna 'EOVRDJOPANVP' convertida para boolean.
Coluna 'RNADRDDRSU' convertida para boolean.
Coluna 'RSRPDP' convertida para boolean.
Coluna 'RJPDR' convertida para boolean.
Coluna 'RMPDR' convertida para boolean.
Coluna 'RRPDR' convertida para boolean.
Coluna 'SOOLA' convertida para boolean.
Coluna 'ÁAORM' convertida para boolean.
Coluna 'EA' convertida para boolean.
Coluna 'FDDDCDPC2ROSM' convertida para boolean.
Coluna 'G' convertida para boolean.
Coluna 'LDVPDDPVELDVP' convertida para boolean.
Coluna 'OIEVPER' convertida para boolean.
Coluna 'PED' convertida para boolean.
dt_registo       datetime64[ns]
Subseccao                 int64
Freguesia                object
BNFDRB                     bool
CA                         bool
CDPC2RD              

In [4]:
# Display the first two rows of gopi.
gopi.head(2)

Unnamed: 0,dt_registo,Subseccao,Freguesia,BNFDRB,CA,CDPC2RD,CDRD,CDEEP,DDP,EOVRDJOPANVP,...,RMPDR,RRPDR,SOOLA,ÁAORM,Fim_de_Semana,Feriado,Inverno,Primavera,Verao,Outono
0,2018-01-01,11066202005,Parque das Nações,False,True,False,False,False,False,False,...,False,False,False,False,False,True,True,False,False,False
1,2018-01-01,11061802202,Lumiar,False,False,False,False,False,False,False,...,False,False,False,True,False,True,True,False,False,False


In [5]:
# Keep only the rows whose 'Freguesia' is one of the target parishes and
# write the result to a new CSV file (without the index column).
freguesias_desejadas = ['Santo António', 'Olivais', 'Alcântara']
mascara_freguesias = gopi['Freguesia'].isin(freguesias_desejadas)
gopi_filtrado = gopi.loc[mascara_freguesias]

caminho_csv_filtrado = manipulated_data_directory / 'gopidatasetgroupmodel_Subseccao_ready_to_be_merged_with_ipma.csv'
gopi_filtrado.to_csv(caminho_csv_filtrado, sep=',', index=False)

In [6]:
# Display the first ten rows of the filtered DataFrame.
gopi_filtrado.head(10)

Unnamed: 0,dt_registo,Subseccao,Freguesia,BNFDRB,CA,CDPC2RD,CDRD,CDEEP,DDP,EOVRDJOPANVP,...,RMPDR,RRPDR,SOOLA,ÁAORM,Fim_de_Semana,Feriado,Inverno,Primavera,Verao,Outono
3,2018-01-01,11066601201,Santo António,False,False,False,False,False,False,False,...,False,False,False,False,False,True,True,False,False,False
4,2018-01-01,11060200605,Alcântara,False,False,False,False,False,False,False,...,False,False,False,False,False,True,True,False,False,False
5,2018-01-01,11063301908,Olivais,False,False,False,True,False,False,False,...,False,False,False,False,False,True,True,False,False,False
11,2018-01-01,11063301802,Olivais,False,False,False,False,False,False,False,...,False,False,False,False,False,True,True,False,False,False
25,2018-01-01,11066600102,Santo António,False,False,False,False,False,False,True,...,False,False,False,False,False,True,True,False,False,False
26,2018-01-01,11063301002,Olivais,False,False,False,False,False,False,False,...,False,False,False,False,False,True,True,False,False,False
28,2018-01-01,11060200705,Alcântara,False,False,False,False,False,False,False,...,False,False,False,False,False,True,True,False,False,False
35,2018-01-01,11063302305,Olivais,False,True,False,False,False,False,False,...,False,False,False,False,False,True,True,False,False,False
68,2018-01-02,11060200705,Alcântara,False,False,False,False,False,False,False,...,True,False,False,False,False,False,True,False,False,False
76,2018-01-02,11063300905,Olivais,False,False,False,False,False,False,True,...,False,False,False,False,False,False,True,False,False,False
