In [238]:
import os
import pandas as pd
import numpy as np

In [239]:
# Colab-only step: mount Google Drive at /content/drive so the project
# folder used in this notebook is reachable from the runtime.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Project root on the mounted Drive; data paths in this notebook are joined onto it.
# Alternative when the notebook runs from inside the project folder:
#   current_directory = os.getcwd()
current_directory  = '/content/drive/MyDrive/Artigos/IQA-Belém'

In [252]:
# Folder selection: period label and band that identify the input files.
# Bands listed for this notebook: co, no2, o3, so2, uvai
year, band = '2019-2024', 'uvai'
folder_name = f'dados/{band}'

In [253]:
# File selection: load every CSV in the band folder into `dataframes`,
# keyed by the file name without its extension.
folder_path = os.path.join(current_directory, folder_name)
files = os.listdir(folder_path)
# os.listdir returns entries in arbitrary order; sort so every run processes
# (and prints) the files in the same sequence.
csv_files = sorted(f for f in files if f.endswith('.csv'))

dataframes = {}

for csv_file in csv_files:
    print(csv_file)
    file_path = os.path.join(folder_path, csv_file)
    df = pd.read_csv(file_path)
    key = os.path.splitext(csv_file)[0]
    # Strip only the leading '<band>_' and the trailing '_<year>' so a site
    # name that happens to contain either fragment is left intact.
    name = key.removeprefix(band + '_').removesuffix('_' + year)
    # The second column is renamed to the site name; it is assumed to hold the
    # measurement — TODO confirm against the CSV schema.
    df = df.rename(columns={df.columns[1]: name})
    dataframes[key] = df


uvai_arapiranga_2019-2024.csv
uvai_belem_2019-2024.csv
uvai_carateua_2019-2024.csv
uvai_combu_2019-2024.csv
uvai_cotijuba_2019-2024.csv
uvai_da-barra_2019-2024.csv
uvai_grande_2019-2024.csv
uvai_das-oncas_2019-2024.csv
uvai_da-cintra_2019-2024.csv
uvai_mirim_2019-2024.csv
uvai_mosqueiro_2019-2024.csv
uvai_murucutu_2019-2024.csv
uvai_paqueta_2019-2024.csv
uvai_sao-pedro_2019-2024.csv
uvai_jutuba_2019-2024.csv


In [254]:
# Remove rows without a measurement while keeping one entry for every date.
# Dates are compared exactly as loaded (they are not parsed to datetime here).
for key in dataframes:
    raw = dataframes[key]
    value_col = raw.columns[1]

    # Every distinct date present in the raw frame, ascending.
    all_dates = (
        raw[['date']]
        .drop_duplicates(ignore_index=True)
        .sort_values(by='date')
    )

    # Only the rows whose second column is not NaN.
    measured = raw.dropna(subset=[value_col], ignore_index=True)

    # Left join from the full date list: a date whose rows were all dropped
    # comes back as a single row with NaN in the value column.
    dataframes[key] = (
        all_dates
        .merge(measured, on='date', how='left')
        .sort_values(by='date')
        .reset_index(drop=True)
    )

In [None]:
# Quick check: print the first and last rows of every frame.
for key, df in dataframes.items():
    print(f" Data in {key}:", df.head(), df.tail(), sep='\n')

In [None]:
# Number of missing values per column in every frame.
for key, df in dataframes.items():
    print(f" Nan in {key}:", df.isna().sum(), sep='\n')

In [255]:
# Fill missing values: linear interpolation first, then the column median
# for anything interpolation leaves empty.
for key, df in dataframes.items():
    value_col = df.columns[1]

    # Work on the measurement column only; the frame also carries the 'date'
    # column, which is not data to interpolate or to fill with a median.
    df[value_col] = df[value_col].interpolate(method='linear')

    # The median is taken after interpolation, so interpolated points count.
    med = df[value_col].median()
    print(med)

    # Replace whatever is still missing (e.g. gaps interpolation could not
    # reach) with the median.
    df[value_col] = df[value_col].fillna(med)


-10309.722879490517
-8896.872047100745
-10287.756355404243
-11885.73113246072
-11379.144254944056
-7999.884606076233
-12525.857041849675
-10755.181341054642
-11818.22643514991
-9481.15247748124
-10893.705884706102
-12355.18707747361
-10511.785010504478
-10930.665077642818
-10778.52743975995


In [256]:
# Rescale the downloaded values by a factor of 1e-4.
# Original note: downloaded data are in 10e-4 and are brought to 1 mol/m².
# Running this cell more than once rescales the data again.
for df in dataframes.values():
    value_col = df.columns[1]
    df[value_col] = df[value_col] * 0.0001

In [257]:
# Promote the 'date' column to the index of every frame.
# Each DataFrame is modified in place, so the dict keeps the same objects.
for df in dataframes.values():
    df.set_index('date', inplace=True)

In [258]:
# Combine all per-site frames into one table, joining on the index.
# The first frame seeds the result; `merged_df` stays None if there are no frames.
# DataFrame.merge defaults to an inner join, so only index values present in
# every frame survive.
frames = iter(dataframes.values())
merged_df = next(frames, None)

for frame in frames:
    merged_df = merged_df.merge(frame, left_index=True, right_index=True)

In [259]:
# Alphabetical order: rearrange the columns by sorted column name.
ordem_ab = list(merged_df.columns)
ordem_ab.sort()
merged_df = merged_df.loc[:, ordem_ab]

In [None]:
# Preview the first rows of the merged table (one column per site, indexed by date).
merged_df.head()

Unnamed: 0_level_0,arapiranga,belem,carateua,combu,cotijuba,da-barra,da-cintra,das-oncas,grande,jutuba,mirim,mosqueiro,murucutu,paqueta,sao-pedro
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2019-01-01,3.1e-05,3.6e-05,4e-05,3.5e-05,3.6e-05,3.6e-05,3.5e-05,3.2e-05,3.4e-05,3.2e-05,3.3e-05,3.2e-05,3.5e-05,3.2e-05,4.1e-05
2019-01-02,3.6e-05,3.4e-05,3.7e-05,3.8e-05,3.8e-05,4.1e-05,3.7e-05,3.4e-05,4.3e-05,3.6e-05,3.7e-05,3.9e-05,4.3e-05,3.7e-05,3.5e-05
2019-01-03,3.6e-05,4e-05,3.1e-05,2.9e-05,3.3e-05,3.7e-05,2.9e-05,4.4e-05,3.2e-05,3e-05,4e-05,3.7e-05,2.5e-05,4e-05,5.6e-05
2019-01-04,4.8e-05,3.4e-05,4e-05,3.5e-05,3.6e-05,3.3e-05,3.6e-05,3.5e-05,4.3e-05,2.9e-05,4.9e-05,3.6e-05,4e-05,2.8e-05,3.6e-05
2019-01-05,4.1e-05,3.7e-05,3.7e-05,4e-05,4.1e-05,4.8e-05,4.1e-05,3.4e-05,3.4e-05,3e-05,3.1e-05,3.4e-05,3.5e-05,3.2e-05,4e-05


In [260]:
# Save the merged table as CSV under <folder_path>/results.
# Create the output folder first so the write does not fail when it is missing.
os.makedirs(f'{folder_path}/results', exist_ok=True)
merged_df.to_csv(f'{folder_path}/results/{band}_{year}_df.csv')

In [261]:
# Descriptive statistics for every column of the merged table, rounded to 2 decimals.
# NOTE(review): the merged_df.head() output captured in this notebook shows values
# on the order of 1e-5, which round(2) turns into 0.0 — confirm that two decimals
# keep enough precision for the band being processed.
df_describe = merged_df.describe().round(2)

In [262]:
# Write the summary statistics to <folder_path>/results, next to the processed table.
df_describe.to_csv(f'{folder_path}/results/{band}_{year}_describe.csv')

In [None]:
# Select the processed file to convert: dados/<band>/results/<band>_<year>_df.csv
# Candidate bands: co, no2, o3, so2
year, band = '2019-2024', 'co'
folder_name = f'dados/{band}/results'
file_name = f'{folder_name}/{band}_{year}_df.csv'

In [None]:
# Absolute paths for this stage. `folder_name` already ends in 'results', so
# `folder_path` points at the results folder itself and `file_path` at the
# processed CSV inside it.
folder_path = os.path.join(current_directory, folder_name)
file_path = os.path.join(current_directory, file_name)

In [None]:
# Load the processed table for the selected band and period.
df = pd.read_csv(file_path)

In [None]:
# Use the 'date' column as the index so only site columns remain as data.
df = df.set_index('date')

In [None]:
match band:
      case 'co':
        mass = 28.01
      case 'no2':
        mass = 46.01
      case 'so2':
        mass = 64.07
      case 'o3':
        mass = 48.00

In [None]:
def mol_per_m2_to_ppm(mol_per_m2, molar_mass=None, molar_volume_L_per_mol=24.876):
    """Convert a value in mol/m² to the "ppm" figure used in this notebook.

    The result is ``mol_per_m2 / molar_volume_L_per_mol * molar_mass * 1000``.

    Parameters
    ----------
    mol_per_m2 : float or array-like
        Value(s) in mol/m². Only arithmetic operators are applied, so a scalar,
        a NumPy array or a pandas Series/DataFrame can be passed.
    molar_mass : float, optional
        Molar mass of the gas. When omitted, the module-level ``mass`` variable
        is used, which keeps existing single-argument calls working.
    molar_volume_L_per_mol : float, optional
        Molar volume in L/mol. The default 24.876 matches the ideal-gas value
        at about 30 °C and 1 atm (presumably chosen for local conditions —
        confirm); pass 22.414 for STP.

    Returns
    -------
    Same kind of object as ``mol_per_m2``, holding the converted value(s).

    Notes
    -----
    NOTE(review): dividing mol/m² by L/mol does not give mol/L (a column height
    would be needed), and mg/L is equivalent to ppm only for dilute aqueous
    solutions, not for gas mixing ratios. The formula is kept exactly as it
    was; confirm the intended conversion before relying on the result.
    """
    if molar_mass is None:
        # Fall back to the module-level value chosen from `band`.
        molar_mass = mass

    # Step 1: divide by the molar volume (treated by the notebook as mol/L).
    mol_per_L = mol_per_m2 / molar_volume_L_per_mol

    # Step 2: multiply by molar mass and 1000 (treated by the notebook as mg/L ~ ppm).
    concentration_ppm = mol_per_L * molar_mass * 1000

    return concentration_ppm


In [None]:
# Convert every column at once. The conversion is plain arithmetic, so it can
# be applied to the whole DataFrame directly; this avoids the element-wise
# DataFrame.applymap, which is deprecated in recent pandas releases.
new_df = mol_per_m2_to_ppm(df)
print(new_df)

            arapiranga      belem   carateua      combu   cotijuba   da-barra  \
date                                                                            
2019-01-01   36.367472  36.098642  33.602176  32.873359  37.557158  36.410515   
2019-01-02   32.391033  32.395853  32.744273  33.591033  32.874995  32.400715   
2019-01-03   36.177231  30.942574  33.661922  31.674212  30.523954  24.833258   
2019-01-04   32.815488  31.124611  34.238594  37.970556  36.863156  27.811815   
2019-01-05   38.154395  31.823644  33.079249  36.672611  33.013631  30.790372   
...                ...        ...        ...        ...        ...        ...   
2023-12-27   38.265573  38.154369  38.228402  34.644138  39.579863  35.915628   
2023-12-28   39.273340  38.257239  39.332598  33.564014  40.849110  35.487128   
2023-12-29   35.022944  39.528890  32.441960  40.501463  42.118358  38.471277   
2023-12-30   35.022944  42.623307  32.441960  43.078227  42.118358  41.455426   
2023-12-31   35.022944  44.2

            arapiranga     belem  carateua     combu  cotijuba  da-barra  \
date                                                                       
2019-01-01    0.032298  0.032060  0.029842  0.029195  0.033355  0.032337   
2019-01-02    0.028767  0.028771  0.029081  0.029833  0.029197  0.028775   
2019-01-03    0.032129  0.027480  0.029896  0.028130  0.027109  0.022055   
2019-01-04    0.029144  0.027642  0.030408  0.033722  0.032739  0.024700   
2019-01-05    0.033885  0.028263  0.029378  0.032569  0.029320  0.027345   
...                ...       ...       ...       ...       ...       ...   
2023-12-27    0.033984  0.033885  0.033951  0.030768  0.035151  0.031897   
2023-12-28    0.034879  0.033977  0.034932  0.029809  0.036279  0.031517   
2023-12-29    0.031104  0.035106  0.028812  0.035970  0.037406  0.034167   
2023-12-30    0.031104  0.037854  0.028812  0.038258  0.037406  0.036817   
2023-12-31    0.031104  0.039272  0.028812  0.038258  0.037406  0.036817   

           

In [None]:
# Save the converted table (`new_df`), not the unconverted input `df`.
# `folder_path` already points at the results folder at this stage, so the
# file is written directly into it rather than into a nested results/results.
new_df.to_csv(f'{folder_path}/{band}_{year}_ppm_df.csv')