In [1]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import geopandas as gpd

from multiprocessing import Pool

import cartopy.feature as cf
import cartopy.crs as ccrs

from matplotlib import patches
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

## Export CPTEC rain gauge records to the `clear` directory (one CSV per station)

In [2]:
# Year of the hourly CPTEC observations to process.
year = 2024

# Longitude / latitude limits of the study area (west, east, south, north).
# NOTE(review): "SA" presumably stands for South America — confirm.
SA_lon_min = -83
SA_lon_max = -33
SA_lat_min = -55
SA_lat_max = 6

In [3]:
# Root directory of the hourly CPTEC observations.
# The original location stays the default, but it can be overridden with the
# CPTEC_DIR_BASE environment variable so the notebook also runs on machines
# where the data lives somewhere else.
dir_base = os.environ.get('CPTEC_DIR_BASE') or os.path.join(
    '/','media','arturo','Arturo','Data','Brazil','OBS_CPTEC','1hr'
)

In [4]:
# Gather the raw text files of the selected year in lexicographic order.
raw_pattern = os.path.join(dir_base, str(year), 'raw', '*.txt')
list_files = sorted(glob.glob(raw_pattern))
list_len = len(list_files)
print(f'Number of files: {list_len}')

Number of files: 12


In [5]:
# Reference file: the last raw file in sorted order. It is only used to build
# the list of station codes that are exported further down.
if not list_files:
    raise FileNotFoundError(f'No raw .txt files found for year {year} under {dir_base}')

# glob already returns paths that include the directory prefix, so the entry is
# used as-is (joining it with dir_base again would break for a relative dir_base).
filename = list_files[-1]
DATA = pd.read_csv(filename,skiprows=2,skipfooter=3,sep=' ',engine='python')

# Keep only the gauges inside the bounding box defined in the configuration
# cell (longitude limits exclusive, latitude limits inclusive, as before).
inside_box = (
    (DATA.lon > SA_lon_min) & (DATA.lon < SA_lon_max)
    & (DATA.lat >= SA_lat_min) & (DATA.lat <= SA_lat_max)
)

# reset_index returns a new frame; assign it so the index is actually renumbered.
DATA = DATA[inside_box].reset_index(drop=True)
DATA.head(3)

Unnamed: 0,code,lon,lat,alt,date,r
0,14540000,-60.4706,4.6306,0,2024120100,0.0
1,140070401A,-60.163,4.5945,-9999,2024120100,0.0
2,14530000,-60.7939,4.1961,0,2024120100,0.0


In [6]:
# Station codes present in the reference file; np.unique returns them sorted
# and without repetitions. Each unique code becomes one export task below.
code_list = DATA.code.values
code_unique = np.unique(code_list)

# Short summary of what is about to be processed.
print(f'Year    : {year}')
print(f'Stations: {len(code_unique)}')

Year    : 2024
Stations: 6022


In [7]:
def search_concat(code_input, DATA, files=None, out_dir=None):
    """Collect every record of one station across the raw files and export it.

    Each raw file is read and filtered to the rows whose ``code`` equals
    ``code_input``. The matching rows are concatenated, ``date`` is parsed
    with the ``%Y%m%d%H`` format, ``lat``/``lon`` are rounded to two decimals,
    repeated timestamps are collapsed to the row with the largest ``r`` and
    the result, sorted by date, is written to ``obs_cptec_<code_input>.csv``.

    Parameters
    ----------
    code_input
        Station code to extract; it is compared as text.
    DATA
        Unused. Kept only so existing ``search_concat(code, DATA)`` calls
        keep working.
    files : sequence of str, optional
        Paths of the raw files to scan. Defaults to the notebook-level
        ``list_files``.
    out_dir : str, optional
        Directory that receives the CSV. Defaults to
        ``<dir_base>/<year>/clear``. It is created when missing.

    Returns
    -------
    pandas.DataFrame or None
        ``None`` when the output file already exists or when no file contains
        the station. Otherwise the concatenated rows (parsed ``date``, rounded
        coordinates) as they are BEFORE duplicated timestamps are removed.
        NOTE(review): the file on disk holds the de-duplicated, date-sorted
        version; confirm whether callers really want the raw concatenation.
    """
    print(f'Station: {code_input}')

    # Fall back to the notebook-level configuration when nothing is passed.
    if files is None:
        files = list_files
    if out_dir is None:
        out_dir = os.path.join(dir_base, str(year), 'clear')

    nameout = f'obs_cptec_{code_input}.csv'
    path_out = os.path.join(out_dir, nameout)

    # Stations already exported by a previous run are skipped.
    if os.path.exists(path_out):
        return None

    station_code = str(code_input)
    df_list = []
    # NOTE(review): every call re-reads all raw files; reading each file once
    # and grouping by code would avoid the repeated parsing.
    for path in files:
        # Paths coming from glob already include the directory prefix.
        # Codes are forced to text: with per-file dtype inference a file whose
        # codes are all numeric would lose leading zeros and never match.
        frame = pd.read_csv(path, skiprows=2, skipfooter=3, sep=' ',
                            engine='python', dtype={'code': str})
        station_rows = frame[frame['code'] == station_code]
        if not station_rows.empty:
            df_list.append(station_rows)

    # pd.concat raises ValueError on an empty list; nothing to export here.
    if not df_list:
        return None

    DATA_all = pd.concat(df_list, ignore_index=True)
    DATA_all['date'] = pd.to_datetime(DATA_all['date'].astype(str), format='%Y%m%d%H')

    DATA_all['lat'] = DATA_all['lat'].round(2)
    DATA_all['lon'] = DATA_all['lon'].round(2)

    # Remove duplicated timestamps: sorting by r in descending order first
    # makes keep='first' retain the largest r of each timestamp.
    df_sorted = DATA_all.sort_values(by='r', ascending=False)
    df_unique = df_sorted.drop_duplicates(subset=['date'], keep='first').reset_index(drop=True)

    df_final = df_unique.sort_values(by='date').reset_index(drop=True)

    # exist_ok avoids a failure when several workers create the directory at once.
    os.makedirs(out_dir, exist_ok=True)
    df_final.to_csv(path_out, header=True, index=False)

    return DATA_all

In [None]:
def compute_for_point(args):
    """Worker entry point: export the station whose code is ``args``.

    Forwards the code to ``search_concat`` together with the notebook-level
    ``DATA`` frame and hands back whatever ``search_concat`` returns.
    """
    station_code = args
    return search_concat(station_code, DATA)

# Run the export for every unique station code on 10 worker processes.
# Each task receives a single station code; `results` collects what
# compute_for_point returns for each of them.
with Pool(processes=10) as pool:
    results = pool.map(compute_for_point, list(code_unique))

Station: 14515000Station: 21770080Station: -9999Station: 15700000Station: 18409650Station: 22685000Station: 24735000Station: 260340501AStation: 261110103AStation: 270430213A









Station: 02042051
Station: 15710000
Station: 270430215A
Station: 260345401A
Station: 261110103C
Station: 24735100
Station: 22735000
Station: 14526400
Station: 18415000
Station: 261110104AStation: 260345401GStation: 02142096Station: 21780080Station: 270430216AStation: 15720000Station: 24743000Station: 22735050Station: 14527000



Station: 18420000

Station: 261110105A
Station: 1




Station: 15740000Station: 260345402AStation: 270470801AStation: 21780180Station: 24744000Station: 18421000
Station: 14530000
Station: 261110106AStation: 22735060Station: 10


Station: 260345404A

Station: 15750500


Station: 21780200Station: 270510101AStation: 24744900
Station: 18421600
Station: 14540000Station: 22735080Station: 261120001AStation: 10100000




Station: 15750550
Station: 260345405A



Station: 21780240Station: 27

## Old code: sequential version that writes one station file at a time

In [None]:
# for pos in range(len(code_unique)):
#     print(f'Station: {code_unique[pos]}')
#     df_list = []

#     nameout = f'obs_cptec_{code_unique[pos]}.csv'
#     dir_out = os.path.join(dir_base,str(year),'clear',nameout)

#     if os.path.exists(dir_out):
#             continue
#     else:
#         for nn in range(list_len):
#             filename = os.path.join(dir_base,list_files[nn])
#             DATA = pd.read_csv(filename,skiprows=2,skipfooter=3,sep=' ',engine='python')
#             DATA_ref = DATA[DATA['code']==code_unique[pos]]
#             if not DATA_ref.empty:
#                 df_list.append(DATA_ref)

#     DATA_all = pd.concat(df_list, ignore_index=True)
#     DATA_all['date'] = pd.to_datetime(DATA_all['date'].astype(str), format='%Y%m%d%H')
#     DATA_all.to_csv(dir_out, header=True, index=False)