In [None]:
import os
import sys
import glob
import numpy as np
import pandas as pd
import geopandas as gpd

import cartopy.feature as cf
import cartopy.crs as ccrs

from matplotlib import patches
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

In [None]:
# Base data directory for this notebook (absolute, machine-specific path;
# the path components suggest hourly CPTEC observations -- confirm).
dir_base = os.path.join(
    '/', 'media', 'arturo', 'Arturo', 'Data', 'Brazil', 'OBS_CPTEC', '1hr'
)

## Read raw data

In [None]:
# Gather every raw 2024 text file under dir_base, ordered by path name.
raw_pattern = os.path.join(dir_base, '2024', 'raw', '*.txt')
list_files = sorted(glob.glob(raw_pattern))
list_len = len(list_files)
print(f'Number of files: {list_len}')

In [None]:
# Load the last file in sorted order and keep only the stations that fall
# inside the lon/lat box below.
# glob already returned complete paths (dir_base is part of the pattern),
# so the entry is used directly instead of being joined with dir_base again.
filename = list_files[-1]
DATA_raw = pd.read_csv(filename, skiprows=2, sep=',')

in_domain = (
    (DATA_raw.lon > -83) & (DATA_raw.lon < -33)
    & (DATA_raw.lat >= -55) & (DATA_raw.lat <= 6)
)
# .copy() gives an independent frame, so the .loc assignment below does not
# act on a slice of DATA_raw (avoids SettingWithCopyWarning).
DATA = DATA_raw[in_domain].copy()

# Negative values in column 't' are replaced by NaN
# (presumably a missing-data sentinel -- TODO confirm).
DATA.loc[DATA['t'] < 0, 't'] = np.nan

# reset_index returns a new frame; assign it so DATA really gets a clean
# 0..n-1 index instead of only displaying the re-indexed copy.
DATA = DATA.reset_index(drop=True)
DATA

In [None]:
# Map of the station positions stored in DATA, drawn in a PlateCarree projection.
proj = ccrs.PlateCarree()
fig = plt.figure(figsize=(4, 4), dpi=300)
gs = gridspec.GridSpec(1, 1)

#==================================================================================================
ax1 = fig.add_subplot(gs[0, 0], projection=proj)
# ax1.set_extent([SP_lon_min, SP_lon_max, SP_lat_min, SP_lat_max],crs=proj)

# Coastlines and country borders share the same thin black line style.
for feature in (cf.COASTLINE, cf.BORDERS):
    ax1.add_feature(feature, linewidth=0.1, linestyle='-', edgecolor='k')

# Dashed grey graticule, labelled on the bottom and left edges only.
gl = ax1.gridlines(
    crs=proj,
    draw_labels=True,
    linewidth=0.2,
    color='gray',
    alpha=0.5,
    linestyle='--',
)
gl.top_labels, gl.right_labels = False, False
gl.bottom_labels, gl.left_labels = True, True
gl.xlabel_style = dict(size=5, color='k')
gl.ylabel_style = dict(size=5, color='k')

# One small blue dot per row of DATA.
ax1.scatter(
    DATA['lon'],
    DATA['lat'],
    s=1,
    marker='o',
    color='b',
    transform=proj,
)

ax1.set_title(f'Number of stations: {len(DATA)}', loc='left', fontsize=7)

## Extract and Create Individual Files

In [None]:
# All values of the 'code' column in DATA, then the sorted distinct values.
code_column = DATA['code']
code_list = code_column.values
code_unique = np.unique(code_list)
print(f'Number of stations: {len(code_unique)}')

In [None]:
# Write one CSV per station code in code_unique, gathering that station's
# rows from every file in list_files.
#
# Each raw file is read exactly once and split by station code, instead of
# re-reading the whole file list for every station. The per-file pieces are
# kept in file order, so each station's rows are concatenated in the same
# order as a station-by-station scan would produce.
# Trade-off: the rows of all selected stations are held in memory at once.
#
# The per-file frame uses its own name so the notebook-level DATA (the
# filtered station table built earlier) is not overwritten by this cell.
station_chunks = {code: [] for code in code_unique}
for filename in list_files:
    hourly = pd.read_csv(filename, skiprows=2, sep=',')
    hourly = hourly[hourly['code'].isin(code_unique)]
    # sort=False: group order is irrelevant here, skip the extra sort.
    for code, rows in hourly.groupby('code', sort=False):
        station_chunks[code].append(rows)

# Make sure the destination folder exists before writing into it.
out_folder = os.path.join(dir_base, '2024', 'clear')
os.makedirs(out_folder, exist_ok=True)

for code in code_unique:
    print(f'Station: {code}')
    chunks = station_chunks[code]
    if not chunks:
        # pd.concat raises on an empty list; report and move on instead.
        print(f'  no rows found for station {code}, skipping')
        continue

    DATA_all = pd.concat(chunks, ignore_index=True)
    # 'date' is parsed with the pattern YYYYMMDDHH after casting to string.
    DATA_all['date'] = pd.to_datetime(DATA_all['date'].astype(str), format='%Y%m%d%H')
    dir_out = os.path.join(out_folder, f'obs_cptec_{code}.csv')
    DATA_all.to_csv(dir_out, header=True, index=False)

## Complete dates

In [None]:
# Hourly reference time axis covering all of 2024.
# The end bound carries an explicit 23:00: a bare '2024-12-31' is read as
# midnight, which would stop the range at 2024-12-31 00:00 and drop the last
# 23 hours of the year (8761 steps instead of 366 * 24 = 8784).
TIME_REF = pd.date_range(start='2024-01-01 00:00', end='2024-12-31 23:00', freq='1h')
TIME_PD = pd.DataFrame({'date': TIME_REF})
print(f'Number of times: {len(TIME_PD)}')