In [16]:
import pandas as pd

# 1. Load your data
df = pd.read_csv("datafinal.csv")

df_chau_doc=pd.read_csv("df_chau_doc.csv")
df_tan_chau=pd.read_csv("df_tan_chau.csv")
# 2. Put dataframes in a dictionary for easier handling
# (This is safer than using vars())
df_tan_chau=df_tan_chau.drop(columns=['point_id'])
df_tan_chau['RIV_TAN_CHAU']=df_tan_chau['value']
df_chau_doc=df_chau_doc.drop(columns=['point_id'])
df_chau_doc['RIV_CHAU_DOC']=df_chau_doc['value']
data_dict = {

    "chau_doc": df_chau_doc,
    "tan_chau": df_tan_chau
}

# 3. Loop, Rename, and Merge
for name, sub_df in data_dict.items():
    # Make sure 'time' is the only common column
    # We rename all OTHER columns to have the station name prefix (e.g., 'zos' -> 'caibe_zos')
    sub_df = sub_df.set_index('time')
    sub_df = sub_df.add_prefix(f"{name}_")
    sub_df = sub_df.reset_index()
    
    # Now merge. No suffix collision will happen because column names are unique!
    df = df.merge(
        sub_df,
        on='time',
        how='outer'
    )

# 4. Fill NA
df = df.fillna(0)

# Check results
print(df.head())

          h3_index        time  month  rain_mm      solar     temp_c  \
0  876584024ffffff  2022-01-01      1      0.0  20.105776  26.328880   
1  876584025ffffff  2022-01-01      1      0.0  20.105776  26.328880   
2  876584100ffffff  2022-01-01      1      0.0  20.165876  26.472355   
3  876584101ffffff  2022-01-01      1      0.0  20.165876  26.472355   
4  876584104ffffff  2022-01-01      1      0.0  20.219193  26.338972   

   temp_max_c  temp_min_c  rh_percent  caibe_zos  ...  salinity_mean_new  \
0   30.275843   21.888361   62.440125   1.160924  ...           0.452810   
1   30.275843   21.888361   62.440125   1.160924  ...           0.464342   
2   30.488733   22.028986   62.081017   1.160924  ...           0.474906   
3   30.488733   22.028986   62.081017   1.160924  ...           0.537060   
4   30.389124   21.884455   63.914448   1.160924  ...           0.542740   

   salinity_std_new  pct_salinity_pixels_new  pct_salinity_gte_0_2_new  \
0          0.165993                6

In [17]:
df.shape

(2540035, 60)

In [7]:
df.head()

Unnamed: 0,h3_index,time,month,rain_mm,solar,temp_c,temp_max_c,temp_min_c,rh_percent,caibe_zos,...,salinity_min_new,salinity_max_new,salinity_mean_new,salinity_std_new,pct_salinity_pixels_new,pct_salinity_gte_0_2_new,pct_salinity_gte_0_5_new,pct_salinity_gte_1_0_new,chau_doc_value,tan_chau_value
0,876584024ffffff,2022-01-01,1,0.0,20.105776,26.32888,30.275843,21.888361,62.440125,1.160924,...,-0.152727,0.824519,0.45281,0.165993,68.06,91.007934,46.782251,0.0,1.68,1.54
1,876584025ffffff,2022-01-01,1,0.0,20.105776,26.32888,30.275843,21.888361,62.440125,1.160924,...,-0.229349,0.801148,0.464342,0.1704,68.111111,92.39211,47.085867,0.0,1.68,1.54
2,876584100ffffff,2022-01-01,1,0.0,20.165876,26.472355,30.488733,22.028986,62.081017,1.160924,...,-0.954194,0.745806,0.474906,0.153528,68.40404,93.044891,57.457177,0.0,1.68,1.54
3,876584101ffffff,2022-01-01,1,0.0,20.165876,26.472355,30.488733,22.028986,62.081017,1.160924,...,-0.172988,0.811902,0.53706,0.164505,67.435644,94.185876,70.004405,0.0,1.68,1.54
4,876584104ffffff,2022-01-01,1,0.0,20.219193,26.338972,30.389124,21.884455,63.914448,1.160924,...,-0.153881,0.787094,0.54274,0.174023,68.777778,93.508592,66.485534,0.0,1.68,1.54


In [20]:
df.columns

Index(['h3_index', 'time', 'month', 'rain_mm', 'solar', 'temp_c', 'temp_max_c',
       'temp_min_c', 'rh_percent', 'caibe_zos', 'cailon_zos', 'cuadai_zos',
       'cuatieu_zos', 'dinhan_zos', 'ganhhao_zos', 'trande_zos', 'pct_tree',
       'pct_shrub', 'pct_grass', 'pct_crop', 'pct_built', 'pct_bare',
       'pct_snow_ice', 'pct_water', 'pct_wetland', 'pct_mangrove',
       'pct_moss_lichen', 'pct_water_river', 'dem_min', 'dem_max', 'dem_mean',
       'slope_min', 'slope_max', 'slope_mean', 'pct_slope_gt_1deg',
       'salinity_min', 'salinity_max', 'salinity_mean', 'salinity_std',
       'pct_salinity_pixels', 'pct_salinity_gte_0_2', 'pct_salinity_gte_0_5',
       'pct_salinity_gte_1_0', 'no_data_land', 'is_sea', 'landcover_label',
       'landcover_label_refined', 'salinity_risk', 'salinity_min_new',
       'salinity_max_new', 'salinity_mean_new', 'salinity_std_new',
       'pct_salinity_pixels_new', 'pct_salinity_gte_0_2_new',
       'pct_salinity_gte_0_5_new', 'pct_salinity_gte_1_0

In [21]:
df=df.drop(columns=['chau_doc_RIV_CHAU_DOC', 'tan_chau_RIV_TAN_CHAU'])

In [22]:
df.columns

Index(['h3_index', 'time', 'month', 'rain_mm', 'solar', 'temp_c', 'temp_max_c',
       'temp_min_c', 'rh_percent', 'caibe_zos', 'cailon_zos', 'cuadai_zos',
       'cuatieu_zos', 'dinhan_zos', 'ganhhao_zos', 'trande_zos', 'pct_tree',
       'pct_shrub', 'pct_grass', 'pct_crop', 'pct_built', 'pct_bare',
       'pct_snow_ice', 'pct_water', 'pct_wetland', 'pct_mangrove',
       'pct_moss_lichen', 'pct_water_river', 'dem_min', 'dem_max', 'dem_mean',
       'slope_min', 'slope_max', 'slope_mean', 'pct_slope_gt_1deg',
       'salinity_min', 'salinity_max', 'salinity_mean', 'salinity_std',
       'pct_salinity_pixels', 'pct_salinity_gte_0_2', 'pct_salinity_gte_0_5',
       'pct_salinity_gte_1_0', 'no_data_land', 'is_sea', 'landcover_label',
       'landcover_label_refined', 'salinity_risk', 'salinity_min_new',
       'salinity_max_new', 'salinity_mean_new', 'salinity_std_new',
       'pct_salinity_pixels_new', 'pct_salinity_gte_0_2_new',
       'pct_salinity_gte_0_5_new', 'pct_salinity_gte_1_0

In [23]:
df.to_csv("database.csv", index=False)

In [24]:
df.tail()

Unnamed: 0,h3_index,time,month,rain_mm,solar,temp_c,temp_max_c,temp_min_c,rh_percent,caibe_zos,...,salinity_min_new,salinity_max_new,salinity_mean_new,salinity_std_new,pct_salinity_pixels_new,pct_salinity_gte_0_2_new,pct_salinity_gte_0_5_new,pct_salinity_gte_1_0_new,chau_doc_value,tan_chau_value
2540030,8765b5b6affffff,2022-12-31,12,5.443499,10.59636,24.564196,26.552332,22.736109,79.23716,1.097751,...,-0.975219,0.904671,0.403516,0.242717,68.397959,87.21468,31.73206,0.0,1.11,1.04
2540031,8765b5b6bffffff,2022-12-31,12,5.443499,10.59636,24.564196,26.552332,22.736109,79.23716,1.097751,...,0.007471,0.917507,0.490029,0.148079,67.717172,98.299523,43.630668,0.0,1.11,1.04
2540032,8765b5b6cffffff,2022-12-31,12,4.10048,10.59636,24.564196,26.552332,22.736109,79.23716,1.097751,...,-0.571163,0.90722,0.480744,0.308307,68.367347,85.462687,44.597015,0.0,1.11,1.04
2540033,8765b5b6dffffff,2022-12-31,12,4.10048,10.59636,24.564196,26.552332,22.736109,79.23716,1.097751,...,-0.07517,0.898285,0.461544,0.278935,68.377551,79.495598,39.307566,0.0,1.11,1.04
2540034,8765b5b6effffff,2022-12-31,12,5.443499,10.59636,24.564196,26.552332,22.736109,79.23716,1.097751,...,-0.972994,0.909437,0.141943,0.330122,68.05102,51.68691,11.216074,0.0,1.11,1.04


In [None]:
dfdo = pd.read_csv("FACT_POINT_DATA.csv")

In [None]:
dfdo.head()

Unnamed: 0,point_id,date,value
0,RIV_CHAU_DOC,2022-01-01,1.68
1,RIV_CHAU_DOC,2022-01-02,1.86
2,RIV_CHAU_DOC,2022-01-03,1.95
3,RIV_CHAU_DOC,2022-01-04,1.93
4,RIV_CHAU_DOC,2022-01-05,1.8


In [None]:


df_chau_doc = dfdo[dfdo['point_id'] == 'RIV_CHAU_DOC']
df_tan_chau = dfdo[dfdo['point_id'] == 'RIV_TAN_CHAU']

In [None]:
df_chau_doc.to_csv("df_chau_doc.csv", index=False)
df_tan_chau.to_csv("df_tan_chau.csv", index=False)

In [None]:
df = df.merge(
        df_chau_doc,
        on='time',
        how='outer'
    )
df = df.fillna(0)

MemoryError: Unable to allocate 775. MiB for an array with shape (40, 2540035) and data type float64

In [None]:
df = df.merge(
        df_chac,
        on='time',
        how='outer'
    )
df = df.fillna(0)

In [11]:
df=pd.read_csv("datafinal.csv")

In [13]:
df.columns

Index(['h3_index', 'time', 'month', 'rain_mm', 'solar', 'temp_c', 'temp_max_c',
       'temp_min_c', 'rh_percent', 'caibe_zos', 'cailon_zos', 'cuadai_zos',
       'cuatieu_zos', 'dinhan_zos', 'ganhhao_zos', 'trande_zos', 'pct_tree',
       'pct_shrub', 'pct_grass', 'pct_crop', 'pct_built', 'pct_bare',
       'pct_snow_ice', 'pct_water', 'pct_wetland', 'pct_mangrove',
       'pct_moss_lichen', 'pct_water_river', 'dem_min', 'dem_max', 'dem_mean',
       'slope_min', 'slope_max', 'slope_mean', 'pct_slope_gt_1deg',
       'salinity_min', 'salinity_max', 'salinity_mean', 'salinity_std',
       'pct_salinity_pixels', 'pct_salinity_gte_0_2', 'pct_salinity_gte_0_5',
       'pct_salinity_gte_1_0', 'no_data_land', 'is_sea', 'landcover_label',
       'landcover_label_refined', 'salinity_risk', 'salinity_min_new',
       'salinity_max_new', 'salinity_mean_new', 'salinity_std_new',
       'pct_salinity_pixels_new', 'pct_salinity_gte_0_2_new',
       'pct_salinity_gte_0_5_new', 'pct_salinity_gte_1_0

In [15]:
df['value']

KeyError: 'value'