In [37]:
import os
import xarray as xr
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import MinMaxScaler
from prophet.plot import plot_plotly,plot_components_plotly
from prophet import Prophet

In [38]:
def calc_index(given_lat,given_lon):
    """Translate a latitude/longitude pair into row/column indices of a regular grid.

    Each coordinate is snapped down (``np.floor``) to a multiple of the grid
    step -- 0.5 for latitude, 0.625 for longitude -- and the index is the number
    of steps between that snapped value and the grid origin at (-90, -180).

    Args:
        given_lat: Latitude of the point (presumably degrees north).
        given_lon: Longitude of the point (presumably degrees east).

    Returns:
        Tuple ``(lat_index, lon_index)`` of Python ints. A longitude index of
        576 (produced by a longitude of exactly +180) wraps around to 0.
    """
    lat_step, lon_step = 0.5, 0.625
    lat_origin, lon_origin = -90, -180
    wrap_column = 576

    snapped_lon = np.floor(given_lon / lon_step) * lon_step
    column = int((snapped_lon - lon_origin) / lon_step)
    if column == wrap_column:
        # +180 and -180 describe the same meridian, so reuse the first column.
        column = 0

    snapped_lat = np.floor(given_lat / lat_step) * lat_step
    row = int((snapped_lat - lat_origin) / lat_step)

    return row, column

In [39]:
def get_conc(folder_name,lat_index,lon_index):
    """Collect one grid-cell value per data file found in ``../data/<folder_name>``.

    Every file in the folder is opened with xarray, the variable associated
    with ``folder_name`` is read, its first entry along the leading axis is
    taken and the value at ``[lat_index][lon_index]`` is recorded.

    Args:
        folder_name: Sub-folder of ``../data``; one of ``'aqi1'``, ``'aqi2'``,
            ``'aqi3'``, ``'moisture'`` or ``'temperature'``.
        lat_index: First (row) index into the variable's grid.
        lon_index: Second (column) index into the variable's grid.

    Returns:
        1-D numpy array holding one value per file, in sorted file-name order.
        An empty float32 array is returned when the folder contains no files.

    Raises:
        KeyError: If ``folder_name`` has no known variable code.
    """
    variable_codes = {
        'aqi1': 'DUSMASS',
        'aqi2': 'COSC',
        'aqi3': 'TO3',
        'moisture': 'SFMC',
        'temperature': 'TLML',
    }

    folder_path = f'../data/{folder_name}'
    # os.listdir() returns entries in arbitrary order; sorting makes the series
    # deterministic across machines and file systems.
    # NOTE(review): assumes the file names sort chronologically so the values
    # line up with the monthly date index built elsewhere -- confirm.
    data_files = sorted(os.listdir(folder_path))

    # Fail early with a clear message instead of looking up a placeholder
    # variable name inside the dataset.
    if folder_name not in variable_codes:
        raise KeyError(f"No variable code known for folder '{folder_name}'")
    code = variable_codes[folder_name]

    values = []
    for data_file in data_files:
        # The context manager closes the underlying file once the cell is read.
        with xr.open_dataset(f"{folder_path}/{data_file}") as data:
            # assumes the variable is laid out as (time, lat, lon) -- TODO confirm
            conc_val = data[code].values[0][lat_index][lon_index]

        if folder_name == 'moisture' and np.isnan(conc_val):
            # Missing moisture cells are treated as 0; keep the value's dtype so
            # the dtype of the returned array is unaffected.
            conc_val = conc_val.dtype.type(0)

        values.append(conc_val)

    conc_column = np.empty((0,),dtype='float32')
    if values:
        # One append for the whole series instead of one array copy per file.
        conc_column = np.append(conc_column,values)
    return conc_column

In [40]:
# Month-start timestamps from January 2005 through December 2023 (228 entries).
# ISO 8601 literals are used because '12/1/2023' is easy to misread as 12 January.
date_column = pd.date_range(start='2005-01-01', end='2023-12-01', freq='MS')

In [41]:
# Coordinates of the location under study and their positions on the data grid.
given_lat, given_lon = 20.7832, 85.5085
lat_index, lon_index = calc_index(given_lat, given_lon)

In [42]:
# Read the per-file series for the selected grid cell, one data folder per
# quantity. The folders are visited in the order listed below.
pm_conc, co_conc, o3_conc, moisture_conc, temperature_conc = (
    get_conc(folder, lat_index, lon_index)
    for folder in ('aqi1', 'aqi2', 'aqi3', 'moisture', 'temperature')
)

In [43]:
# Unit conversions. The results are stored back under the same names, so
# re-running this cell without re-running the loading cell scales them twice.
pm_conc, co_conc, o3_conc = (
    pm_conc * 1e9,            # 1 kg/m^3 = 1e9 ug/m^3
    co_conc * 1.15 * 1e-3,    # 1 ppb = 1.15 * 1e-3 mg/m^3 for CO
    o3_conc * 0.1,            # keep 10 percent of the total ozone column
)

In [44]:
def _sub_index(conc, conc_lows, conc_highs, aqi_lows, aqi_highs):
    """Map one pollutant's concentrations onto the AQI scale, band by band."""
    sub_index = np.full(len(conc), 0, dtype=float)
    for position, value in enumerate(conc):
        for band, conc_low in enumerate(conc_lows):
            if not value > conc_low:
                # Band lower bounds are ascending, so no higher band can match.
                # NaN and non-positive values stop here on the first band and
                # keep a sub-index of 0.
                break
            # Straight line through (conc_low, aqi_low) and (conc_high, aqi_high);
            # the highest band whose lower bound is exceeded wins.
            slope = (aqi_highs[band] - aqi_lows[band]) / (conc_highs[band] - conc_low)
            sub_index[position] = slope * (value - conc_low) + aqi_lows[band]
    return sub_index


def calc_aqi(pm_conc,co_conc,o3_conc):
    """Compute an air-quality index from PM, CO and O3 concentrations.

    Each pollutant is converted to a sub-index by linear interpolation inside
    the breakpoint band it falls into; the AQI is the element-wise maximum of
    the three sub-indices.

    Args:
        pm_conc: Sequence of particulate concentrations.
        co_conc: Sequence of CO concentrations.
        o3_conc: Sequence of O3 concentrations.
            The three sequences are combined element-wise, so they are expected
            to have the same length.

    Returns:
        numpy float array with one AQI value per input position.

    Notes:
        * A value that does not exceed the first lower bound (0), including
          NaN, yields a sub-index of 0.
        * A value lying between one band's upper bound and the next band's
          lower bound is interpolated on the lower band's line.
        * Values above the last band are extrapolated on the last band's line,
          so the result is not capped at 500.
    """
    aqi_lows = [0,51,101,201,301,401]
    aqi_highs = [50,100,200,300,400,500]

    pm_aqi = _sub_index(
        pm_conc,
        [0,31,61,91,121,251],
        [30,60,90,120,250,500],
        aqi_lows,
        aqi_highs,
    )
    co_aqi = _sub_index(
        co_conc,
        [0,1.1,2.1,10.1,17.1,34.1],
        [1,2,10,17,34,50],
        aqi_lows,
        aqi_highs,
    )
    o3_aqi = _sub_index(
        o3_conc,
        [0,51,101,169,209,748],
        [50,100,168,208,747,1000],
        aqi_lows,
        aqi_highs,
    )

    aqi = np.maximum(pm_aqi,np.maximum(co_aqi,o3_aqi))
    return aqi

In [45]:
# AQI for every month of the training series, from the converted concentrations.
aqi = calc_aqi(pm_conc=pm_conc, co_conc=co_conc, o3_conc=o3_conc)

In [46]:
# Assemble the training table: one row per timestamp in date_column, one column
# per series (column order is the insertion order below).
df = pd.DataFrame(
    data={
        'pm_conc': pm_conc,
        'co_conc': co_conc,
        'o3_conc': o3_conc,
        'aqi': aqi,
        'moisture_conc': moisture_conc,
        'temperature_conc': temperature_conc,
    },
    index=date_column,
)

In [47]:
# Show the assembled training frame (the notebook renders the last expression).
df

Unnamed: 0,pm_conc,co_conc,o3_conc,aqi,moisture_conc,temperature_conc
2005-01-01,15.687535,0.194340,25.393068,26.145892,0.186029,294.293091
2005-02-01,20.314533,0.177672,25.164618,33.857555,0.150872,298.814941
2005-03-01,18.073730,0.154295,26.767704,30.122884,0.134153,302.124420
2005-04-01,47.634571,0.129381,27.696100,79.106689,0.112684,304.326355
2005-05-01,45.404739,0.124172,28.217482,75.339042,0.139401,306.058380
...,...,...,...,...,...,...
2023-08-01,88.472443,0.103176,27.058512,194.785235,0.327479,300.256317
2023-09-01,34.768864,0.114889,26.948111,57.368080,0.344008,300.116699
2023-10-01,44.462650,0.133584,26.949987,73.747237,0.324994,298.116913
2023-11-01,27.176672,0.143981,25.817957,45.294453,0.272135,295.378967


In [48]:
# Write the training table to disk. The export folder is created first so the
# cell also works on a fresh checkout where '../exports' does not exist yet.
os.makedirs('../exports', exist_ok=True)
df.to_csv('../exports/train_values.csv')

In [49]:
def forecast_conc(conc_df,exact_date):
    """Forecast the value of a monthly series at ``exact_date`` with Prophet.

    The series is min-max scaled, a Prophet model is fitted on it, the model's
    timeline is extended in month-start steps (``freq='MS'``) up to the target
    month, and the prediction for the target date is mapped back to the
    original scale.

    Args:
        conc_df: DataFrame indexed by date with the observations in a
            ``'conc'`` column. The frame is not modified.
        exact_date: ``datetime`` to predict for; only its calendar date is used.
            NOTE(review): the future timeline is generated with month-start
            frequency, so this presumably has to be the first day of a month
            -- confirm with callers.

    Returns:
        The predicted value (Prophet's ``yhat``) in the units of ``conc_df``.

    Raises:
        ValueError: If the forecast contains no row for ``exact_date``.
    """
    # Build the 'ds'/'y' frame Prophet expects as a new object: this leaves the
    # caller's DataFrame untouched and avoids assigning into a slice.
    scaler = MinMaxScaler()
    history = pd.DataFrame({
        'ds': pd.to_datetime(conc_df.index),
        'y': scaler.fit_transform(conc_df[['conc']].to_numpy()).ravel(),
    })

    pro_model = Prophet(interval_width=0.95)
    pro_model.fit(history)

    # Number of monthly steps from the last observed month to the target month,
    # derived from the data rather than from a fixed reference date.
    target = pd.Timestamp(exact_date.date())
    last_observed = history['ds'].max()
    periods = (target.year - last_observed.year) * 12 + (target.month - last_observed.month)
    # A target inside the history needs no extra steps: the future frame keeps
    # the historical dates by default.
    future_date = pro_model.make_future_dataframe(max(periods, 0), freq='MS')

    forecast = pro_model.predict(future_date)
    scaled_prediction = forecast.loc[forecast['ds'] == target, 'yhat']
    if scaled_prediction.empty:
        raise ValueError(
            f"No forecast available for {target.date()}; "
            "the forecast timeline only contains month-start dates."
        )

    # Undo the min-max scaling on a plain array (the scaler was fitted on one).
    return scaler.inverse_transform(scaled_prediction.to_numpy().reshape(-1, 1))[0, 0]

In [50]:
exact_date = datetime(2025,4,1)

# One single-column frame per series, with the column renamed to 'conc' as
# forecast_conc() expects. Selecting with a list and renaming both return new
# objects, so df itself is left untouched.
pm_conc_df = df[['pm_conc']].rename(columns={'pm_conc': 'conc'})
co_conc_df = df[['co_conc']].rename(columns={'co_conc': 'conc'})
o3_conc_df = df[['o3_conc']].rename(columns={'o3_conc': 'conc'})
moisture_conc_df = df[['moisture_conc']].rename(columns={'moisture_conc': 'conc'})
temperature_conc_df = df[['temperature_conc']].rename(columns={'temperature_conc': 'conc'})

# Forecast every series independently for the requested date.
predicted_pm_val = forecast_conc(pm_conc_df, exact_date)
predicted_co_val = forecast_conc(co_conc_df, exact_date)
predicted_o3_val = forecast_conc(o3_conc_df, exact_date)
predicted_moisture_val = forecast_conc(moisture_conc_df, exact_date)
predicted_temperature_val = forecast_conc(temperature_conc_df, exact_date)

# calc_aqi() and the DataFrame constructor work on sequences, so wrap each
# scalar prediction in a one-element float array.
predicted_pm_arr = np.array([predicted_pm_val],dtype=float)
predicted_co_arr = np.array([predicted_co_val],dtype=float)
predicted_o3_arr = np.array([predicted_o3_val],dtype=float)
predicted_moisture_arr = np.array([predicted_moisture_val],dtype=float)
predicted_temperature_arr = np.array([predicted_temperature_val],dtype=float)

predicted_aqi_arr = calc_aqi(predicted_pm_arr,predicted_co_arr,predicted_o3_arr)

# Index the single result row by exact_date itself rather than by a repeated
# date literal, so changing exact_date above cannot leave a stale label here.
query_df = pd.DataFrame(
    {
        'pm_conc': predicted_pm_arr,
        'co_conc': predicted_co_arr,
        'o3_conc': predicted_o3_arr,
        'aqi': predicted_aqi_arr,
        'moisture_conc': predicted_moisture_arr,
        'temperature_conc': predicted_temperature_arr,
    },
    index=pd.to_datetime([exact_date]),
)

query_df

19:23:45 - cmdstanpy - INFO - Chain [1] start processing
19:23:45 - cmdstanpy - INFO - Chain [1] done processing
19:23:46 - cmdstanpy - INFO - Chain [1] start processing
19:23:46 - cmdstanpy - INFO - Chain [1] done processing
19:23:46 - cmdstanpy - INFO - Chain [1] start processing
19:23:46 - cmdstanpy - INFO - Chain [1] done processing
19:23:46 - cmdstanpy - INFO - Chain [1] start processing
19:23:46 - cmdstanpy - INFO - Chain [1] done processing
19:23:46 - cmdstanpy - INFO - Chain [1] start processing
19:23:46 - cmdstanpy - INFO - Chain [1] done processing


Unnamed: 0,pm_conc,co_conc,o3_conc,aqi,moisture_conc,temperature_conc
2025-04-01,44.541105,0.076497,27.938566,73.879799,0.108921,304.716837


In [51]:
# Write the forecast row to disk. The export folder is created first so the
# cell also works on a fresh checkout where '../exports' does not exist yet.
os.makedirs('../exports', exist_ok=True)
query_df.to_csv('../exports/test_values.csv')