# Setup

In [1]:
import pandas as pd
import numpy as np
import os

# Program

## EDA

In [3]:
daily_year_data = pd.read_csv("../data/final/oneyear_daily_data.csv")

In [4]:
daily_year_data.describe()

Unnamed: 0,YEAR,MO,DY,T2M,WS2M,RH2M,PRECTOTCORR,LAT,LONG
count,383934.0,383934.0,383934.0,383934.0,383934.0,383934.0,383934.0,383934.0,383934.0
mean,2023.754098,6.513661,15.756831,19.459908,-3.686209,71.7762,-4.51804,-8.52046,115.785572
std,0.430621,3.451238,8.811541,92.596452,90.505853,97.472976,90.79559,1.796311,14.009459
min,2023.0,1.0,1.0,-999.0,-999.0,-999.0,-999.0,-11.1625,94.4095
25%,2024.0,4.0,8.0,26.98,2.91,77.81,0.18,-10.1625,102.9095
50%,2024.0,7.0,16.0,28.08,4.59,80.94,0.94,-8.6625,114.9095
75%,2024.0,10.0,23.0,28.88,6.09,83.75,3.83,-7.1625,127.9095
max,2024.0,12.0,31.0,32.29,15.27,99.0,362.46,-4.1625,140.9095


In [5]:
daily_year_data.rename(columns={'YEAR':'Year', "MO":"Month", "DY":"Day", "HR":"Hour"}, inplace=True)

In [6]:
daily_year_data

Unnamed: 0,Year,Month,Day,T2M,WS2M,RH2M,PRECTOTCORR,LAT,LONG
0,2023.0,10.0,3.0,25.96,8.09,75.50,0.14,-10.1625,94.4095
1,2023.0,10.0,4.0,26.10,7.73,73.25,0.23,-10.1625,94.4095
2,2023.0,10.0,5.0,25.94,6.56,81.00,2.06,-10.1625,94.4095
3,2023.0,10.0,6.0,26.16,5.86,82.75,0.71,-10.1625,94.4095
4,2023.0,10.0,7.0,26.22,5.58,83.38,0.83,-10.1625,94.4095
...,...,...,...,...,...,...,...,...,...
383929,2024.0,9.0,28.0,26.98,5.52,84.00,0.09,-9.6625,140.9095
383930,2024.0,9.0,29.0,26.96,6.51,84.12,0.05,-9.6625,140.9095
383931,2024.0,9.0,30.0,26.97,7.19,81.38,0.00,-9.6625,140.9095
383932,2024.0,10.0,1.0,26.73,7.19,80.12,0.16,-9.6625,140.9095


In [7]:
daily_year_data["DATETIME"] = pd.to_datetime(daily_year_data[["Year", "Month", "Day"]])

In [8]:
daily_year_data.sort_values(by="DATETIME", inplace=True)

## Split Data

In [9]:
pivot = round(0.9*len(daily_year_data))

In [10]:
train_data = daily_year_data.iloc[:pivot]
test_data = daily_year_data.iloc[pivot:]

## Modeling

In [11]:
from xgboost import XGBRegressor
import xgboost as xgb
import pandas as pd
# from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [12]:
X_test = test_data.drop(columns=["DATETIME", "T2M", "WS2M", "RH2M", "PRECTOTCORR"])
y_test = test_data["T2M"]

In [13]:
X_train = train_data.drop(columns=["DATETIME", "T2M", "WS2M", "RH2M", "PRECTOTCORR"])
y_train = train_data["T2M"]

In [14]:
model = XGBRegressor(n_estimators=100)

In [15]:
model.fit(X_train, y_train)

In [16]:
y_hat = model.predict(X_test)

In [17]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.71


In [27]:
data_sample =  np.array([[2025, 6, 10, -6.914744, 107.609810]]) #bandung

In [28]:
# Make predictions using the trained model
predictions = model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[21.925568]


In [361]:
model.save_model('../model/temperature_model.json')

### Windspeed model

In [302]:
windspeed_model = XGBRegressor(n_estimators=100)

In [303]:
X_train = train_data.drop(columns=["DATETIME", "WS2M"])
y_train = train_data["WS2M"]

In [304]:
X_test = test_data.drop(columns=["DATETIME", "WS2M"])
y_test = test_data["WS2M"]   

In [305]:
windspeed_model.fit(X_train, y_train)

In [306]:
y_hat = windspeed_model.predict(X_test)

In [307]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.88


In [308]:
data_sample =  np.array([[2024, 10, 7, 13, 31.0 , 49.0, 0, -6.914744, 107.609810]]) # bandung

In [309]:
# Make predictions using the trained model
predictions = windspeed_model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[3.176307]


In [310]:
windspeed_model.save_model('../model/windspeed_model.json')

### Humidity Model

In [386]:
humidity_model = XGBRegressor(n_estimators=100)

In [387]:
X_train = train_data.drop(columns=["DATETIME", "RH2M"])
y_train = train_data["RH2M"]

In [388]:
X_test = test_data.drop(columns=["DATETIME", "RH2M"])
y_test = test_data["RH2M"]   

In [389]:
humidity_model.fit(X_train, y_train)

In [390]:
y_hat = humidity_model.predict(X_test)

In [391]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 1.62


In [392]:
data_sample =  np.array([[2024, 10, 7, 13, 31.0 , 3.61, 0, -6.914744, 107.609810]]) # bandung

In [393]:
# Make predictions using the trained model
predictions = humidity_model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[56.728752]


In [397]:
humidity_model.save_model('../model/humidity_model.json')

### Precipitation Model

In [95]:
precipitation_model = XGBRegressor()

In [96]:
X_train = train_data.drop(columns=["DATETIME", "PRECTOTCORR"])
y_train = train_data["PRECTOTCORR"]

In [97]:
X_test = test_data.drop(columns=["DATETIME", "PRECTOTCORR"])
y_test = test_data["PRECTOTCORR"]   

In [98]:
precipitation_model.fit(X_train, y_train)

In [99]:
y_hat = precipitation_model.predict(X_test)

In [100]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.29


In [394]:
data_sample =  np.array([[2024, 10, 7, 13, 31.0 , 3.61, 49.0, -6.914744, 107.609810]]) # bandung

In [395]:
# Make predictions using the trained model
predictions = precipitation_model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[0.07967559]


In [396]:
precipitation_model.save_model('../model/precipitation_model.json')