# Setup

In [1]:
import pandas as pd
import numpy as np
import os

# Program

## EDA

In [3]:
one_year_data = pd.read_csv("../data/final/oneyear_data.csv")

In [4]:
one_year_data.describe()

Unnamed: 0,YEAR,MO,DY,HR,T2M,WS2M,RH2M,PRECTOTCORR,LAT,LONG
count,28899360.0,28899360.0,28899360.0,28899360.0,28899360.0,28899360.0,28899360.0,28899360.0,28899360.0,28899360.0
mean,2023.754,6.513661,15.75683,11.5,19.36495,-4.928667,73.60668,-7.92754,-2.6625,117.6595
std,0.4306205,3.451234,8.81153,6.922187,92.59959,90.39623,97.78027,90.101,5.049753,13.56696
min,2023.0,1.0,1.0,0.0,-999.0,-999.0,-999.0,-999.0,-11.1625,94.4095
25%,2024.0,4.0,8.0,5.75,27.08,1.46,78.31,0.02,-7.1625,105.9095
50%,2024.0,7.0,16.0,11.5,28.27,3.09,81.94,0.09,-2.6625,117.6595
75%,2024.0,10.0,23.0,17.25,28.97,4.86,85.75,0.25,1.8375,129.4095
max,2024.0,12.0,31.0,23.0,42.47,18.56,100.0,188.85,5.8375,140.9095


In [5]:
one_year_data.rename(columns={'YEAR':'Year', "MO":"Month", "DY":"Day", "HR":"Hour"}, inplace=True)

In [6]:
one_year_data

Unnamed: 0,Year,Month,Day,Hour,T2M,WS2M,RH2M,PRECTOTCORR,LAT,LONG
0,2023,10,3,0,27.31,6.12,75.62,0.01,-0.1625,94.4095
1,2023,10,3,1,27.23,5.98,75.44,0.01,-0.1625,94.4095
2,2023,10,3,2,27.18,5.88,75.12,0.00,-0.1625,94.4095
3,2023,10,3,3,27.15,5.99,75.19,0.01,-0.1625,94.4095
4,2023,10,3,4,27.15,6.23,75.38,0.01,-0.1625,94.4095
...,...,...,...,...,...,...,...,...,...,...
28899355,2024,10,2,19,29.15,1.57,74.50,0.02,5.8375,140.9095
28899356,2024,10,2,20,29.16,1.80,74.94,0.01,5.8375,140.9095
28899357,2024,10,2,21,29.25,2.20,75.56,0.03,5.8375,140.9095
28899358,2024,10,2,22,29.25,2.59,76.44,0.02,5.8375,140.9095


In [7]:
one_year_data["DATETIME"] = pd.to_datetime(one_year_data[["Year", "Month", "Day", "Hour"]])

In [8]:
one_year_data.sort_values(by="DATETIME", inplace=True)

## Split Data

In [9]:
pivot = round(0.9*len(one_year_data))

In [10]:
train_data = one_year_data.iloc[:pivot]
test_data = one_year_data.iloc[pivot:]

## Modeling

In [11]:
from xgboost import XGBRegressor
import xgboost as xgb
import pandas as pd
# from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [12]:
X_test = test_data.drop(columns=["DATETIME", "T2M", "WS2M", "RH2M", "PRECTOTCORR"])
y_test = test_data["T2M"]

In [13]:
X_train = train_data.drop(columns=["DATETIME", "T2M", "WS2M", "RH2M", "PRECTOTCORR"])
y_train = train_data["T2M"]

In [14]:
model = XGBRegressor(n_estimators=100)

In [15]:
model.fit(X_train, y_train)

In [16]:
y_hat = model.predict(X_test)

In [17]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 1.14


In [97]:
data_sample =  np.array([[2023, 11, 7, 4, -6.914744, 107.609810]]) #bandung

In [98]:
# Make predictions using the trained model
predictions = model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[25.0566]


In [361]:
model.save_model('../model/temperature_model.json')

### Windspeed model

In [302]:
windspeed_model = XGBRegressor(n_estimators=100)

In [303]:
X_train = train_data.drop(columns=["DATETIME", "WS2M"])
y_train = train_data["WS2M"]

In [304]:
X_test = test_data.drop(columns=["DATETIME", "WS2M"])
y_test = test_data["WS2M"]   

In [305]:
windspeed_model.fit(X_train, y_train)

In [306]:
y_hat = windspeed_model.predict(X_test)

In [307]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.88


In [308]:
data_sample =  np.array([[2024, 10, 7, 13, 31.0 , 49.0, 0, -6.914744, 107.609810]]) # bandung

In [309]:
# Make predictions using the trained model
predictions = windspeed_model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[3.176307]


In [310]:
windspeed_model.save_model('../model/windspeed_model.json')

### Humidity Model

In [386]:
humidity_model = XGBRegressor(n_estimators=100)

In [387]:
X_train = train_data.drop(columns=["DATETIME", "RH2M"])
y_train = train_data["RH2M"]

In [388]:
X_test = test_data.drop(columns=["DATETIME", "RH2M"])
y_test = test_data["RH2M"]   

In [389]:
humidity_model.fit(X_train, y_train)

In [390]:
y_hat = humidity_model.predict(X_test)

In [391]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 1.62


In [392]:
data_sample =  np.array([[2024, 10, 7, 13, 31.0 , 3.61, 0, -6.914744, 107.609810]]) # bandung

In [393]:
# Make predictions using the trained model
predictions = humidity_model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[56.728752]


In [397]:
humidity_model.save_model('../model/humidity_model.json')

### Precipitation Model

In [95]:
precipitation_model = XGBRegressor()

In [96]:
X_train = train_data.drop(columns=["DATETIME", "PRECTOTCORR"])
y_train = train_data["PRECTOTCORR"]

In [97]:
X_test = test_data.drop(columns=["DATETIME", "PRECTOTCORR"])
y_test = test_data["PRECTOTCORR"]   

In [98]:
precipitation_model.fit(X_train, y_train)

In [99]:
y_hat = precipitation_model.predict(X_test)

In [100]:
mae = mean_absolute_error(y_test, y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.29


In [394]:
data_sample =  np.array([[2024, 10, 7, 13, 31.0 , 3.61, 49.0, -6.914744, 107.609810]]) # bandung

In [395]:
# Make predictions using the trained model
predictions = precipitation_model.predict(data_sample)

# Print the predictions
print("Predictions for the new data:")
print(predictions)


Predictions for the new data:
[0.07967559]


In [396]:
precipitation_model.save_model('../model/precipitation_model.json')