# Setup

In [1]:
import pandas as pd
import numpy as np
import os

# Program

## Configuration

In [2]:
# Sampling grid: one point every 0.5 degrees along each axis.
# np.arange excludes the stop value, so the grid ends at the last multiple
# of the step that is still below it.
latitudes = np.arange(-11.1625, 6.1088, 0.5)
longitudes = np.arange(94.4095, 141.0788, 0.5)

# Raw-data directory (presumably where the downloaded files are written;
# it is not used in the cells shown here).
output = "data/raw/"

# Every (latitude, longitude) pair of the grid, latitude-major: each latitude
# is repeated once per longitude while the longitudes cycle underneath it.
coordinates = list(
    zip(
        np.repeat(latitudes, len(longitudes)),
        np.tile(longitudes, len(latitudes)),
    )
)

# Comma-separated parameter codes requested from the API.
parameter = ",".join(["T2M", "WS2M", "RH2M", "PRECTOTCORR"])

# Start of the requested period, written as YYYYMMDD (3 October 2023).
start_day = '20231003'

# End of the requested period, written as YYYYMMDD (2 October 2024).
end_day = '20241002'

# Query URL template for the hourly point endpoint with CSV output.
# The {placeholders} are left unfilled here.
base_url = (
    "https://power.larc.nasa.gov/api/temporal/hourly/point"
    "?parameters={parameter}"
    "&community=RE"
    "&longitude={longitude}"
    "&latitude={latitude}"
    "&start={start_day}"
    "&end={end_day}"
    "&format=CSV"
)

## Merge Data

In [18]:
# Merge every file found in the raw-data folder into a single frame.
folder_path = "../data/raw"

# os.listdir returns entries in arbitrary order; sorting the names makes the
# merged row order reproducible from one run to the next.
raw_frames = [
    pd.read_csv(os.path.join(folder_path, filename))
    for filename in sorted(os.listdir(folder_path))
]

# Concatenate once instead of growing the frame inside the loop, which
# re-copied all accumulated rows on every iteration. An empty folder still
# yields an empty DataFrame (pd.concat raises on an empty list).
weather_df = (
    pd.concat(raw_frames, axis=0, ignore_index=True)
    if raw_frames
    else pd.DataFrame()
)

In [19]:
# Write the merged frame to disk without the "Unnamed: 0" column
# (presumably an index column carried over from the raw CSVs - TODO confirm).
(
    weather_df
    .drop(columns="Unnamed: 0")
    .to_csv("data/onemonth_data.csv", index=False)
)

## EDA

In [21]:
# Reload the merged data set written by the merge step.
one_month_data = pd.read_csv(filepath_or_buffer="data/onemonth_data.csv")

In [24]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
one_month_data.describe()

Unnamed: 0,YEAR,MO,DY,HR,T2M,WS2M,RH2M,PRECTOTCORR,LAT,LONG
count,2368800.0,2368800.0,2368800.0,2368800.0,2368800.0,2368800.0,2368800.0,2368800.0,2368800.0,2368800.0
mean,2024.0,9.066667,15.5,11.5,27.42829,3.506543,82.06082,0.2353555,-2.6625,117.6595
std,0.0,0.2494439,8.655443,6.922188,1.98936,2.265664,7.588495,0.8611068,5.049754,13.56697
min,2024.0,9.0,1.0,0.0,3.08,0.0,23.94,0.0,-11.1625,94.4095
25%,2024.0,9.0,8.0,5.75,26.73,1.65,78.06,0.01,-7.1625,105.9095
50%,2024.0,9.0,15.5,11.5,27.81,3.4,81.75,0.07,-2.6625,117.6595
75%,2024.0,9.0,23.0,17.25,28.65,5.22,85.56,0.22,1.8375,129.4095
max,2024.0,10.0,30.0,23.0,38.22,11.96,100.0,45.92,5.8375,140.9095


In [23]:
# Largest PRECTOTCORR value in the data set.
one_month_data.loc[:, "PRECTOTCORR"].max()

45.92

In [28]:
# Give the date-part columns the names that the DATETIME assembly further
# down selects. The result is rebound instead of using inplace=True, so the
# cell never mutates a frame that an earlier cell already displayed.
one_month_data = one_month_data.rename(
    columns={"YEAR": "Year", "MO": "Month", "DY": "Day", "HR": "Hour"}
)

In [29]:
# Display the frame to check the renamed date-part columns.
one_month_data

Unnamed: 0,Year,Month,Day,Hour,T2M,WS2M,RH2M,PRECTOTCORR,LAT,LONG
0,2024,9,3,0,29.15,4.24,79.38,0.13,-0.1625,94.4095
1,2024,9,3,1,29.07,4.22,79.62,0.15,-0.1625,94.4095
2,2024,9,3,2,29.05,4.19,79.50,0.15,-0.1625,94.4095
3,2024,9,3,3,29.05,4.25,79.50,0.16,-0.1625,94.4095
4,2024,9,3,4,29.06,4.31,79.38,0.18,-0.1625,94.4095
...,...,...,...,...,...,...,...,...,...,...
2368795,2024,10,2,19,29.15,1.57,74.50,0.02,5.8375,140.9095
2368796,2024,10,2,20,29.16,1.80,74.94,0.01,5.8375,140.9095
2368797,2024,10,2,21,29.25,2.20,75.56,0.03,5.8375,140.9095
2368798,2024,10,2,22,29.25,2.59,76.44,0.02,5.8375,140.9095


In [30]:
# Combine the Year/Month/Day/Hour columns into one timestamp per row.
date_parts = one_month_data[["Year", "Month", "Day", "Hour"]]
one_month_data["DATETIME"] = pd.to_datetime(date_parts)

In [32]:
# Order the rows by timestamp so that the positional split below separates
# earlier rows from later ones.
# sort_values(..., inplace=True) returns None, so assigning its result
# replaced the frame with None; keep the returned sorted copy instead.
one_month_data = one_month_data.sort_values(by="DATETIME")

Unnamed: 0,Year,Month,Day,Hour,T2M,WS2M,RH2M,PRECTOTCORR,LAT,LONG,DATETIME
0,2024,9,3,0,29.15,4.24,79.38,0.13,-0.1625,94.4095,2024-09-03 00:00:00
1594800,2024,9,3,0,25.54,1.78,92.25,0.56,0.3375,120.9095,2024-09-03 00:00:00
596160,2024,9,3,0,26.55,2.24,92.00,0.62,-2.6625,132.4095,2024-09-03 00:00:00
2355120,2024,9,3,0,27.99,1.90,80.06,0.12,5.8375,131.9095,2024-09-03 00:00:00
1594080,2024,9,3,0,25.54,1.78,92.25,0.56,0.3375,120.4095,2024-09-03 00:00:00
...,...,...,...,...,...,...,...,...,...,...,...
1574639,2024,10,2,23,28.62,2.95,79.81,0.06,0.3375,106.4095,2024-10-02 23:00:00
1573919,2024,10,2,23,28.55,3.01,81.69,0.09,0.3375,105.9095,2024-10-02 23:00:00
1573199,2024,10,2,23,28.55,3.01,81.69,0.09,0.3375,105.4095,2024-10-02 23:00:00
1580399,2024,10,2,23,23.99,0.06,97.50,0.01,0.3375,110.4095,2024-10-02 23:00:00


## Split Data

In [37]:
# Row position that separates the first 90 % of the rows from the rest.
n_rows = len(one_month_data)
pivot = round(0.9 * n_rows)

In [38]:
# Positional split at `pivot`: rows before it are used for training,
# rows from it onward for testing.
train_data, test_data = one_month_data.iloc[:pivot], one_month_data.iloc[pivot:]

## Modeling

In [334]:
from xgboost import XGBRegressor
import xgboost as xgb
import pandas as pd
# from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error

In [335]:
# Test split for the temperature model: T2M is the target; the features are
# every remaining column except the DATETIME timestamp.
y_test = test_data.loc[:, "T2M"]
X_test = test_data.drop(["DATETIME", "T2M"], axis=1)

In [336]:
# Training split for the temperature model, built the same way as the test
# split: T2M is the target, DATETIME and T2M are removed from the features.
y_train = train_data.loc[:, "T2M"]
X_train = train_data.drop(["DATETIME", "T2M"], axis=1)

In [355]:
# Regressor for temperature (T2M), configured with 100 estimators.
model = XGBRegressor(n_estimators=100)

In [356]:
# Train the temperature model on the training split.
model.fit(X=X_train, y=y_train)

In [357]:
# Temperature predictions for the held-out test rows.
y_hat = model.predict(X=X_test)

In [358]:
# Mean absolute error between the observed and the predicted T2M values.
mae = mean_absolute_error(y_true=y_test, y_pred=y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.97


In [359]:
# One hand-built observation for Bandung. The values are expected to follow
# the column order of X_train for the temperature model.
data_sample = np.array(
    [
        [
            2024, 10, 7, 13,        # Year, Month, Day, Hour
            3.61, 49.0, 0,          # WS2M, RH2M, PRECTOTCORR
            -6.914744, 107.609810,  # LAT, LONG
        ]
    ]
)

In [360]:
# Run the hand-built sample through the temperature model.
predictions = model.predict(X=data_sample)

# Print a header line followed by the prediction array.
print("Predictions for the new data:", predictions, sep="\n")


Predictions for the new data:
[30.935558]


In [361]:
# Persist the trained temperature model as JSON.
model.save_model(fname='../model/temperature_model.json')

### Windspeed Model

In [302]:
# Regressor for wind speed (WS2M), configured with 100 estimators.
windspeed_model = XGBRegressor(n_estimators=100)

In [303]:
# Training split for the wind-speed model: WS2M is the target; the features
# are every remaining column except the DATETIME timestamp.
y_train = train_data.loc[:, "WS2M"]
X_train = train_data.drop(["DATETIME", "WS2M"], axis=1)

In [304]:
# Test split for the wind-speed model, built the same way as the training
# split: WS2M is the target, DATETIME and WS2M are removed from the features.
y_test = test_data.loc[:, "WS2M"]
X_test = test_data.drop(["DATETIME", "WS2M"], axis=1)

In [305]:
# Train the wind-speed model on the training split.
windspeed_model.fit(X=X_train, y=y_train)

In [306]:
# Wind-speed predictions for the held-out test rows.
y_hat = windspeed_model.predict(X=X_test)

In [307]:
# Mean absolute error between the observed and the predicted WS2M values.
mae = mean_absolute_error(y_true=y_test, y_pred=y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.88


In [308]:
# One hand-built observation for Bandung. The values are expected to follow
# the column order of X_train for the wind-speed model.
data_sample = np.array(
    [
        [
            2024, 10, 7, 13,        # Year, Month, Day, Hour
            31.0, 49.0, 0,          # T2M, RH2M, PRECTOTCORR
            -6.914744, 107.609810,  # LAT, LONG
        ]
    ]
)

In [309]:
# Run the hand-built sample through the wind-speed model.
predictions = windspeed_model.predict(X=data_sample)

# Print a header line followed by the prediction array.
print("Predictions for the new data:", predictions, sep="\n")


Predictions for the new data:
[3.176307]


In [310]:
# Persist the trained wind-speed model as JSON.
windspeed_model.save_model(fname='../model/windspeed_model.json')

### Humidity Model

In [386]:
# Regressor for relative humidity (RH2M), configured with 100 estimators.
humidity_model = XGBRegressor(n_estimators=100)

In [387]:
# Training split for the humidity model: RH2M is the target; the features
# are every remaining column except the DATETIME timestamp.
y_train = train_data.loc[:, "RH2M"]
X_train = train_data.drop(["DATETIME", "RH2M"], axis=1)

In [388]:
# Test split for the humidity model, built the same way as the training
# split: RH2M is the target, DATETIME and RH2M are removed from the features.
y_test = test_data.loc[:, "RH2M"]
X_test = test_data.drop(["DATETIME", "RH2M"], axis=1)

In [389]:
# Train the humidity model on the training split.
humidity_model.fit(X=X_train, y=y_train)

In [390]:
# Humidity predictions for the held-out test rows.
y_hat = humidity_model.predict(X=X_test)

In [391]:
# Mean absolute error between the observed and the predicted RH2M values.
mae = mean_absolute_error(y_true=y_test, y_pred=y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 1.62


In [392]:
# One hand-built observation for Bandung. The values are expected to follow
# the column order of X_train for the humidity model.
data_sample = np.array(
    [
        [
            2024, 10, 7, 13,        # Year, Month, Day, Hour
            31.0, 3.61, 0,          # T2M, WS2M, PRECTOTCORR
            -6.914744, 107.609810,  # LAT, LONG
        ]
    ]
)

In [393]:
# Run the hand-built sample through the humidity model.
predictions = humidity_model.predict(X=data_sample)

# Print a header line followed by the prediction array.
print("Predictions for the new data:", predictions, sep="\n")


Predictions for the new data:
[56.728752]


In [397]:
# Persist the trained humidity model as JSON.
humidity_model.save_model(fname='../model/humidity_model.json')

### Precipitation Model

In [95]:
# Regressor for precipitation (PRECTOTCORR). n_estimators is spelled out so
# the configuration matches the temperature, wind-speed and humidity models
# instead of depending on the library default.
precipitation_model = XGBRegressor(n_estimators=100)

In [96]:
# Training split for the precipitation model: PRECTOTCORR is the target; the
# features are every remaining column except the DATETIME timestamp.
y_train = train_data.loc[:, "PRECTOTCORR"]
X_train = train_data.drop(["DATETIME", "PRECTOTCORR"], axis=1)

In [97]:
# Test split for the precipitation model, built the same way as the training
# split: PRECTOTCORR is the target, DATETIME and PRECTOTCORR are removed from
# the features.
y_test = test_data.loc[:, "PRECTOTCORR"]
X_test = test_data.drop(["DATETIME", "PRECTOTCORR"], axis=1)

In [98]:
# Train the precipitation model on the training split.
precipitation_model.fit(X=X_train, y=y_train)

In [99]:
# Precipitation predictions for the held-out test rows.
y_hat = precipitation_model.predict(X=X_test)

In [100]:
# Mean absolute error between the observed and the predicted PRECTOTCORR values.
mae = mean_absolute_error(y_true=y_test, y_pred=y_hat)
print(f'Mean Absolute Error: {mae:.2f}')

Mean Absolute Error: 0.29


In [394]:
# One hand-built observation for Bandung. The values are expected to follow
# the column order of X_train for the precipitation model.
data_sample = np.array(
    [
        [
            2024, 10, 7, 13,        # Year, Month, Day, Hour
            31.0, 3.61, 49.0,       # T2M, WS2M, RH2M
            -6.914744, 107.609810,  # LAT, LONG
        ]
    ]
)

In [395]:
# Run the hand-built sample through the precipitation model.
predictions = precipitation_model.predict(X=data_sample)

# Print a header line followed by the prediction array.
print("Predictions for the new data:", predictions, sep="\n")


Predictions for the new data:
[0.07967559]


In [396]:
# Persist the trained precipitation model as JSON.
precipitation_model.save_model(fname='../model/precipitation_model.json')