In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/indonesia-climate/climate_data.csv
/kaggle/input/indonesia-climate/station_detail.csv
/kaggle/input/indonesia-climate/province_detail.csv


In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.impute import SimpleImputer

climate = pd.read_csv("/kaggle/input/indonesia-climate/climate_data.csv")
station = pd.read_csv("/kaggle/input/indonesia-climate/station_detail.csv")
province = pd.read_csv("/kaggle/input/indonesia-climate/province_detail.csv")

climate_station = pd.merge(climate, station, on="station_id", how="left")
climate_station_province = pd.merge(climate_station, province, on="province_id", how="left")

climate_station_province['date'] = pd.to_datetime(climate_station_province['date'], format='%d-%m-%Y')
climate_station_province['day_of_week'] = climate_station_province['date'].dt.dayofweek
climate_station_province['month'] = climate_station_province['date'].dt.month
climate_station_province = climate_station_province.dropna(subset=['Tavg'])

features = ['Tn', 'Tx', 'RH_avg', 'RR', 'ff_x', 'ff_avg', 'day_of_week', 'month', 'province_id']
target = 'Tavg'

X = climate_station_province[features]
y = climate_station_province[target]

imputer = SimpleImputer(strategy='median')
X_imputed = imputer.fit_transform(X)
scaler = MinMaxScaler(feature_range=(0, 1))
X_scaled = scaler.fit_transform(X_imputed)

def create_sequences(data, target, seq_length):
    X_seq, y_seq = [], []
    for i in range(len(data) - seq_length):
        X_seq.append(data[i:i + seq_length])
        y_seq.append(target[i + seq_length])
    return np.array(X_seq), np.array(y_seq)

seq_length = 10  
X_seq, y_seq = create_sequences(X_scaled, y.values, seq_length)

X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.2, random_state=42)

model = Sequential()
model.add(LSTM(units=64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=64, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(units=1))  # Single output for temperature prediction
model.compile(optimizer='adam', loss='mean_squared_error')

model.fit(X_train, y_train, epochs=20, batch_size=32)



  super().__init__(**kwargs)


Epoch 1/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m157s[0m 11ms/step - loss: 22.8262
Epoch 2/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m155s[0m 11ms/step - loss: 3.6911
Epoch 3/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m154s[0m 11ms/step - loss: 3.0514
Epoch 4/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 12ms/step - loss: 2.4796
Epoch 5/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 12ms/step - loss: 2.1627
Epoch 6/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 12ms/step - loss: 1.8631
Epoch 7/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 12ms/step - loss: 1.5858
Epoch 8/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m163s[0m 12ms/step - loss: 1.4079
Epoch 9/20
[1m13604/13604[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 12ms/step - loss: 1.2700
Epoch 10/20
[1m13604/13604

<keras.src.callbacks.history.History at 0x7824cf46e140>

In [3]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

y_pred = model.predict(X_test)  
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("MAE:", mae)
print("RMSE:", rmse)
print("R²:", r2)

[1m3401/3401[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 5ms/step
MAE: 0.6995312522398159
RMSE: 0.9091154205024975
R²: 0.7783065324508531


In [4]:
82
3
8
6

6

In [5]:
model.save('v1.0.h5') 
model.summary()


In [6]:
from tensorflow.keras.models import load_model

model = load_model('v1.0.h5')
model.summary()

In [7]:
import shutil
shutil.move('v1.0.h5', '/kaggle/working/v1.0.h5')

'/kaggle/working/v1.0.h5'