# **Extreme Climate Change Prediction (CLEEMATE)**

# Resource

In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_score, recall_score, f1_score
from tensorflow.keras.callbacks import EarlyStopping
from imblearn.over_sampling import SMOTE
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder

# Gathering Dataset

In [2]:
# Define column names to assign
column_names = ['location', 'time', 'deg_min', 'deg_max', 'hum_min', 'hum_max', 'humidity', 'temperature', 'weather_code', 'wind_direction', 'wind_speed']

# List of the dataset filenames
dataset_files = [
    'kecamatanforecast-jakarta.csv',
    'kecamatanforecast-jambi.csv',
    'kecamatanforecast-jawatengah.csv',
    'kecamatanforecast-jawatimur.csv',
    'kecamatanforecast-sumut.csv'
]

# Read each dataset, assign column names, and drop unwanted columns
datasets = []
for file in dataset_files:
    # Read the CSV file
    df = pd.read_csv(file, sep=';', header=None)

    # Assign column names
    df.columns = column_names

    # Split the 'time' column into 'date' and 'time'
    df[['date', 'time']] = df['time'].str.split(' ', expand=True)

    # Reorder the columns to have 'date' first, followed by 'time'
    df = df[['date', 'time'] + [col for col in df.columns if col not in ['date', 'time']]]

    # Drop unnecessary columns
    df = df.drop(columns=['deg_min', 'deg_max', 'hum_min', 'hum_max', 'wind_direction'])

    # Append the cleaned dataset to the list
    datasets.append(df)

# Combine all datasets into one
combined_df = pd.concat(datasets, ignore_index=True)

# Show the updated dataset (optional)
print(combined_df.head())

# Save the combined dataframe to a new file
combined_df.to_csv('cleemate-dataset.csv', sep=';', index=False)


         date      time  location  humidity  temperature  weather_code  \
0  2024-11-25  00:00:00    501191        88           26             4   
1  2024-11-25  01:00:00    501191        81           27             4   
2  2024-11-25  02:00:00    501191        73           29             4   
3  2024-11-25  03:00:00    501191        66           31             4   
4  2024-11-25  04:00:00    501191        63           31             4   

   wind_speed  
0           1  
1           1  
2           1  
3           1  
4           1  


# Merge Dataset

In [3]:
# Load the kecamatanforecast-jawatimur-fix dataset
df_forecast = pd.read_csv('cleemate-dataset.csv', sep=';')

# Load the kecamatan_geofeatures dataset
df_geofeatures = pd.read_csv('kecamatan_geofeatures.csv', sep=';')

# Merge the datasets on the 'location' column to get the corresponding 'kota' and 'provinsi' values
merged_df = pd.merge(df_forecast, df_geofeatures[['location', 'kota', 'provinsi']], on='location', how='left')

# Replace 'location' column with 'kota' column
merged_df['location'] = merged_df['kota']

# Drop the 'kota' column, as we no longer need it
merged_df = merged_df.drop(columns=['kota'])

# Rearrange columns so that 'provinsi' comes after 'location'
columns = ['location', 'provinsi'] + [col for col in merged_df.columns if col not in ['location', 'provinsi']]
merged_df = merged_df[columns]

# Show the updated dataframe (first few rows)
print(merged_df.head())

# Optionally, save the updated dataframe to a new CSV file
merged_df.to_csv('cleemate-dataset-fix.csv', sep=';', index=False)


             location     provinsi        date      time  humidity  \
0  Kota Jakarta Timur  DKI Jakarta  2024-11-25  00:00:00        88   
1  Kota Jakarta Timur  DKI Jakarta  2024-11-25  01:00:00        81   
2  Kota Jakarta Timur  DKI Jakarta  2024-11-25  02:00:00        73   
3  Kota Jakarta Timur  DKI Jakarta  2024-11-25  03:00:00        66   
4  Kota Jakarta Timur  DKI Jakarta  2024-11-25  04:00:00        63   

   temperature  weather_code  wind_speed  
0           26             4           1  
1           27             4           1  
2           29             4           1  
3           31             4           1  
4           31             4           1  


In [4]:
# Load the kecamatanforecast-jawatimur-fix dataset
df_forecast = pd.read_csv('cleemate-dataset-fix.csv', sep=';')

# Load the weather dataset
df_weather = pd.read_csv('weather.csv', sep=';')

# Merge the datasets on the 'weather_code' column to get the corresponding 'weather' values
merged_df = pd.merge(df_forecast, df_weather[['weather_code', 'weather']], on='weather_code', how='left')

# Show the updated dataframe (first few rows)
print(merged_df.head())

# Optionally, save the updated dataframe to a new CSV file
merged_df.to_csv('cleemate-dataset-fix.csv', sep=';', index=False)


             location     provinsi        date      time  humidity  \
0  Kota Jakarta Timur  DKI Jakarta  2024-11-25  00:00:00        88   
1  Kota Jakarta Timur  DKI Jakarta  2024-11-25  01:00:00        81   
2  Kota Jakarta Timur  DKI Jakarta  2024-11-25  02:00:00        73   
3  Kota Jakarta Timur  DKI Jakarta  2024-11-25  03:00:00        66   
4  Kota Jakarta Timur  DKI Jakarta  2024-11-25  04:00:00        63   

   temperature  weather_code  wind_speed        weather  
0           26             4           1  Berawan Tebal  
1           27             4           1  Berawan Tebal  
2           29             4           1  Berawan Tebal  
3           31             4           1  Berawan Tebal  
4           31             4           1  Berawan Tebal  


# Preprocessing Dataset

In [5]:
# Load the dataset
df_forecast = pd.read_csv('cleemate-dataset-fix.csv', sep=';')

# Create necessary columns first (before imputation)
df_forecast['temp_change'] = df_forecast['temperature'].diff().fillna(0)
df_forecast['wind_speed_change'] = df_forecast['wind_speed'].diff().fillna(0)
df_forecast['humidity_change'] = df_forecast['humidity'].diff().fillna(0)

# Create lag features for 1-day lookback (shift the values by 1)
df_forecast['temp_previous_day'] = df_forecast['temperature'].shift(1)
df_forecast['wind_speed_previous_day'] = df_forecast['wind_speed'].shift(1)
df_forecast['humidity_previous_day'] = df_forecast['humidity'].shift(1)

# Rolling averages over the past 5 columns for temperature, wind speed, and humidity
df_forecast['rolling_temp'] = df_forecast['temperature'].rolling(window=5).mean()
df_forecast['rolling_wind'] = df_forecast['wind_speed'].rolling(window=5).mean()
df_forecast['rolling_humidity'] = df_forecast['humidity'].rolling(window=5).mean()

# Handle missing values by imputing (using median or mean)
imputer = SimpleImputer(strategy='median')
df_forecast[['humidity', 'temperature', 'wind_speed', 'temp_change', 'wind_speed_change',
             'humidity_change', 'temp_previous_day', 'wind_speed_previous_day',
             'humidity_previous_day', 'rolling_temp', 'rolling_wind', 'rolling_humidity']] = imputer.fit_transform(
    df_forecast[['humidity', 'temperature', 'wind_speed', 'temp_change', 'wind_speed_change',
                 'humidity_change', 'temp_previous_day', 'wind_speed_previous_day',
                 'humidity_previous_day', 'rolling_temp', 'rolling_wind', 'rolling_humidity']])

# Convert 'weather_code' into one-hot encoded features
weather_encoder = OneHotEncoder(sparse_output=False)
weather_code_encoded = weather_encoder.fit_transform(df_forecast[['weather_code']])

# Create a DataFrame from the one-hot encoded features and join with the original dataframe
weather_code_df = pd.DataFrame(weather_code_encoded, columns=weather_encoder.get_feature_names_out(['weather_code']))
df_forecast = pd.concat([df_forecast, weather_code_df], axis=1)

# Define extreme fluctuation (label = 1 for extreme change, 0 otherwise)
df_forecast['extreme_fluctuation'] = ((df_forecast['temp_change'].abs() > 5) |
                                      (df_forecast['wind_speed_change'].abs() > 10) |
                                      (df_forecast['humidity_change'].abs() > 10)).astype(int)

# Drop rows with missing data due to lag or rolling operations
df_forecast = df_forecast.dropna(subset=['extreme_fluctuation'])

# Features (X) and Target (y)
X = df_forecast[['humidity', 'temperature', 'wind_speed', 'temp_change', 'wind_speed_change',
                 'humidity_change', 'temp_previous_day', 'wind_speed_previous_day',
                 'humidity_previous_day', 'rolling_temp', 'rolling_wind', 'rolling_humidity'] + list(weather_code_df.columns)]
y = df_forecast['extreme_fluctuation']

# Normalize the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into train and test datasets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.3, random_state=42)

# Handle class imbalance using SMOTE (oversampling the minority class)
smote = SMOTE(sampling_strategy='auto', random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

# Build and Train the model

In [6]:
# Build the model
model = tf.keras.Sequential([
    tf.keras.layers.InputLayer(input_shape=(X_train_resampled.shape[1],)),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')  # Binary classification (0 or 1)
])

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(X_train_resampled, y_train_resampled, epochs=50, batch_size=64, validation_data=(X_test, y_test))



Epoch 1/50
[1m4089/4089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 3ms/step - accuracy: 0.9485 - loss: 0.1210 - val_accuracy: 0.9862 - val_loss: 0.0279
Epoch 2/50
[1m4089/4089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 6ms/step - accuracy: 0.9885 - loss: 0.0300 - val_accuracy: 0.9937 - val_loss: 0.0146
Epoch 3/50
[1m4089/4089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 3ms/step - accuracy: 0.9924 - loss: 0.0205 - val_accuracy: 0.9963 - val_loss: 0.0101
Epoch 4/50
[1m4089/4089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 4ms/step - accuracy: 0.9942 - loss: 0.0153 - val_accuracy: 0.9926 - val_loss: 0.0145
Epoch 5/50
[1m4089/4089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 3ms/step - accuracy: 0.9953 - loss: 0.0126 - val_accuracy: 0.9981 - val_loss: 0.0055
Epoch 6/50
[1m4089/4089[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 3ms/step - accuracy: 0.9961 - loss: 0.0107 - val_accuracy: 0.9975 - val_loss: 0.0057
Epoch 7/50

# Testing and Evaluate the model

In [7]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {test_acc * 100:.2f}%")

y_pred = (model.predict(X_test) > 0.5).astype(int)

print(f"Precision: {precision_score(y_test, y_pred)}")
print(f"Recall: {recall_score(y_test, y_pred)}")
print(f"F1 Score: {f1_score(y_test, y_pred)}")

from sklearn.metrics import confusion_matrix
print(confusion_matrix(y_test, y_pred))


[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9993 - loss: 0.0025
Test Accuracy: 99.92%
[1m2038/2038[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step
Precision: 0.9948946339343906
Recall: 0.9996725605762934
F1 Score: 0.9972778745644599
[[55996    47]
 [    3  9159]]


In [8]:
# Save the model to the runtime (local file system)
model.save('/content/cleemate-model.h5')



In [6]:
from flask import Flask, request, jsonify
import requests
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from datetime import datetime

# Load your trained model
model = tf.keras.models.load_model('cleemate-model.h5')

# Initialize preprocessing tools (scaler, imputer, etc.)
scaler = StandardScaler()
imputer = SimpleImputer(strategy='median')

# Create Flask app
app = Flask(__name__)

def get_bmkg_weather_data(kode_wilayah):
    # Define BMKG API URL with the kode_wilayah_tingkat_iv parameter
    api_url = f'https://api.bmkg.go.id/publik/prakiraan-cuaca?adm4={kode_wilayah}'

    # Make the GET request
    response = requests.get(api_url)

    if response.status_code == 200:
        return response.json()
    else:
        return None

def preprocess_weather_data(weather_data):
    # Extract relevant data from BMKG API response
    # The API returns a nested structure, so you may need to adjust according to the actual data format

    try:
        # Assuming weather data contains these fields (adjust based on API structure)
        temperature = weather_data['temperature']  # Adjust the key names as necessary
        humidity = weather_data['humidity']
        wind_speed = weather_data['wind_speed']

        # Calculate other necessary features
        temp_change = 0  # Calculate temperature change if needed
        wind_speed_change = 0  # Calculate wind speed change
        humidity_change = 0  # Calculate humidity change

        # Example: create a DataFrame for the input features
        features = pd.DataFrame({
            'temperature': [temperature],
            'humidity': [humidity],
            'wind_speed': [wind_speed],
            'temp_change': [temp_change],
            'wind_speed_change': [wind_speed_change],
            'humidity_change': [humidity_change]
        })

        return features
    except KeyError:
        return None  # In case the structure doesn't match

@app.route('/predict', methods=['POST'])
def predict():
    # Get user input from the request (JSON)
    data = request.get_json()
    location = data['location']
    date_input = data['date']
    kode_wilayah = data['kode_wilayah']  # Adding kode_wilayah for BMKG API

    # Step 1: Preprocess the input data (date, location)
    date = datetime.strptime(date_input, "%Y-%m-%d")
    day_of_week = date.weekday()
    month = date.month
    year = date.year

    # Step 2: Fetch weather data from BMKG API
    weather_data = get_bmkg_weather_data(kode_wilayah)

    if not weather_data:
        return jsonify({'error': 'Failed to retrieve weather data'}), 400

    # Step 3: Process the retrieved weather data
    features = preprocess_weather_data(weather_data)

    if features is None:
        return jsonify({'error': 'Failed to process weather data'}), 400

    # Add date-based features
    features['day_of_week'] = day_of_week
    features['month'] = month
    features['year'] = year

    # Step 4: Preprocess the features (scaling, imputing missing values)
    features = imputer.transform(features)  # Impute missing values
    features_scaled = scaler.transform(features)  # Scale the features

    # Step 5: Make the prediction
    prediction = model.predict(features_scaled)
    predicted_class = (prediction > 0.5).astype(int)  # Convert to binary output (0 or 1)

    # Step 6: Return the prediction as a JSON response
    return jsonify({'prediction': int(predicted_class[0])})

if __name__ == '__main__':
    app.run(debug=True)


ModuleNotFoundError: No module named 'flask'