In [1]:
import os

# Suppress TensorFlow debug logs.
# TF_CPP_MIN_LOG_LEVEL is consulted when TensorFlow's native runtime is loaded,
# so it must be set BEFORE the first tensorflow import to have any effect.
# 0 = all logs, 1 = hide INFO, 2 = also hide WARNING, 3 = also hide ERROR
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

# Also quiet TensorFlow's Python-side logger
tf.get_logger().setLevel('ERROR')

# --- Load and Preprocess Data ---
train_df = pd.read_csv('train.csv')  # Replace with your file path
test_df = pd.read_csv('test.csv')    # Replace with your file path

# Date preprocessing: collapse the Year/Month/Day columns of each frame into a
# single 'date' column. Both frames are modified in place.
base_year = 2015
date_parts = ['Year', 'Month', 'Day']
for df in (train_df, test_df):
    # When every year is below 100 the column is shifted by base_year
    years_are_offsets = df['Year'].max() < 100
    if years_are_offsets:
        df['Year'] += base_year
    # errors='coerce' turns unparseable year/month/day combinations into NaT
    df['date'] = pd.to_datetime(df[date_parts], errors='coerce')
    df.drop(columns=date_parts, inplace=True)

# Temperature conversion (only for train_df)
def convert_temp(df, col):
    """Add a ``<col>_C`` column derived from ``df[col]`` and return ``df``.

    Values greater than 100 have 273.15 subtracted (presumably readings
    recorded in Kelvin); all other values are copied unchanged. The frame is
    modified in place. If ``col`` is not a column of ``df`` nothing is added.
    """
    if col not in df.columns:
        return df

    def _to_celsius(value):
        # Only values above the 100 threshold are shifted
        if value > 100:
            return value - 273.15
        return value

    celsius_col = col + '_C'
    df[celsius_col] = df[col].apply(_to_celsius)
    return df

# Derive the '_C' columns for both temperature fields of the training frame,
# then discard the raw columns (ignored if they are not present).
raw_temperature_cols = ['Avg_Temperature', 'Avg_Feels_Like_Temperature']
for temp_col in raw_temperature_cols:
    train_df = convert_temp(train_df, temp_col)
train_df.drop(columns=raw_temperature_cols, inplace=True, errors='ignore')

# Sort by kingdom and date, then fill missing values
def _sort_and_fill_by_kingdom(frame):
    """Return ``frame`` sorted by kingdom/date with gaps filled per kingdom.

    Rows are ordered chronologically BEFORE filling so that forward/backward
    fill propagates values between neighbouring dates rather than between
    rows that merely happen to be adjacent in the file. Filling is done with
    the grouped ``ffill``/``bfill`` methods on the non-key columns, which keeps
    the 'kingdom' column intact (``groupby(...).apply`` on the whole frame is
    deprecated and drops the grouping column in newer pandas versions).
    """
    # Rows without a kingdom cannot be assigned to any group
    frame = frame.dropna(subset=['kingdom'])
    frame = frame.sort_values(['kingdom', 'date']).reset_index(drop=True)
    value_cols = [c for c in frame.columns if c != 'kingdom']
    # Forward fill first, then backward fill whatever is still missing at the
    # start of each kingdom's series; values never cross kingdom boundaries.
    frame[value_cols] = frame.groupby('kingdom')[value_cols].ffill()
    frame[value_cols] = frame.groupby('kingdom')[value_cols].bfill()
    return frame


train_df = _sort_and_fill_by_kingdom(train_df)
test_df = _sort_and_fill_by_kingdom(test_df)

# --- Define Targets ---
targets = [
    'Avg_Temperature_C',
    'Radiation',
    'Rain_Amount',
    'Wind_Speed',
    'Wind_Direction',
]

# --- Scale Data ---
# One scaler is fitted jointly on all target columns, so its per-feature
# statistics follow the column order of `targets`.
scaler = MinMaxScaler()
scaled_targets = scaler.fit_transform(train_df[targets])
train_scaled = train_df.copy()
train_scaled[targets] = scaled_targets

# --- Submission 5: LSTM ---
# One univariate LSTM is trained per (target, kingdom) pair on the scaled
# training series and then rolled forward one step at a time to produce as
# many values as that kingdom has rows in the test set.
submission_lstm = test_df[['ID']].copy()
lookback = 10


def _inverse_scale_target(scaled_values, target_name):
    """Map scaled values of a single target back to its original units.

    `scaler` was fitted on all `targets` columns at once, so the values have
    to sit in that target's own column when calling `inverse_transform`;
    using any other column would apply a different feature's min/max.
    Returns a 1-D float array with the same length as `scaled_values`.
    """
    scaled_values = np.asarray(scaled_values, dtype=float).reshape(-1)
    if scaled_values.size == 0:
        # inverse_transform rejects arrays with zero samples
        return scaled_values
    column = targets.index(target_name)
    padded = np.zeros((len(scaled_values), len(targets)))
    padded[:, column] = scaled_values
    return scaler.inverse_transform(padded)[:, column]


for target in targets:
    print(f"Processing {target}...")
    # Predictions are stored against the test-row index so each value lands on
    # a row of its own kingdom, independent of kingdom ordering in either frame.
    target_preds = pd.Series(np.nan, index=test_df.index, dtype=float)
    for kingdom in train_df['kingdom'].unique():
        # Prepare sequence data
        series = train_scaled.loc[train_df['kingdom'] == kingdom, target].values
        test_mask = test_df['kingdom'] == kingdom
        test_len = int(test_mask.sum())

        # At least two windows are needed so that both the training and the
        # validation split below are non-empty.
        if len(series) > lookback + 1:
            # Create sequences: each window of `lookback` values predicts the next one
            X = np.array([series[i:i + lookback] for i in range(len(series) - lookback)])
            y = series[lookback:]
            X = X.reshape((X.shape[0], X.shape[1], 1))  # [samples, timesteps, features]

            # Split for validation (last 20% as validation)
            split_idx = int(len(X) * 0.8)
            X_train, X_val = X[:split_idx], X[split_idx:]
            y_train, y_val = y[:split_idx], y[split_idx:]

            # Many models are created in this loop; drop Keras' accumulated
            # global state first so memory use does not keep growing.
            tf.keras.backend.clear_session()

            # Build and train LSTM model
            model = Sequential([
                LSTM(50, input_shape=(lookback, 1), return_sequences=False),
                Dense(1)
            ])
            model.compile(optimizer='adam', loss='mse')
            model.fit(X_train, y_train, epochs=30, batch_size=32, verbose=0)

            # Validate (sMAPE is computed on the scaled values)
            y_pred_val = model.predict(X_val, verbose=0).flatten()
            smape = 100 / len(y_val) * np.sum(
                np.abs(y_pred_val - y_val) / ((np.abs(y_val) + np.abs(y_pred_val)) / 2)
            )
            print(f"sMAPE for {target} in {kingdom}: {smape:.2f}%")

            # Predict iteratively for test data: every prediction is appended
            # to the window and the oldest value is dropped.
            last_seq = series[-lookback:].reshape((1, lookback, 1))
            pred = []
            for _ in range(test_len):
                next_val = model.predict(last_seq, verbose=0)
                pred.append(next_val[0, 0])
                last_seq = np.roll(last_seq, -1, axis=1)  # shift along the time axis
                last_seq[0, -1, 0] = next_val[0, 0]

            # Inverse transform predictions using this target's own scaling
            if test_len:
                target_preds.loc[test_mask] = _inverse_scale_target(pred, target)
        else:
            # Fallback to mean if not enough data
            print(f"Not enough data for LSTM in {kingdom} for {target}. Using mean.")
            mean_val = train_scaled.loc[train_df['kingdom'] == kingdom, target].mean()
            if test_len:
                target_preds.loc[test_mask] = _inverse_scale_target([mean_val], target)[0]

    missing = int(target_preds.isna().sum())
    if missing:
        # Typically test rows whose kingdom never appears in the training data
        print(f"Warning: {missing} test rows have no prediction for {target}.")

    # Add predictions to submission (aligned on the shared test_df index)
    submission_lstm[target.replace('_C', '')] = target_preds

# Save submission
submission_lstm.to_csv('submission_lstm.csv', index=False)
print("LSTM Submission Saved")
print("\nSubmission Preview:\n", submission_lstm.head())

Processing Avg_Temperature_C...
sMAPE for Avg_Temperature_C in Arcadia: 6.03%
sMAPE for Avg_Temperature_C in Atlantis: 4.63%
sMAPE for Avg_Temperature_C in Avalon: 4.17%
sMAPE for Avg_Temperature_C in Camelot: 7.81%
sMAPE for Avg_Temperature_C in Dorne: 4.72%
sMAPE for Avg_Temperature_C in Eden: 4.31%
sMAPE for Avg_Temperature_C in El Dorado: 4.93%
sMAPE for Avg_Temperature_C in Elysium: 4.57%
sMAPE for Avg_Temperature_C in Emerald City: 4.47%
sMAPE for Avg_Temperature_C in Helios: 5.90%
sMAPE for Avg_Temperature_C in Krypton: 4.43%
sMAPE for Avg_Temperature_C in Metropolis: 4.40%
sMAPE for Avg_Temperature_C in Midgar: 3.94%
sMAPE for Avg_Temperature_C in Midgard: 6.66%
sMAPE for Avg_Temperature_C in Mordor: 4.38%
sMAPE for Avg_Temperature_C in Neo-City: 4.53%
sMAPE for Avg_Temperature_C in Neo-Tokyo: 4.50%
sMAPE for Avg_Temperature_C in Nirvana: 4.05%
sMAPE for Avg_Temperature_C in Olympus: 4.59%
sMAPE for Avg_Temperature_C in Pandora: 4.47%
sMAPE for Avg_Temperature_C in Rapture: 4.6