In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer

class EnhancedInflationPredictor:
    """Per-region inflation forecaster built from an ensemble of LSTM models.

    For every region, ``train_region_model`` imputes and standardises the
    numeric columns, cuts them into sliding windows of ``lookback`` rows and
    fits ``n_models`` independently initialised networks.  ``predict_future``
    then rolls the last window forward one step at a time, averaging the
    ensemble's output at each step.

    Attributes set in ``__init__``:
        lookback: number of consecutive rows fed to the network per sample.
        lstm_units: width of the first LSTM layer (the second uses half).
        epochs: upper bound on training epochs per model.
        n_models: number of networks trained per region.
        scalers: region name -> fitted ``StandardScaler``.
        imputer: ``KNNImputer`` (3 neighbours), refit on every
            ``prepare_data`` call.
        models: region name -> dict with keys ``'models'``,
            ``'feature_names'`` and ``'last_sequence'``.
    """

    def __init__(self, lookback=4, lstm_units=32, epochs=300, n_models=5):
        self.lookback = lookback
        self.lstm_units = lstm_units
        self.epochs = epochs
        self.n_models = n_models
        self.scalers = {}
        self.imputer = KNNImputer(n_neighbors=3)
        self.models = {}

    def prepare_data(self, data, target_col='FP_CPI_TOTL_ZG'):
        """Return the numeric columns of *data*, imputed, with the target last.

        Args:
            data: DataFrame holding the raw indicators for one region.
            target_col: name of the column to forecast.

        Returns:
            DataFrame with the same index as *data*, containing only numeric
            columns that have at least one observed value, with
            ``target_col`` as the final column and missing values filled by
            the KNN imputer.

        Raises:
            ValueError: if ``target_col`` is not a numeric column of *data*,
                or if it contains no observed values at all.
        """
        numeric_cols = data.select_dtypes(include=[np.number]).columns.tolist()

        if target_col not in numeric_cols:
            raise ValueError(f"Target column {target_col} not found")

        # The rest of the class relies on the target being the LAST column
        # (see the ``[-1]`` indexing in ``predict_future``).
        ordered_cols = [col for col in numeric_cols if col != target_col]
        ordered_cols.append(target_col)

        # KNNImputer silently drops columns that are entirely NaN, which would
        # make its output narrower than the column labels we re-attach below.
        # Drop them explicitly so labels and values stay aligned.
        data_clean = data[ordered_cols].dropna(axis=1, how='all')
        if target_col not in data_clean.columns:
            raise ValueError(
                f"Target column {target_col} contains no observed values"
            )

        data_imputed = self.imputer.fit_transform(data_clean)
        return pd.DataFrame(data_imputed,
                            index=data.index,
                            columns=data_clean.columns)

    def create_dataset(self, data, target_col='FP_CPI_TOTL_ZG'):
        """Slice *data* into supervised (window, next-target) pairs.

        Args:
            data: DataFrame of model inputs, one row per time step.
            target_col: column whose value at step ``i`` is the label for the
                window covering steps ``i - lookback`` .. ``i - 1``.

        Returns:
            Tuple ``(X, y)`` where ``X`` has shape
            ``(n_samples, lookback, n_columns)`` and ``y`` has shape
            ``(n_samples,)``.  When *data* has ``lookback`` rows or fewer,
            ``n_samples`` is 0 but both arrays keep those shapes.
        """
        values = data.values
        target_idx = data.columns.get_loc(target_col)

        window_starts = range(len(values) - self.lookback)
        if len(window_starts) == 0:
            # Keep the rank/feature dimension so callers can still inspect
            # X.shape instead of receiving a flat (0,) array.
            empty_X = np.empty((0, self.lookback, values.shape[1]),
                               dtype=values.dtype)
            return empty_X, np.empty((0,), dtype=values.dtype)

        X = np.array([values[start:start + self.lookback, :]
                      for start in window_starts])
        y = values[self.lookback:, target_idx].copy()
        return X, y

    def build_model(self, input_shape):
        """Build and compile one stacked-LSTM regression network.

        Args:
            input_shape: ``(lookback, n_features)`` tuple for the first layer.

        Returns:
            A compiled Keras ``Sequential`` model (MSE loss, MAE metric,
            Adam optimiser at learning rate 0.001).
        """
        # Prefer the legacy Adam where it is usable.  The attribute alone is
        # not a reliable signal: on Keras 3 the ``legacy`` namespace still
        # exists but instantiating its classes raises ImportError, and on
        # older releases the attribute may be missing entirely.
        try:
            optimizer = tf.keras.optimizers.legacy.Adam(learning_rate=0.001)
        except (AttributeError, ImportError):
            optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)

        model = Sequential([
            LSTM(self.lstm_units,
                 activation='tanh',
                 input_shape=input_shape,
                 return_sequences=True,
                 kernel_regularizer=l2(0.01)),
            Dropout(0.3),
            BatchNormalization(),

            LSTM(self.lstm_units//2,
                 activation='tanh',
                 kernel_regularizer=l2(0.01)),
            Dropout(0.3),
            BatchNormalization(),

            Dense(64, activation='relu', kernel_regularizer=l2(0.01)),
            Dropout(0.2),
            Dense(32, activation='relu'),
            Dense(1)
        ])

        model.compile(
            optimizer=optimizer,
            loss='mse',
            metrics=['mae']
        )

        return model

    def train_region_model(self, region_data, region_name):
        """Fit the ensemble for one region and remember it under *region_name*.

        Args:
            region_data: raw DataFrame for the region (passed to
                ``prepare_data`` with the default target column).
            region_name: key under which the scaler and models are stored.

        Returns:
            List of the ``n_models`` trained Keras models.

        Raises:
            ValueError: if the region has too few rows to form a single
                training window, or if ``prepare_data`` rejects the input.
        """
        prepared_data = self.prepare_data(region_data)

        scaler = StandardScaler()
        scaled_data = scaler.fit_transform(prepared_data)
        self.scalers[region_name] = scaler

        X, y = self.create_dataset(
            pd.DataFrame(scaled_data, columns=prepared_data.columns)
        )

        if len(X) == 0:
            raise ValueError("Insufficient data for training")

        models = []

        for _ in range(self.n_models):
            model = self.build_model((self.lookback, X.shape[2]))

            # Fresh callbacks per model so no state carries over between
            # ensemble members.
            early_stopping = EarlyStopping(
                monitor='val_loss', patience=50, restore_best_weights=True
            )
            reduce_lr = ReduceLROnPlateau(
                monitor='val_loss', factor=0.5, patience=20
            )

            model.fit(
                X, y,
                epochs=self.epochs,
                batch_size=8,
                validation_split=0.2,
                callbacks=[early_stopping, reduce_lr],
                verbose=0
            )

            models.append(model)

        self.models[region_name] = {
            'models': models,
            'feature_names': prepared_data.columns.tolist(),
            # Seed window for predict_future: the final `lookback` scaled rows.
            'last_sequence': scaled_data[-self.lookback:]
        }

        return models

    def predict_future(self, region_name, years=10):
        """Forecast the target for *years* steps beyond the training data.

        Each step feeds the current window to every ensemble member, averages
        their outputs, and appends a synthetic row to the window.  That row
        copies the previous row's feature values and replaces only the target
        (last column) with the ensemble mean, i.e. non-target features are
        held at their last observed scaled values.

        Args:
            region_name: region previously passed to ``train_region_model``.
            years: number of consecutive steps to forecast.

        Returns:
            List of ``years`` forecasts in the target's original units.

        Raises:
            ValueError: if no model has been trained for *region_name*.
        """
        if region_name not in self.models:
            raise ValueError(f"No trained model found for {region_name}")

        model_info = self.models[region_name]
        scaler = self.scalers[region_name]
        models = model_info['models']
        n_features = len(model_info['feature_names'])

        predictions = []
        current_sequence = model_info['last_sequence'].copy()

        for _ in range(years):
            # Same input for every ensemble member, so build it once per step.
            input_seq = current_sequence.reshape(1, self.lookback, -1)
            year_predictions = [
                model.predict(input_seq, verbose=0)[0, 0] for model in models
            ]
            ensemble_pred = np.mean(year_predictions)

            new_point = current_sequence[-1].copy()
            new_point[-1] = ensemble_pred

            # Slide the window: drop the oldest row, append the synthetic one.
            current_sequence = np.vstack([current_sequence[1:], new_point])

            # The scaler was fit on all columns, so undo the scaling through a
            # full-width row and read back only the target column.
            dummy_array = np.zeros((1, n_features))
            dummy_array[0, -1] = ensemble_pred
            pred_original = scaler.inverse_transform(dummy_array)[0, -1]

            predictions.append(pred_original)

        return predictions

def main(
    input_file="/Users/oushilin/Desktop/AAAI/Data/comprehensive_economic_data.xlsx",
    output_file="/Users/oushilin/Desktop/AAAI/Data/inflation_forecasts_2026_2035.xlsx",
):
    """Train one ensemble per region, forecast 2026-2035 and export the result.

    Args:
        input_file: Excel workbook with one sheet per region (sheet names
            'China', 'European Union', 'Middle East & North Africa'), each
            having a 'year' column used as the index.
        output_file: path of the Excel file the forecast table is written to.

    Returns:
        DataFrame with a 'Year' column followed by one forecast column per
        region.
    """
    regions = ['China', 'European Union', 'Middle East & North Africa']
    years = list(range(2026, 2036))

    # Load every sheet up front so a missing/invalid sheet fails before any
    # (slow) training starts.
    regions_data = {
        region: pd.read_excel(input_file, sheet_name=region, index_col='year')
        for region in regions
    }

    predictor = EnhancedInflationPredictor(
        lookback=4,
        lstm_units=32,
        epochs=300,
        n_models=5
    )

    forecasts = {}
    for region in regions:
        predictor.train_region_model(regions_data[region], region)
        # Horizon is tied to the year labels so the two cannot drift apart.
        forecasts[region] = predictor.predict_future(region, years=len(years))

    results_df = pd.DataFrame(
        {'Year': years, **{region: forecasts[region] for region in regions}}
    )
    results_df.to_excel(output_file, index=False)

    print("Inflation Forecasts 2026-2035:")
    print("=" * 50)
    for region in regions:
        print(f"\n{region}:")
        for year, value in zip(years, forecasts[region]):
            print(f"  {year}: {value:.2f}%")

    return results_df

if __name__ == "__main__":
    # Seed both NumPy's and TensorFlow's global RNGs with the same fixed
    # value before any model is built or trained.
    seed = 42
    np.random.seed(seed)
    tf.random.set_seed(seed)

    results = main()