Percobaan ke-3: prediksi gas

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import RobustScaler, MinMaxScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
import seaborn as sns
from sklearn.metrics import mean_squared_error, mean_absolute_error
import os

class PowerPredictor:
    """Train and evaluate an LSTM that predicts the next voltage/power sample.

    The pipeline reads one or more CSV files (each must provide the columns
    ``time``, ``input_voltage`` and ``el_power``), derives scaled, periodic
    and rolling features, cuts the feature matrix into fixed-length windows
    and fits a stacked LSTM that predicts the scaled voltage and power of the
    sample that follows each window.

    Attributes set by ``__init__``:
        data_paths: iterable of CSV paths passed by the caller.
        voltage_scaler / power_scaler: ``RobustScaler`` instances fitted in
            ``load_and_preprocess_data`` and reused in ``evaluate_model``.
        time_scaler: ``MinMaxScaler`` instance; not used by any method of
            this class, kept so the attribute remains available.
        seq_length: number of time steps per input window (20).
        model: the compiled Keras model, ``None`` until
            ``train_and_evaluate`` builds it.
        segment_lengths: row count of each loaded file, filled in by
            ``load_and_preprocess_data``.
    """

    def __init__(self, data_paths):
        self.data_paths = data_paths
        self.time_scaler = MinMaxScaler()
        self.voltage_scaler = RobustScaler()
        self.power_scaler = RobustScaler()
        self.seq_length = 20
        self.model = None
        # Row count per input file; lets create_sequences avoid building
        # windows that straddle two unrelated recordings.
        self.segment_lengths = []

    def load_and_preprocess_data(self):
        """Load every CSV in ``self.data_paths`` and build the feature matrix.

        Side effects: fits ``self.voltage_scaler`` and ``self.power_scaler``,
        and sets ``self.data`` (concatenated DataFrame), ``self.features``
        (ordered feature column names) and ``self.segment_lengths``.

        Returns:
            2-D NumPy array with one row per sample and one column per entry
            of ``self.features``; columns 0 and 1 are the scaled voltage and
            scaled power.

        Raises:
            ValueError: if ``self.data_paths`` is empty.
        """
        print("Loading and preprocessing data...")
        if not self.data_paths:
            raise ValueError("data_paths is empty; at least one CSV file is required")

        frames = [pd.read_csv(path) for path in self.data_paths]

        # Fit each scaler exactly once, on the rows of all files together.
        # Re-fitting per file would scale every file differently and leave the
        # scalers holding only the last file's statistics, which makes the
        # inverse_transform in evaluate_model wrong for all other files.
        # NOTE(review): this fit also sees rows that later land in the test
        # split; fit on the training portion only if strict separation matters.
        combined = pd.concat(frames, ignore_index=True)
        self.voltage_scaler.fit(combined[['input_voltage']])
        self.power_scaler.fit(combined[['el_power']])

        window = 24
        datasets = []
        for df in frames:
            # Basic preprocessing: 'time' is interpreted as seconds.
            df['datetime'] = pd.to_datetime(df['time'], unit='s')
            df['hour'] = df['datetime'].dt.hour
            df['day'] = df['datetime'].dt.day

            # Periodic encoding of the hour of day
            df['time_sin'] = np.sin(2 * np.pi * df['hour'] / 24)
            df['time_cos'] = np.cos(2 * np.pi * df['hour'] / 24)

            # Scale with the shared, already-fitted scalers
            df['voltage_scaled'] = self.voltage_scaler.transform(df[['input_voltage']]).ravel()
            df['power_scaled'] = self.power_scaler.transform(df[['el_power']]).ravel()

            # Rolling statistics, computed per file so they never mix recordings
            voltage_roll = df['voltage_scaled'].rolling(window=window, min_periods=1)
            power_roll = df['power_scaled'].rolling(window=window, min_periods=1)
            df['voltage_rolling_mean'] = voltage_roll.mean()
            df['power_rolling_mean'] = power_roll.mean()
            # The sample std of a single observation is NaN, so the first row
            # of every file would inject NaN into the features (and the loss).
            # A window holding one value has no spread, hence 0.
            df['voltage_rolling_std'] = voltage_roll.std().fillna(0.0)
            df['power_rolling_std'] = power_roll.std().fillna(0.0)

            datasets.append(df)

        self.segment_lengths = [len(df) for df in datasets]
        self.data = pd.concat(datasets, ignore_index=True)

        # Feature selection; the first two columns double as prediction targets
        self.features = [
            'voltage_scaled', 'power_scaled',
            'time_sin', 'time_cos',
            'voltage_rolling_mean', 'power_rolling_mean',
            'voltage_rolling_std', 'power_rolling_std'
        ]

        return self.data[self.features].values

    def create_sequences(self, data, segment_lengths=None):
        """Cut ``data`` into sliding windows with next-step targets.

        Args:
            data: 2-D array-like of shape (rows, features).
            segment_lengths: optional list of row counts describing
                consecutive, independent segments of ``data`` (for example
                one per source file). Windows are built inside each segment
                only. When ``None`` the whole array is one segment.

        Returns:
            Tuple ``(sequences, targets)``: ``sequences`` has shape
            (n, seq_length, features); ``targets`` has shape (n, 2) and holds
            the first two columns of the row following each window. When no
            window fits, both arrays are empty but keep those trailing
            dimensions.

        Raises:
            ValueError: if ``segment_lengths`` does not sum to ``len(data)``.
        """
        print("Creating sequences...")
        data = np.asarray(data)
        if segment_lengths is None:
            segment_lengths = [len(data)]
        elif sum(segment_lengths) != len(data):
            raise ValueError(
                "segment_lengths must sum to len(data): "
                f"{sum(segment_lengths)} != {len(data)}"
            )

        sequences = []
        targets = []
        start = 0
        for length in segment_lengths:
            stop = start + length
            # The target row (i + seq_length) must still lie inside the segment.
            for i in range(start, stop - self.seq_length):
                sequences.append(data[i:(i + self.seq_length)])
                targets.append(data[i + self.seq_length, :2])  # Only voltage and power as targets
            start = stop

        if not sequences:
            # Keep the trailing dimensions so callers can still read the shape.
            n_features = data.shape[1] if data.ndim == 2 else 0
            return (
                np.empty((0, self.seq_length, n_features), dtype=data.dtype),
                np.empty((0, min(2, n_features)), dtype=data.dtype),
            )

        return np.array(sequences), np.array(targets)

    def create_model(self, n_features):
        """Build and compile the stacked LSTM regressor.

        Args:
            n_features: number of feature columns per time step.

        Returns:
            A compiled Keras ``Sequential`` model with two linear outputs
            (scaled voltage, scaled power), MSE loss and MAE metric.
        """
        print("Creating model...")
        model = Sequential([
            LSTM(64, activation='relu', return_sequences=True,
                 input_shape=(self.seq_length, n_features)),
            BatchNormalization(),
            Dropout(0.3),

            LSTM(32, activation='relu'),
            BatchNormalization(),
            Dropout(0.2),

            Dense(32, activation='relu'),
            BatchNormalization(),
            Dropout(0.2),

            Dense(16, activation='relu'),
            Dense(2)  # Predicting voltage and power
        ])

        optimizer = Adam(learning_rate=0.0005)
        model.compile(
            optimizer=optimizer,
            loss='mse',
            metrics=['mae']
        )

        return model

    def train_model(self, X_train, y_train, epochs=50):
        """Fit ``self.model`` with early stopping, LR decay and checkpointing.

        Args:
            X_train: training windows, as produced by ``create_sequences``.
            y_train: matching targets.
            epochs: maximum number of epochs (early stopping may end sooner).

        Returns:
            The Keras ``History`` object returned by ``fit``.

        Side effects: writes the best model (by ``val_loss``) to
        ``best_power_model.keras`` in the current working directory.
        """
        print("Training model...")
        callbacks = [
            EarlyStopping(
                monitor='val_loss',
                patience=10,
                restore_best_weights=True,
                verbose=1
            ),
            ReduceLROnPlateau(
                monitor='val_loss',
                factor=0.5,
                patience=5,
                min_lr=0.00001,
                verbose=1
            ),
            ModelCheckpoint(
                'best_power_model.keras',
                monitor='val_loss',
                save_best_only=True
            )
        ]

        history = self.model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=32,
            validation_split=0.2,
            callbacks=callbacks,
            verbose=1,
            shuffle=True
        )

        return history

    def evaluate_model(self, X_test, y_test):
        """Predict on the test windows and report MSE/MAE in original units.

        Args:
            X_test: test windows.
            y_test: matching scaled targets (column 0 voltage, column 1 power).

        Returns:
            Tuple ``(pred_voltage, pred_power, actual_voltage, actual_power)``
            of column vectors, each mapped back through the fitted scalers.
        """
        print("\nEvaluating model performance...")
        predictions = self.model.predict(X_test)

        # Inverse transform predictions
        pred_voltage = self.voltage_scaler.inverse_transform(predictions[:, 0].reshape(-1, 1))
        pred_power = self.power_scaler.inverse_transform(predictions[:, 1].reshape(-1, 1))

        actual_voltage = self.voltage_scaler.inverse_transform(y_test[:, 0].reshape(-1, 1))
        actual_power = self.power_scaler.inverse_transform(y_test[:, 1].reshape(-1, 1))

        # Calculate metrics
        voltage_mse = mean_squared_error(actual_voltage, pred_voltage)
        voltage_mae = mean_absolute_error(actual_voltage, pred_voltage)
        power_mse = mean_squared_error(actual_power, pred_power)
        power_mae = mean_absolute_error(actual_power, pred_power)

        print("\nVoltage Metrics:")
        print(f"MSE: {voltage_mse:.4f}")
        print(f"MAE: {voltage_mae:.4f}")
        print("\nPower Metrics:")
        print(f"MSE: {power_mse:.4f}")
        print(f"MAE: {power_mae:.4f}")

        return pred_voltage, pred_power, actual_voltage, actual_power

    def plot_results(self, history, pred_voltage, pred_power, actual_voltage, actual_power):
        """Show loss curves and predicted-vs-actual scatter plots.

        Args:
            history: Keras ``History`` with ``loss`` and ``val_loss`` entries.
            pred_voltage, pred_power: predictions in original units.
            actual_voltage, actual_power: ground truth in original units.
        """
        print("Plotting results...")
        # Training history
        plt.figure(figsize=(15, 5))

        plt.subplot(131)
        plt.plot(history.history['loss'], label='Training Loss')
        plt.plot(history.history['val_loss'], label='Validation Loss')
        plt.title('Model Loss')
        plt.xlabel('Epoch')
        plt.ylabel('Loss')
        plt.legend()

        # Voltage predictions; the dashed diagonal marks a perfect prediction
        plt.subplot(132)
        plt.scatter(actual_voltage, pred_voltage, alpha=0.5)
        plt.plot([actual_voltage.min(), actual_voltage.max()],
                 [actual_voltage.min(), actual_voltage.max()], 'r--', lw=2)
        plt.title('Voltage Predictions vs Actual')
        plt.xlabel('Actual Voltage')
        plt.ylabel('Predicted Voltage')

        # Power predictions
        plt.subplot(133)
        plt.scatter(actual_power, pred_power, alpha=0.5)
        plt.plot([actual_power.min(), actual_power.max()],
                 [actual_power.min(), actual_power.max()], 'r--', lw=2)
        plt.title('Power Predictions vs Actual')
        plt.xlabel('Actual Power')
        plt.ylabel('Predicted Power')

        plt.tight_layout()
        plt.show()

    def train_and_evaluate(self):
        """Run the full pipeline: load, window, split, train, evaluate, plot.

        Returns:
            The trained Keras model (also stored in ``self.model``).

        Raises:
            ValueError: if the data is too short to yield a single window.
        """
        # Load and preprocess data
        data = self.load_and_preprocess_data()

        # Create sequences, never letting a window span two source files
        X, y = self.create_sequences(data, segment_lengths=self.segment_lengths)
        if len(X) == 0:
            raise ValueError(
                f"Not enough rows to build a sequence: every file needs more than "
                f"{self.seq_length} rows"
            )

        # Split data in order: first 80% for training, last 20% for testing
        train_size = int(0.8 * len(X))
        X_train, X_test = X[:train_size], X[train_size:]
        y_train, y_test = y[:train_size], y[train_size:]

        # Create and train model
        self.model = self.create_model(X.shape[2])
        history = self.train_model(X_train, y_train)

        # Evaluate
        pred_voltage, pred_power, actual_voltage, actual_power = self.evaluate_model(X_test, y_test)

        # Plot results
        self.plot_results(history, pred_voltage, pred_power, actual_voltage, actual_power)

        return self.model

# Usage example
if __name__ == "__main__":
    # Define paths to your data
    data_paths = [
        r"C:\Users\HP\Documents\data mining\uts-uas\train\ex_1.csv",
        r"C:\Users\HP\Documents\data mining\uts-uas\train\ex_9.csv",
        r"C:\Users\HP\Documents\data mining\uts-uas\train\ex_20.csv",
        r"C:\Users\HP\Documents\data mining\uts-uas\train\ex_21.csv",
        r"C:\Users\HP\Documents\data mining\uts-uas\train\ex_23.csv",
        r"C:\Users\HP\Documents\data mining\uts-uas\train\ex_24.csv"
    ]

    # Create predictor instance
    predictor = PowerPredictor(data_paths)

    # Run the whole pipeline (load, train, evaluate, plot). The checkpoint
    # callback that saves 'best_power_model.keras' is registered inside
    # PowerPredictor.train_model, so none needs to be created here.
    model = predictor.train_and_evaluate()