<a href="https://colab.research.google.com/github/Saximn/skills-code-with-codespaces/blob/main/PSA_Codesprint_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

def build_model(input_shape):
    """Assemble and compile the dense regression network.

    Args:
        input_shape: Number of input features; handed to the first layer as
            the one-element shape ``(input_shape,)``.

    Returns:
        A compiled ``Sequential`` model made of three ReLU hidden layers
        (64, 32 and 16 units) followed by a single linear output unit.
    """
    layers = [
        Dense(64, activation='relu', input_shape=(input_shape,)),
        Dense(32, activation='relu'),
        Dense(16, activation='relu'),
        # A single linear unit so the output is an unbounded regression value.
        Dense(1, activation='linear'),
    ]
    model = Sequential(layers)

    # MSE is the training loss; MAE is only reported as an extra metric.
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Load historical energy consumption data
def load_data(file_path):
    """Read a CSV file into a DataFrame, parsing ``timestamp`` as dates.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        The loaded DataFrame, or ``None`` when the file does not exist.
        A status message is printed in both cases.
    """
    try:
        frame = pd.read_csv(file_path, parse_dates=['timestamp'])
    except FileNotFoundError:
        # A missing file is reported and signalled with None, not raised.
        print(f"File {file_path} not found.")
        return None

    print(f"Data loaded successfully from {file_path}")
    return frame


# Handle missing data
def handle_missing_data(data, columns=('energy_consumption', 'carbon_emissions')):
    """Fill missing values in the given columns with each column's mean.

    Columns that are absent from ``data`` are skipped instead of raising
    ``KeyError``. The script in this file applies the function to the energy
    frame and the emissions frame separately before merging them, so a frame
    may hold only one of the default columns.

    Args:
        data: DataFrame to clean. It is modified in place.
        columns: Names of the columns to impute. Defaults to
            ``energy_consumption`` and ``carbon_emissions``.

    Returns:
        The same DataFrame object, with NaNs in the listed (and present)
        columns replaced by that column's mean.
    """
    for column in columns:
        if column not in data.columns:
            # Nothing to impute for a column this frame does not carry.
            continue
        data[column] = data[column].fillna(data[column].mean())
    return data


# Feature scaling
def scale_data(data, columns_to_scale):
    """Rescale the selected columns with a freshly fitted ``MinMaxScaler``.

    Args:
        data: DataFrame holding the columns; it is updated in place.
        columns_to_scale: Labels of the columns to rescale.

    Returns:
        The same DataFrame, with the selected columns replaced by their
        scaled values.
    """
    selected = data[columns_to_scale]
    # The fitted scaler is local to this call and is not returned.
    data[columns_to_scale] = MinMaxScaler().fit_transform(selected)
    return data


# Feature engineering
def add_features(data):
    """Derive calendar features from the ``timestamp`` column.

    Adds two columns in place: ``hour`` and ``day_of_week`` (taken from the
    pandas ``dt.hour`` and ``dt.dayofweek`` accessors).

    Args:
        data: DataFrame with a datetime-typed ``timestamp`` column.

    Returns:
        The same DataFrame with the two new columns appended.
    """
    stamps = data['timestamp'].dt
    data['hour'] = stamps.hour
    data['day_of_week'] = stamps.dayofweek
    return data


if __name__ == "__main__":
    # Read both raw inputs; load_data returns None for a file it cannot find.
    energy_data = load_data('data/historical_energy_data.csv')
    emissions_data = load_data('data/emissions_data.csv')

    # Run the pipeline only when neither input is missing.
    if not (energy_data is None or emissions_data is None):
        # Impute gaps in each frame before they are combined.
        energy_data = handle_missing_data(energy_data)
        emissions_data = handle_missing_data(emissions_data)

        # Keep only the timestamps present in both frames, then derive the
        # hour / day-of-week features from the merged result.
        merged_data = add_features(
            pd.merge(
                energy_data,
                emissions_data,
                on='timestamp',
                how='inner',
            )
        )

        # Scale the two measurement columns.
        scaled_data = scale_data(
            merged_data,
            columns_to_scale=['energy_consumption', 'carbon_emissions'],
        )

        # Persist the preprocessed table for the later steps.
        scaled_data.to_csv('data/preprocessed_energy_emissions.csv', index=False)
        print("Preprocessing complete, data saved to 'data/preprocessed_energy_emissions.csv'")

File data/historical_energy_data.csv not found.
File data/emissions_data.csv not found.


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import ModelCheckpoint

def train_model(
    data_path,
    feature_columns=('NOx', 'PM', 'SOx', 'Energy'),
    target_column='CO2',
    model_path='saved_models/energy_model.h5',
    epochs=50,
    batch_size=32,
):
    """Train the regression model on a preprocessed CSV and checkpoint it.

    Args:
        data_path: Path of the CSV file holding the training data.
        feature_columns: Names of the input columns.
        target_column: Name of the column to predict.
        model_path: File the best model (lowest ``val_loss``) is written to.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used by ``fit``.

    Returns:
        The object returned by ``model.fit`` (the Keras training history),
        so callers can inspect the loss curves.

    Raises:
        FileNotFoundError: If ``data_path`` does not exist.
        KeyError: If a requested column is missing from the CSV.
    """
    data = pd.read_csv(data_path)

    # Split the table into the model inputs (X) and the regression target (y).
    X = data[list(feature_columns)]
    y = data[target_column]

    # Fixed seed keeps the 80/20 split reproducible between runs.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = build_model(input_shape=X_train.shape[1])

    # Keep only the weights of the epoch with the lowest validation loss.
    checkpoint = ModelCheckpoint(model_path, save_best_only=True, monitor='val_loss', mode='min')

    # NOTE(review): the hold-out split doubles as the validation set that
    # selects the checkpoint, so no untouched test set remains afterwards.
    history = model.fit(
        X_train,
        y_train,
        validation_data=(X_test, y_test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[checkpoint],
    )

    print(f"Model training complete. Best model saved to '{model_path}'.")
    return history

if __name__ == "__main__":
    # NOTE(review): the preprocessing step in this file saves to
    # 'data/preprocessed_energy_emissions.csv'; confirm this path is intended.
    train_model(data_path='preprocessed_data.csv')

FileNotFoundError: [Errno 2] No such file or directory: 'preprocessed_data.csv'

In [None]:
import pandas as pd
from tensorflow.keras.models import load_model

def load_data(file_path):
    """Read the CSV at ``file_path`` and return it as a DataFrame.

    Used to load the rows that predictions will be made for. Errors from
    ``pd.read_csv`` (such as a missing file) propagate to the caller.
    """
    return pd.read_csv(file_path)

def predict_emissions(
    data_path,
    model_path='saved_models/energy_model.h5',
    feature_columns=('NOx', 'PM', 'SOx', 'Energy'),
):
    """Predict CO2 for the rows of a CSV using the saved model.

    Args:
        data_path: Path of the CSV file with the rows to score.
        model_path: File the trained Keras model is loaded from.
        feature_columns: Names of the input columns fed to the model.

    Returns:
        The loaded DataFrame with an added ``Predicted CO2`` column. The
        feature and prediction columns are also printed.

    Raises:
        KeyError: If a feature column is missing from the CSV.
    """
    data = load_data(data_path)
    model = load_model(model_path)

    # One list drives both the model input and the printed report, so the
    # two cannot drift apart.
    features = list(feature_columns)
    X = data[features]

    # Presumably an (n_rows, 1) array for a single-output model; verify if
    # the model architecture changes.
    predictions = model.predict(X)

    data['Predicted CO2'] = predictions
    print(data[features + ['Predicted CO2']])
    return data

if __name__ == "__main__":
    # Score the rows stored in the preprocessed CSV with the saved model.
    predict_emissions(data_path='preprocessed_data.csv')