In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# Function to calculate base load and peak load
def calculate_loads(data):
    """Summarise base load, peak load and their gap for one data set.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime-typed ``'Date'`` column and a numeric
        ``'Energy_Consumption_kWh'`` column.

    Returns
    -------
    tuple
        ``(base_load, peak_load, load_difference)`` where ``base_load`` is the
        mean consumption of rows with hour in [0, 9), ``peak_load`` is the
        maximum consumption of rows with hour in [9, 18), and
        ``load_difference`` is ``peak_load - base_load``.
    """
    hour_of_day = data['Date'].dt.hour

    # Half-open hour windows: [0, 9) for the base load, [9, 18) for the peak.
    in_base_window = hour_of_day.ge(0) & hour_of_day.lt(9)
    in_peak_window = hour_of_day.ge(9) & hour_of_day.lt(18)

    base_load = data.loc[in_base_window, 'Energy_Consumption_kWh'].mean()
    peak_load = data.loc[in_peak_window, 'Energy_Consumption_kWh'].max()
    return base_load, peak_load, peak_load - base_load

# Function to perform Monte Carlo analysis
def monte_carlo_analysis(data, num_simulations=100, random_state=None):
    """Bootstrap the load statistics by resampling ``data`` with replacement.

    Each simulation draws ``len(data)`` rows with replacement and passes the
    resample to ``calculate_loads``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input accepted by ``calculate_loads``.
    num_simulations : int, default 100
        Number of bootstrap resamples to draw.
    random_state : int, numpy.random.RandomState or None, default None
        Seed (or generator) for reproducible resampling. ``None`` keeps the
        previous behaviour of drawing from NumPy's global random state.

    Returns
    -------
    pandas.DataFrame
        One row per simulation with columns ``'Base Load'``, ``'Peak Load'``
        and ``'Load Difference'``.
    """
    # Build ONE generator up front. Passing the same integer seed to every
    # ``sample`` call would make all resamples identical.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    results = [
        calculate_loads(data.sample(frac=1, replace=True, random_state=rng))
        for _ in range(num_simulations)
    ]
    return pd.DataFrame(results, columns=['Base Load', 'Peak Load', 'Load Difference'])

# Split data into features and target variable
def split_data(data):
    """Separate the regression target from the feature columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'Energy_Consumption_kWh'`` and ``'Date'`` columns.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``data`` without the target and ``'Date'``
        columns and ``y`` is the ``'Energy_Consumption_kWh'`` column.
    """
    target_column = 'Energy_Consumption_kWh'
    # 'Date' is dropped alongside the target so it is not used as a feature.
    features = data.drop(columns=[target_column, 'Date'])
    target = data[target_column]
    return features, target

# Train a machine learning model
def train_model(X_train, y_train, random_state=None):
    """Fit a random-forest regressor on the training data.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix.
    y_train : array-like
        Training target values.
    random_state : int or None, default None
        Forwarded to ``RandomForestRegressor`` so the fitted forest (and any
        metric computed from it) can be reproduced. ``None`` keeps the previous
        unseeded behaviour.

    Returns
    -------
    RandomForestRegressor
        The fitted model.
    """
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)
    return model

# Evaluate the trained model
def evaluate_model(model, X_test, y_test):
    """Score a fitted model on held-out data.

    Parameters
    ----------
    model : object
        Any fitted estimator exposing ``predict``.
    X_test : array-like
        Held-out feature matrix.
    y_test : array-like
        True target values for ``X_test``.

    Returns
    -------
    float
        Mean squared error between ``y_test`` and the model's predictions.
    """
    predictions = model.predict(X_test)
    return mean_squared_error(y_test, predictions)

# Main function
def main():
    """Run the whole pipeline and print its results.

    Steps, in order: build the data set with ``preprocess_data``, bootstrap
    the load statistics, split features from target, hold out 20% of the rows
    for testing, fit the regressor, then print the test MSE followed by the
    first rows of the Monte Carlo table.
    """
    dataset = preprocess_data()

    # Bootstrap summary of base/peak load on the full data set.
    bootstrap_summary = monte_carlo_analysis(dataset)

    # 80/20 train/test split with a fixed seed for the partition.
    features, target = split_data(dataset)
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )

    fitted_model = train_model(X_train, y_train)
    test_mse = evaluate_model(fitted_model, X_test, y_test)

    print("Mean Squared Error:", test_mse)
    print("Monte Carlo Analysis Results:")
    print(bootstrap_summary.head())

# Script-style entry point: runs the pipeline when this code executes as the main module.
# NOTE(review): main() calls preprocess_data(), which this notebook only defines in a
# later cell ("Load and preprocess data"). On a fresh kernel this call raises NameError
# unless that cell has been run first.
if __name__ == "__main__":
    main()


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [3]:
# Function to calculate base load and peak load
def calculate_loads(data):
    """Summarise base load, peak load and their gap for one data set.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a datetime-typed ``'Date'`` column and a numeric
        ``'Energy_Consumption_kWh'`` column.

    Returns
    -------
    tuple
        ``(base_load, peak_load, load_difference)`` where ``base_load`` is the
        mean consumption of rows with hour in [0, 9), ``peak_load`` is the
        maximum consumption of rows with hour in [9, 18), and
        ``load_difference`` is ``peak_load - base_load``.
    """
    hour_of_day = data['Date'].dt.hour

    # Half-open hour windows: [0, 9) for the base load, [9, 18) for the peak.
    in_base_window = hour_of_day.ge(0) & hour_of_day.lt(9)
    in_peak_window = hour_of_day.ge(9) & hour_of_day.lt(18)

    base_load = data.loc[in_base_window, 'Energy_Consumption_kWh'].mean()
    peak_load = data.loc[in_peak_window, 'Energy_Consumption_kWh'].max()
    return base_load, peak_load, peak_load - base_load

# Function to perform Monte Carlo analysis
def monte_carlo_analysis(data, num_simulations=100, random_state=None):
    """Bootstrap the load statistics by resampling ``data`` with replacement.

    Each simulation draws ``len(data)`` rows with replacement and passes the
    resample to ``calculate_loads``.

    Parameters
    ----------
    data : pandas.DataFrame
        Input accepted by ``calculate_loads``.
    num_simulations : int, default 100
        Number of bootstrap resamples to draw.
    random_state : int, numpy.random.RandomState or None, default None
        Seed (or generator) for reproducible resampling. ``None`` keeps the
        previous behaviour of drawing from NumPy's global random state.

    Returns
    -------
    pandas.DataFrame
        One row per simulation with columns ``'Base Load'``, ``'Peak Load'``
        and ``'Load Difference'``.
    """
    # Build ONE generator up front. Passing the same integer seed to every
    # ``sample`` call would make all resamples identical.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    results = [
        calculate_loads(data.sample(frac=1, replace=True, random_state=rng))
        for _ in range(num_simulations)
    ]
    return pd.DataFrame(results, columns=['Base Load', 'Peak Load', 'Load Difference'])

In [4]:
# Load and preprocess data
def preprocess_data(num_data_points=1000, seed=None):
    """Build a synthetic, hourly data-centre telemetry data set.

    The frame stands in for the cleaned data set used by the rest of the
    notebook. Rows are consecutive hours starting at 2023-01-01 00:00; every
    other column is randomly simulated, with some columns shifted during
    business hours, weekdays/weekends, outages, hot servers or high wind.

    Parameters
    ----------
    num_data_points : int, default 1000
        Number of hourly rows to generate.
    seed : int or None, default None
        When given, all draws come from a private ``numpy.random.RandomState``
        seeded with this value, so the output is reproducible. When ``None``
        the draws come from NumPy's global random state, as before (so
        ``np.random.seed(...)`` still controls the result).

    Returns
    -------
    pandas.DataFrame
        ``num_data_points`` rows and 21 columns: ``'Date'``, ``'Power_Outage'``,
        ``'Grid_Reliability'``, ``'Energy_Consumption_kWh'`` and the remaining
        simulated weather, server and power-backup metrics.

    Raises
    ------
    ValueError
        If ``num_data_points`` is negative.
    """
    if num_data_points < 0:
        raise ValueError("num_data_points must be non-negative")
    n = num_data_points

    # The ``np.random`` module exposes the same ``choice``/``randint``/``normal``
    # functions as a RandomState instance, so it can act as the default generator.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Hourly timestamps. ``pd.Timestamp`` avoids relying on a ``datetime`` import,
    # and a Timedelta frequency avoids the deprecated 'H' alias.
    dates = pd.date_range(pd.Timestamp('2023-01-01'), periods=n, freq=pd.Timedelta(hours=1))
    data = pd.DataFrame({'Date': dates})

    # Time-based masks reused by several columns below.
    hour = data['Date'].dt.hour
    weekday = data['Date'].dt.weekday
    business_hours = (hour >= 9) & (hour < 18)
    daylight_hours = (hour >= 6) & (hour < 18)
    midday_hours = (hour >= 10) & (hour < 16)

    # NOTE: the draws below are kept in their original order so that seeding
    # the global NumPy state reproduces the same data as before.

    # Power outages can only occur during business hours (30% chance per hour).
    data['Power_Outage'] = np.where(business_hours,
                                    rng.choice([0, 1], size=n, p=[0.7, 0.3]),
                                    0)

    # Grid reliability score in {0, 1, 2, 3}.
    data['Grid_Reliability'] = rng.randint(0, 4, size=n)

    # Baseline energy consumption; a business-hours uplift is added further down.
    data['Energy_Consumption_kWh'] = rng.normal(loc=200, scale=50, size=n)

    # Ambient temperature.
    data['Temperature_Celsius'] = rng.normal(loc=20, scale=5, size=n)

    # Humidity is drawn from a higher distribution when it is warmer than 25 C.
    data['Humidity_Percentage'] = np.where(data['Temperature_Celsius'] > 25,
                                           rng.normal(loc=60, scale=10, size=n),
                                           rng.normal(loc=40, scale=10, size=n))

    # Baseline wind speed; a midday uplift is added further down.
    data['Wind_Speed_km/h'] = rng.normal(loc=10, scale=5, size=n)

    # Solar irradiance is higher between 06:00 and 18:00.
    data['Solar_Irradiance_W/m2'] = np.where(daylight_hours,
                                             rng.normal(loc=800, scale=200, size=n),
                                             rng.normal(loc=200, scale=100, size=n))

    # Electricity price.
    data['Electricity_Price_$kWh'] = rng.normal(loc=0.1, scale=0.05, size=n)

    # Server utilisation is higher during business hours.
    data['Server_Utilization_Percentage'] = np.where(business_hours,
                                                     rng.normal(loc=60, scale=10, size=n),
                                                     rng.normal(loc=30, scale=10, size=n))

    # Baseline values for the remaining server and power-backup metrics.
    data['Network_Traffic_Mbps'] = rng.normal(loc=100, scale=20, size=n)
    data['CPU_Load_Percentage'] = rng.normal(loc=50, scale=10, size=n)
    data['Disk_Usage_Percentage'] = rng.normal(loc=60, scale=10, size=n)
    data['Memory_Usage_Percentage'] = rng.normal(loc=70, scale=10, size=n)
    data['UPS_Battery_Level_Percentage'] = rng.normal(loc=80, scale=10, size=n)
    data['Generator_Fuel_Level_Percentage'] = rng.normal(loc=70, scale=10, size=n)
    data['UPS_Runtime_Minutes'] = rng.normal(loc=60, scale=10, size=n)
    data['Server_Temperature_Celsius'] = rng.normal(loc=25, scale=5, size=n)
    data['Server_Humidity_Percentage'] = rng.normal(loc=40, scale=5, size=n)

    # Stability scores in {0, 1, 2, 3, 4}.
    data['Voltage_Stability_Score'] = rng.randint(0, 5, size=n)
    data['Frequency_Stability_Score'] = rng.randint(0, 5, size=n)

    # Masks that depend on simulated columns; those columns are final by now.
    outage = data['Power_Outage'] == 1
    hot_server = data['Server_Temperature_Celsius'] > 30

    # --- Contextual adjustments layered on top of the baselines ---

    # Higher power usage during business hours (09:00 - 18:00).
    data['Energy_Consumption_kWh'] = np.where(business_hours,
                                              data['Energy_Consumption_kWh'] + rng.normal(loc=100, scale=20, size=n),
                                              data['Energy_Consumption_kWh'])

    # Stronger wind around midday (10:00 - 16:00).
    data['Wind_Speed_km/h'] = np.where(midday_hours,
                                       data['Wind_Speed_km/h'] + rng.normal(loc=5, scale=2, size=n),
                                       data['Wind_Speed_km/h'])

    # More network traffic on weekdays (Monday - Friday).
    data['Network_Traffic_Mbps'] = np.where(weekday < 5,
                                            data['Network_Traffic_Mbps'] + rng.normal(loc=20, scale=5, size=n),
                                            data['Network_Traffic_Mbps'])

    # CPU load spikes during business hours.
    data['CPU_Load_Percentage'] = np.where(business_hours,
                                           data['CPU_Load_Percentage'] + rng.normal(loc=10, scale=5, size=n),
                                           data['CPU_Load_Percentage'])

    # Memory usage rises when the server runs hotter than 30 C.
    data['Memory_Usage_Percentage'] = np.where(hot_server,
                                               data['Memory_Usage_Percentage'] + rng.normal(loc=10, scale=5, size=n),
                                               data['Memory_Usage_Percentage'])

    # UPS battery level is lower on weekends (the added term has a mean of -10).
    data['UPS_Battery_Level_Percentage'] = np.where(weekday >= 5,
                                                    data['UPS_Battery_Level_Percentage'] + rng.normal(loc=-10, scale=5, size=n),
                                                    data['UPS_Battery_Level_Percentage'])

    # Generator fuel is consumed during outages.
    data['Generator_Fuel_Level_Percentage'] = np.where(outage,
                                                       data['Generator_Fuel_Level_Percentage'] - rng.normal(loc=20, scale=5, size=n),
                                                       data['Generator_Fuel_Level_Percentage'])

    # UPS runtime shrinks when the server runs hotter than 30 C.
    data['UPS_Runtime_Minutes'] = np.where(hot_server,
                                           data['UPS_Runtime_Minutes'] - rng.normal(loc=10, scale=5, size=n),
                                           data['UPS_Runtime_Minutes'])

    # Server humidity drops during outages.
    data['Server_Humidity_Percentage'] = np.where(outage,
                                                  data['Server_Humidity_Percentage'] - rng.normal(loc=10, scale=5, size=n),
                                                  data['Server_Humidity_Percentage'])

    # Voltage stability degrades by 0-2 points when the (adjusted) wind exceeds 20 km/h.
    data['Voltage_Stability_Score'] = np.where(data['Wind_Speed_km/h'] > 20,
                                               data['Voltage_Stability_Score'] - rng.randint(0, 3, size=n),
                                               data['Voltage_Stability_Score'])

    # Frequency stability degrades by 0-2 points during outages.
    data['Frequency_Stability_Score'] = np.where(outage,
                                                 data['Frequency_Stability_Score'] - rng.randint(0, 3, size=n),
                                                 data['Frequency_Stability_Score'])
    return data



In [5]:
# Split data into features and target variable
def split_data(data):
    """Separate the regression target from the feature columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``'Energy_Consumption_kWh'`` and ``'Date'`` columns.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is ``data`` without the target and ``'Date'``
        columns and ``y`` is the ``'Energy_Consumption_kWh'`` column.
    """
    target_column = 'Energy_Consumption_kWh'
    # 'Date' is dropped alongside the target so it is not used as a feature.
    features = data.drop(columns=[target_column, 'Date'])
    target = data[target_column]
    return features, target

# Train a machine learning model
def train_model(X_train, y_train, random_state=None):
    """Fit a random-forest regressor on the training data.

    Parameters
    ----------
    X_train : array-like
        Training feature matrix.
    y_train : array-like
        Training target values.
    random_state : int or None, default None
        Forwarded to ``RandomForestRegressor`` so the fitted forest (and any
        metric computed from it) can be reproduced. ``None`` keeps the previous
        unseeded behaviour.

    Returns
    -------
    RandomForestRegressor
        The fitted model.
    """
    model = RandomForestRegressor(random_state=random_state)
    model.fit(X_train, y_train)
    return model