<a href="https://colab.research.google.com/github/Ashish-Bind/31_sustainability_ashish/blob/main/grid_data_classifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Train a wind-power regressor on the historical data, then score a synthetic
# hourly weather table for Q1 2024 and save the resulting forecast.

# Columns used as model inputs / output (defined once so training and
# prediction cannot drift apart).
FEATURE_COLUMNS = ['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']
TARGET_COLUMN = 'Power generated by system | (MW)'

# Load the unified dataset containing historical data
unified_df = pd.read_csv('/content/wind_power_data.csv', encoding='latin-1')

# Parse 'DateTime' from the RAW strings once per supported layout and merge the
# results. Re-running pd.to_datetime on the already-converted column (the
# previous approach) ignores the second format, so rows written as
# 'MM/DD/YYYY HH:MM' were left as NaT.
datetime_raw = unified_df['DateTime']
datetime_dashed = pd.to_datetime(datetime_raw, format='%m-%d-%Y %H:%M', errors='coerce')
datetime_slashed = pd.to_datetime(datetime_raw, format='%m/%d/%Y %H:%M', errors='coerce')
unified_df['DateTime'] = datetime_dashed.fillna(datetime_slashed)

# Extract features and target variable
X = unified_df[FEATURE_COLUMNS]
y = unified_df[TARGET_COLUMN]

# Train the RandomForestRegressor model (fixed seed for reproducible trees)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

# Generate a placeholder dataset for the first 3 months of 2024
start_date = '2024-01-01'
end_date = '2024-03-31'
# NOTE: pandas 2.2 deprecated the uppercase 'H' alias in favour of 'h';
# 'H' is kept here so the cell still runs on older pandas releases.
date_range = pd.date_range(start=start_date, end=end_date, freq='H')

# Generate random values for air temperature, pressure, and wind speed.
# These are synthetic draws, not a weather forecast; the draw order
# (temperature, pressure, wind speed) matters for reproducibility.
np.random.seed(42)  # for reproducibility
temperature = np.random.normal(loc=15, scale=5, size=len(date_range))
pressure = np.random.normal(loc=1, scale=0.1, size=len(date_range))
wind_speed = np.random.normal(loc=10, scale=3, size=len(date_range))

# Create a DataFrame for the forecasted independent variables
forecasted_X = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed
})

# Make predictions for the forecasted period
forecasted_power = rf_model.predict(forecasted_X[FEATURE_COLUMNS])

# Evaluate model performance
# Predict on the training set for comparison. These are in-sample scores:
# the model has already seen every row, so they do not measure how well it
# generalises to unseen data.
y_train_pred = rf_model.predict(X)

# Calculate evaluation metrics
mae = mean_absolute_error(y, y_train_pred)
mse = mean_squared_error(y, y_train_pred)
r2 = r2_score(y, y_train_pred)

print("Mean Absolute Error (MAE) on Training Set:", mae)
print("Mean Squared Error (MSE) on Training Set:", mse)
print("R-squared (R2) on Training Set:", r2)

# Create a DataFrame for the forecasted power generation: the forecast inputs
# plus the predicted power as the last column.
forecasted_power_df = forecasted_X.assign(**{TARGET_COLUMN: forecasted_power})

# Save the forecasted power generation to a CSV file
forecasted_power_df.to_csv('forecasted_power_generation-2_2024.csv', index=False)

Mean Absolute Error (MAE) on Training Set: 0.062250387515665216
Mean Squared Error (MSE) on Training Set: 0.012368576186568583
R-squared (R2) on Training Set: 0.9999583603431041


In [None]:
# Step 3: Power Distribution
# Split each hourly power forecast across the three grid nodes using fixed
# shares (20% / 45% / 35%) and persist the per-node series.

# Hourly predicted power, one value per forecast timestamp
total_power_generated = forecasted_power_df['Power generated by system | (MW)']

# Scale the hourly series by each node's share, in node order 1, 2, 3
node1_power, node2_power, node3_power = (
    total_power_generated * share for share in (0.20, 0.45, 0.35)
)

# Start from the timestamp column and attach one column per node
power_distribution_df = forecasted_power_df[['DateTime']].assign(**{
    'Node 1 Power (MW)': node1_power,
    'Node 2 Power (MW)': node2_power,
    'Node 3 Power (MW)': node3_power,
})

# Write the per-node hourly distribution to disk (no index column)
power_distribution_df.to_csv('power_distribution_2024.csv', index=False)

In [None]:
import pandas as pd

# Summarise how the whole forecast period's power is shared between the nodes.

# Load the predicted power distribution data for the 3 months
power_distribution_2024 = pd.read_csv('power_distribution_2024.csv')

# Per-node hourly columns stored in the distribution file
node_power_columns = ['Node 1 Power (MW)', 'Node 2 Power (MW)', 'Node 3 Power (MW)']

# Calculate the total power generated for the predicted 3 months from the
# loaded data instead of a hard-coded figure, so the summary always matches
# the current forecast. The node shares add up to 1.0, so summing every node
# column over every row recovers the total generated power.
# NOTE(review): adding hourly MW readings gives an energy-like total (MWh);
# the 'Power (MW)' label below is kept unchanged for downstream consumers.
total_power_generated = float(power_distribution_2024[node_power_columns].to_numpy().sum())

# Calculate power distribution to each node based on the provided ratios
node1_power = total_power_generated * 0.20
node2_power = total_power_generated * 0.45
node3_power = total_power_generated * 0.35

# Create a DataFrame for the distributed power to nodes
nodes_data = pd.DataFrame({
    'Node': ['Node 1', 'Node 2', 'Node 3'],
    'Power (MW)': [node1_power, node2_power, node3_power]
})

# Print the distributed power to each node
print("Distributed Power to Nodes:")
print(nodes_data)

# Save the distributed power data to a CSV file
nodes_data.to_csv('distributed_power_to_nodes.csv', index=False)

Distributed Power to Nodes:
     Node    Power (MW)
0  Node 1  16138.433858
1  Node 2  36311.476181
2  Node 3  28242.259251


In [None]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Train and evaluate a random-forest classifier that predicts the 'stability'
# label of the grid from six numeric columns.

# Read the merged grid dataset
grid_data = pd.read_csv('grid_data.csv')

# (No preprocessing is applied here; missing values or dtype fixes would go
# in this spot if the data needed them.)

# Model inputs and the label to predict
stability_features = ['c1', 'c2', 'c3', 'p1', 'p2', 'p3']
X = grid_data[stability_features]
y = grid_data['stability']  # Stability column

# Hold out 20% of the rows for testing; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree forest on the training portion (fit returns the estimator)
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows and report plain accuracy
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Next step (not done in this cell): load and preprocess the data for the
# first 3 months of 2024, as in Step 1, and run the trained classifier on it
# to predict stability for the forecasted period.

Accuracy: 0.7134055904164289


In [None]:
# Run the trained stability classifier over grid_data.csv and save one
# predicted label per row alongside its timestamp.
# NOTE(review): this reads the same 'grid_data.csv' file the classifier was
# trained on, so these are not out-of-sample 2024 forecasts - confirm intent.

# Read the merged dataset that holds the classifier's input columns
grid_data = pd.read_csv('grid_data.csv')

# 'rf_classifier' must already exist in the kernel (trained in an earlier cell)
stability_inputs = grid_data[['c1', 'c2', 'c3', 'p1', 'p2', 'p3']]
forecasted_stability = rf_classifier.predict(stability_inputs)

# Pair each prediction with its timestamp; this requires grid_data to contain
# a 'DateTime' column
forecasted_stability_df = grid_data[['DateTime']].assign(
    predicted_stability=forecasted_stability
)

# Persist the predictions (no index column)
forecasted_stability_df.to_csv('forecasted_stability_2024.csv', index=False)

In [None]:
import pandas as pd

# Report what share of the saved stability predictions are 'stable' versus
# 'unstable'.

# Read back the predictions written by the previous step
forecasted_stability_df = pd.read_csv('forecasted_stability_2024.csv')
predicted_labels = forecasted_stability_df['predicted_stability']

# Number of rows overall and per label
total_count = len(forecasted_stability_df)
stable_count = predicted_labels.eq('stable').sum()
unstable_count = predicted_labels.eq('unstable').sum()

# Convert the counts into percentages of all rows
stable_percentage = (stable_count / total_count) * 100
unstable_percentage = (unstable_count / total_count) * 100

print("Percentage of Stable Grid Conditions:", stable_percentage)
print("Percentage of Unstable Grid Conditions:", unstable_percentage)


Percentage of Stable Grid Conditions: 34.020947904068635
Percentage of Unstable Grid Conditions: 65.97905209593135
