<a href="https://colab.research.google.com/github/Ashish-Bind/31_sustainability_ashish/blob/main/wind_data_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# Load the unified dataset containing historical data
unified_df = pd.read_csv('wind_power_data.csv')

# Convert 'DateTime' column to datetime object
unified_df['DateTime'] = pd.to_datetime(unified_df['DateTime'])

# Extract features and target variable
X = unified_df[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']]
y = unified_df['Power generated by system | (MW)']

# Train the RandomForestRegressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

# Generate a placeholder dataset for the first 3 months of 2024
start_date = '2024-01-01'
end_date = '2024-03-31'
date_range = pd.date_range(start=start_date, end=end_date, freq='H')

# Generate random values for air temperature, pressure, and wind speed
np.random.seed(42)  # for reproducibility
temperature = np.random.normal(loc=15, scale=5, size=len(date_range))
pressure = np.random.normal(loc=1, scale=0.1, size=len(date_range))
wind_speed = np.random.normal(loc=10, scale=3, size=len(date_range))

# Create a DataFrame for the forecasted independent variables
forecasted_X = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed
})

# Make predictions for the forecasted period
forecasted_power = rf_model.predict(forecasted_X[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']])

# Create a DataFrame for the forecasted power generation
forecasted_power_df = pd.DataFrame({'DateTime': date_range, 'Power generated by system | (MW)': forecasted_power})

# Save the forecasted power generation to a CSV file
forecasted_power_df.to_csv('forecasted_power_generation_2024.csv', index=False)


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the unified dataset containing historical data
unified_df = pd.read_csv('wind_power_data.csv')

# Convert 'DateTime' column to datetime object
unified_df['DateTime'] = pd.to_datetime(unified_df['DateTime'])

# Extract features and target variable
X = unified_df[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']]
y = unified_df['Power generated by system | (MW)']

# Train the RandomForestRegressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

# Generate a placeholder dataset for the first 3 months of 2024
start_date = '2024-01-01'
end_date = '2024-03-31'
date_range = pd.date_range(start=start_date, end=end_date, freq='H')

# Generate random values for air temperature, pressure, and wind speed
np.random.seed(42)  # for reproducibility
temperature = np.random.normal(loc=15, scale=5, size=len(date_range))
pressure = np.random.normal(loc=1, scale=0.1, size=len(date_range))
wind_speed = np.random.normal(loc=10, scale=3, size=len(date_range))

# Create a DataFrame for the forecasted independent variables
forecasted_X = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed
})

# Make predictions for the forecasted period
forecasted_power = rf_model.predict(forecasted_X[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']])

# Evaluate model performance
# Predict on the training set for comparison
y_train_pred = rf_model.predict(X)

# Calculate evaluation metrics
mae = mean_absolute_error(y, y_train_pred)
mse = mean_squared_error(y, y_train_pred)
r2 = r2_score(y, y_train_pred)

print("Mean Absolute Error (MAE) on Training Set:", mae)
print("Mean Squared Error (MSE) on Training Set:", mse)
print("R-squared (R2) on Training Set:", r2)

# Create a DataFrame for the forecasted power generation
forecasted_power_df = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed,
    'Power generated by system | (MW)': forecasted_power
})

# Save the forecasted power generation to a CSV file
forecasted_power_df.to_csv('forecasted_power_generation-2_2024.csv', index=False)


Mean Absolute Error (MAE) on Training Set: 0.28387343347874083
Mean Squared Error (MSE) on Training Set: 0.6938308237478688
R-squared (R2) on Training Set: 0.9975259489298529


In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the unified dataset containing historical data
unified_df = pd.read_csv('wind_power_data.csv')

# Convert 'DateTime' column to datetime object
unified_df['DateTime'] = pd.to_datetime(unified_df['DateTime'])

# Extract features and target variable
X = unified_df[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']]
y = unified_df['Power generated by system | (MW)']

# Train the Linear Regression model
lr_model = LinearRegression()
lr_model.fit(X, y)

# Train the Decision Tree Regression model
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X, y)

# Generate a placeholder dataset for the first 3 months of 2024
start_date = '2024-01-01'
end_date = '2024-03-31'
date_range = pd.date_range(start=start_date, end=end_date, freq='H')

# Generate random values for air temperature, pressure, and wind speed
np.random.seed(42)  # for reproducibility
temperature = np.random.normal(loc=15, scale=5, size=len(date_range))
pressure = np.random.normal(loc=1, scale=0.1, size=len(date_range))
wind_speed = np.random.normal(loc=10, scale=3, size=len(date_range))

# Create a DataFrame for the forecasted independent variables
forecasted_X = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed
})

# Make predictions for the forecasted period using Linear Regression
forecasted_power_lr = lr_model.predict(forecasted_X[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']])

# Make predictions for the forecasted period using Decision Tree Regression
forecasted_power_dt = dt_model.predict(forecasted_X[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']])

# Create DataFrames for the forecasted power generation
forecasted_power_lr_df = pd.DataFrame({'DateTime': date_range, 'Power generated by system | (MW)': forecasted_power_lr})
forecasted_power_dt_df = pd.DataFrame({'DateTime': date_range, 'Power generated by system | (MW)': forecasted_power_dt})

# Save the forecasted power generation to CSV files
forecasted_power_lr_df.to_csv('forecasted_power_generation_lr_2024.csv', index=False)
forecasted_power_dt_df.to_csv('forecasted_power_generation_dt_2024.csv', index=False)

# Evaluate model performance on training set for comparison
# Predict on the training set for comparison
y_train_pred_lr = lr_model.predict(X)
y_train_pred_dt = dt_model.predict(X)

# Calculate evaluation metrics for Linear Regression
mae_lr = mean_absolute_error(y, y_train_pred_lr)
mse_lr = mean_squared_error(y, y_train_pred_lr)
r2_lr = r2_score(y, y_train_pred_lr)

print("Linear Regression:")
print("Mean Absolute Error (MAE) on Training Set:", mae_lr)
print("Mean Squared Error (MSE) on Training Set:", mse_lr)
print("R-squared (R2) on Training Set:", r2_lr)

# Calculate evaluation metrics for Decision Tree Regression
mae_dt = mean_absolute_error(y, y_train_pred_dt)
mse_dt = mean_squared_error(y, y_train_pred_dt)
r2_dt = r2_score(y, y_train_pred_dt)

print("\nDecision Tree Regression:")
print("Mean Absolute Error (MAE) on Training Set:", mae_dt)
print("Mean Squared Error (MSE) on Training Set:", mse_dt)
print("R-squared (R2) on Training Set:", r2_dt)


Linear Regression:
Mean Absolute Error (MAE) on Training Set: 5.6320949097805295
Mean Squared Error (MSE) on Training Set: 45.72205738726114
R-squared (R2) on Training Set: 0.8369650048159505

Decision Tree Regression:
Mean Absolute Error (MAE) on Training Set: 0.0
Mean Squared Error (MSE) on Training Set: 0.0
R-squared (R2) on Training Set: 1.0


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the unified dataset containing historical data
unified_df = pd.read_csv('wind_power_data.csv')

# Convert 'DateTime' column to datetime object
unified_df['DateTime'] = pd.to_datetime(unified_df['DateTime'])

# Extract features and target variable
X = unified_df[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']]
y = unified_df['Power generated by system | (MW)']

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the RandomForestRegressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Evaluate model performance on test set
y_pred = rf_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Absolute Error (MAE) on Test Set:", mae)
print("Mean Squared Error (MSE) on Test Set:", mse)
print("R-squared (R2) on Test Set:", r2)


Mean Absolute Error (MAE) on Test Set: 0.7649295323767498
Mean Squared Error (MSE) on Test Set: 4.959511731909032
R-squared (R2) on Test Set: 0.9829461688728157


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Load the unified dataset containing historical data
unified_df = pd.read_csv('wind_power_data.csv')

# Convert 'DateTime' column to datetime object
unified_df['DateTime'] = pd.to_datetime(unified_df['DateTime'])

# Extract features and target variable
X = unified_df[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']]
y = unified_df['Power generated by system | (MW)']

# Train the RandomForestRegressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

# Generate a placeholder dataset for the first 3 months of 2024
start_date = '2024-01-01'
end_date = '2024-03-31'
date_range = pd.date_range(start=start_date, end=end_date, freq='H')

# Load final data for the forecasted independent variables
final_data_df = pd.read_csv('./wind_power_gen_3months_validation_data.csv')

# Convert 'DateTime' column to datetime object
final_data_df['DateTime'] = pd.to_datetime(final_data_df['DateTime'])

# Merge final data with the forecasted date range
forecasted_X = pd.merge(pd.DataFrame({'DateTime': date_range}), final_data_df, on='DateTime', how='left')

# Make predictions for the forecasted period
forecasted_power = rf_model.predict(forecasted_X[['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']])

# Evaluate model performance
# Predict on the training set for comparison
y_train_pred = rf_model.predict(X)

# Calculate evaluation metrics
mae = mean_absolute_error(y, y_train_pred)
mse = mean_squared_error(y, y_train_pred)
r2 = r2_score(y, y_train_pred)

print("Mean Absolute Error (MAE) on Training Set:", mae)
print("Mean Squared Error (MSE) on Training Set:", mse)
print("R-squared (R2) on Training Set:", r2)

# Create a DataFrame for the forecasted power generation
forecasted_power_df = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': forecasted_X['Air temperature | (°C)'],
    'Pressure | (atm)': forecasted_X['Pressure | (atm)'],
    'Wind speed | (m/s)': forecasted_X['Wind speed | (m/s)'],
    'Power generated by system | (MW)': forecasted_power
})

# Save the forecasted power generation to a CSV file
forecasted_power_df.to_csv('forecasted_power_generation-2_2024.csv', index=False)


Mean Absolute Error (MAE) on Training Set: 0.28387343347874083
Mean Squared Error (MSE) on Training Set: 0.6938308237478688
R-squared (R2) on Training Set: 0.9975259489298529


In [23]:
import pandas as pd

# Read the forecast CSV file
forecast_df = pd.read_csv('forecasted_power_generation-2_2024.csv')

# Calculate the sum of power generated by the system
total_power_generated = forecast_df['Power generated by system | (MW)'].sum()

print("Total power generated by the system:", total_power_generated, "MW")

grid_data_df = pd.read_csv('grid_data.csv')

node1_power = total_power_generated * 0.20
node2_power = total_power_generated * 0.45
node3_power = total_power_generated * 0.35

# Step 4: Prepare the database with distributed power and stability
node_power_df = pd.DataFrame({
    'Node': ['Node 1', 'Node 2', 'Node 3'],
    'Power': [node1_power, node2_power, node3_power]
})

# Step 5: Merge distributed power data with grid data
merged_data_df = pd.merge(grid_data_df, node_power_df, how='cross')

# Save the merged data to a new CSV file
merged_data_df.to_csv('merged_data_with_.csv', index=False)


Total power generated by the system: 80839.35528341706 MW
