<a href="https://colab.research.google.com/github/Ashish-Bind/31_sustainability_ashish/blob/main/wind_data_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [31]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor

# --- Train on historical data -------------------------------------------------
# Load the unified dataset containing historical data
unified_df = pd.read_csv('wind_power_data.csv')

# Convert 'DateTime' column to datetime object
unified_df['DateTime'] = pd.to_datetime(unified_df['DateTime'])

# Column names are defined once so the training frame and the synthetic
# forecast frame are guaranteed to use the same features in the same order.
feature_columns = ['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']
target_column = 'Power generated by system | (MW)'

# Extract features and target variable
X = unified_df[feature_columns]
y = unified_df[target_column]

# Train the RandomForestRegressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X, y)

# --- Build placeholder inputs for the first 3 months of 2024 ------------------
start_date = '2024-01-01'
end_date = '2024-03-31'
# `end_date` parses to midnight, so using it directly as the range end would
# keep only the 00:00 sample of 31 March. Extend to 23:00 so the final day is
# covered hour by hour like every other day.
# 'h' replaces the 'H' alias, which is deprecated in pandas 2.2+.
date_range = pd.date_range(
    start=start_date,
    end=pd.Timestamp(end_date) + pd.Timedelta(hours=23),
    freq='h',
)
n_hours = len(date_range)

# Generate random values for air temperature, pressure, and wind speed.
# NOTE: these are synthetic placeholders, not a weather forecast; the predicted
# power is only as meaningful as these inputs.
np.random.seed(42)  # for reproducibility
temperature = np.random.normal(loc=15, scale=5, size=n_hours)
pressure = np.random.normal(loc=1, scale=0.1, size=n_hours)
# A normal distribution is unbounded, so clip to keep wind speed non-negative.
wind_speed = np.clip(np.random.normal(loc=10, scale=3, size=n_hours), 0, None)

# Create a DataFrame for the forecasted independent variables
forecasted_X = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed
})

# --- Predict and persist ------------------------------------------------------
# Make predictions for the forecasted period
forecasted_power = rf_model.predict(forecasted_X[feature_columns])

# Create a DataFrame for the forecasted power generation
forecasted_power_df = pd.DataFrame({'DateTime': date_range, target_column: forecasted_power})

# Save the forecasted power generation to a CSV file
forecasted_power_df.to_csv('forecasted_power_generation_2024.csv', index=False)


In [32]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --- Train on historical data -------------------------------------------------
# Load the unified dataset containing historical data
unified_df = pd.read_csv('wind_power_data.csv')

# Convert 'DateTime' column to datetime object
unified_df['DateTime'] = pd.to_datetime(unified_df['DateTime'])

# Column names are defined once so the training frame, the synthetic forecast
# frame and the exported CSV all agree on features and their order.
feature_columns = ['Air temperature | (°C)', 'Pressure | (atm)', 'Wind speed | (m/s)']
target_column = 'Power generated by system | (MW)'

# Extract features and target variable
X = unified_df[feature_columns]
y = unified_df[target_column]

# Train the RandomForestRegressor model.
# oob_score=True additionally stores out-of-bag predictions for every training
# row (made only by trees that did not see that row), which gives an estimate
# of generalisation error without holding data back or fitting a second model.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, oob_score=True)
rf_model.fit(X, y)

# --- Build placeholder inputs for the first 3 months of 2024 ------------------
start_date = '2024-01-01'
end_date = '2024-03-31'
# `end_date` parses to midnight, so using it directly as the range end would
# keep only the 00:00 sample of 31 March. Extend to 23:00 so the final day is
# covered hour by hour like every other day.
# 'h' replaces the 'H' alias, which is deprecated in pandas 2.2+.
date_range = pd.date_range(
    start=start_date,
    end=pd.Timestamp(end_date) + pd.Timedelta(hours=23),
    freq='h',
)
n_hours = len(date_range)

# Generate random values for air temperature, pressure, and wind speed.
# NOTE: these are synthetic placeholders, not a weather forecast; the predicted
# power is only as meaningful as these inputs.
np.random.seed(42)  # for reproducibility
temperature = np.random.normal(loc=15, scale=5, size=n_hours)
pressure = np.random.normal(loc=1, scale=0.1, size=n_hours)
# A normal distribution is unbounded, so clip to keep wind speed non-negative.
wind_speed = np.clip(np.random.normal(loc=10, scale=3, size=n_hours), 0, None)

# Create a DataFrame for the forecasted independent variables
forecasted_X = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed
})

# Make predictions for the forecasted period
forecasted_power = rf_model.predict(forecasted_X[feature_columns])

# --- Evaluate model performance -----------------------------------------------
# Training-set metrics: the model is scored on the rows it was fitted on, so
# these numbers are optimistic and should not be read as forecast accuracy.
y_train_pred = rf_model.predict(X)

mae = mean_absolute_error(y, y_train_pred)
mse = mean_squared_error(y, y_train_pred)
r2 = r2_score(y, y_train_pred)

print("Mean Absolute Error (MAE) on Training Set:", mae)
print("Mean Squared Error (MSE) on Training Set:", mse)
print("R-squared (R2) on Training Set:", r2)

# Out-of-bag metrics: each row is predicted only by trees whose bootstrap
# sample excluded it, so these are the figures to compare against when judging
# how well the model generalises.
y_oob_pred = rf_model.oob_prediction_
oob_mae = mean_absolute_error(y, y_oob_pred)
oob_mse = mean_squared_error(y, y_oob_pred)
oob_r2 = r2_score(y, y_oob_pred)

print("Mean Absolute Error (MAE) Out-of-Bag:", oob_mae)
print("Mean Squared Error (MSE) Out-of-Bag:", oob_mse)
print("R-squared (R2) Out-of-Bag:", oob_r2)

# --- Persist forecast together with the inputs that produced it ---------------
# Create a DataFrame for the forecasted power generation
forecasted_power_df = pd.DataFrame({
    'DateTime': date_range,
    'Air temperature | (°C)': temperature,
    'Pressure | (atm)': pressure,
    'Wind speed | (m/s)': wind_speed,
    'Power generated by system | (MW)': forecasted_power
})

# Save the forecasted power generation to a CSV file
forecasted_power_df.to_csv('forecasted_power_generation-2_2024.csv', index=False)


Mean Absolute Error (MAE) on Training Set: 0.28387343347874083
Mean Squared Error (MSE) on Training Set: 0.6938308237478688
R-squared (R2) on Training Set: 0.9975259489298529
