In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

# ---------------------------------------------------------------------------
# Data: read the cleaned CSV, parse the timestamp column to datetimes, and
# discard the theoretical-output column, which is not wanted downstream.
# ---------------------------------------------------------------------------
df = (
    pd.read_csv("cleaned_wind_data.csv")
    .assign(timestamp=lambda frame: pd.to_datetime(frame['timestamp']))
    .drop(columns=['theoretical_output_kW'])
)

# Model inputs (column order here is the order the pipeline is trained on)
# and the column being predicted.
features = [
    'wind_speed',
    'temperature',
    'RH',
    'pressure',
    'gust',
    'wind_dir_dev',
    'precipitation',
]
target = 'actual_output_kW'

X, y = df[features], df[target]

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Scaler followed by a 100-tree random forest
# (scaling is optional for tree-based models).
pipeline_rf = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ]
)

# Fit on the training split only.
pipeline_rf.fit(X_train, y_train)

# Score the fitted pipeline on the held-out split.
y_pred = pipeline_rf.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(
    "\nRandom Forest Model Evaluation:",
    f"Mean Squared Error (MSE): {mse:.2f}",
    f"R² Score: {r2:.4f}",
    sep="\n",
)

# Persist the whole fitted pipeline (scaler + forest). Kept as the cell's last
# expression so the value returned by joblib.dump is shown as the cell output.
joblib.dump(pipeline_rf, 'wind_power_rf_model.pkl')



Random Forest Model Evaluation:
Mean Squared Error (MSE): 975.02
R² Score: 0.9991


['wind_power_rf_model.pkl']

In [None]:
from sklearn.model_selection import cross_val_score
import numpy as np

# 5-fold cross-validation of the pipeline over the full X / y, scored with R².
cv_scores = cross_val_score(pipeline_rf, X, y, scoring='r2', cv=5)

# Per-fold scores first ...
print("R² scores for each fold:", cv_scores)

# ... then the summary statistics across folds (population std, as np.std gives).
r2_cv_mean = cv_scores.mean()
r2_cv_std = cv_scores.std()
print(
    f"\n✅ Mean R² Score (Cross-Validated): {r2_cv_mean:.4f}",
    f"📉 Standard Deviation of R²: {r2_cv_std:.4f}",
    sep="\n",
)


In [None]:
import joblib

def predict_random_input(pkl_file_path):
    """Load a saved pipeline and print its prediction for one sample input.

    Despite the function name, the sample is a fixed, hard-coded set of
    values (edit them below to try other conditions); nothing is randomly
    generated.

    The sample is handed to the model as a one-row DataFrame whose columns
    are named and ordered by ``features``. The training cell fits the
    pipeline on a DataFrame with these column names, so passing a bare list
    would make scikit-learn warn about missing feature names and would rely
    on the dict's insertion order matching the training order.

    Parameters
    ----------
    pkl_file_path : str or path-like
        Path of the joblib-serialized model to load.

    Returns
    -------
    None
        The prediction is printed, not returned.

    Raises
    ------
    KeyError
        If the sample dict lacks a value for one of ``features``.
    """
    # SECURITY: joblib.load unpickles the file, and unpickling can execute
    # arbitrary code. Only load model files that come from a trusted source.
    model = joblib.load(pkl_file_path)

    # Feature names, in the same order used during training.
    features = ['wind_speed', 'temperature', 'RH', 'pressure', 'gust', 'wind_dir_dev', 'precipitation']

    # Fixed sample input (adjust the values to ranges seen in your dataset).
    sample_input = {
        'wind_speed': 10,        # in m/s
        'temperature': 30.45,      # in Celsius
        'RH': 55.4,              # Relative Humidity %
        'pressure': 957.44,      # in hPa
        'gust': 15.46,              # wind gust in m/s
        'wind_dir_dev': 120.45,     # direction deviation
        'precipitation': 16.42      # mm/h
    }

    # Pull values explicitly by feature name so column order never depends on
    # how the dict above happens to be written, then label the columns.
    input_frame = pd.DataFrame(
        [[sample_input[name] for name in features]],
        columns=features,
    )

    # Single-row prediction; take the only element of the result.
    predicted_output = model.predict(input_frame)[0]

    print(f"\n🔮 Predicted actual_output_kW: {predicted_output:.2f}")

# Same filename the training cell passes to joblib.dump.
file = "wind_power_rf_model.pkl"
predict_random_input(pkl_file_path=file)


🔮 Predicted actual_output_kW: 2514.57


