# Wind Power Generation Forecasting

In [None]:
pip install pandas numpy scikit-learn matplotlib seaborn

In [None]:
# importing all the necessary libraries
import pandas as pd # data manipulation
import numpy as np # numerical python - linear algebra
import matplotlib.pyplot as plt # visualization lib
import seaborn as sns
from sklearn.model_selection import train_test_split # sklearn - ML
from sklearn.preprocessing import StandardScaler # scaling

In [None]:
import pandas as pd

# Read the four per-site CSV files, in order, each into its own DataFrame.
location_csv_files = ('Location1.csv', 'Location2.csv', 'Location3.csv', 'Location4.csv')
location1, location2, location3, location4 = map(pd.read_csv, location_csv_files)

In [None]:
# Preview the first five rows of the Location 1 data
location1.head(5)

In [None]:
# Tag each per-site frame with its site label so every row stays attributable
# to its origin once the frames are stacked.
site_frames = {
    'Location1': location1,
    'Location2': location2,
    'Location3': location3,
    'Location4': location4,
}
for site_label, site_frame in site_frames.items():
    site_frame['Location'] = site_label

# Stack the four frames vertically and renumber the rows from 0.
merged_data = pd.concat(list(site_frames.values()), ignore_index=True)

merged_data.head()

In [None]:
# Write the combined data to disk, leaving the row index out of the file
merged_csv_path = 'merged_locations.csv'
merged_data.to_csv(merged_csv_path, index=False)

In [None]:
# Print a summary of the merged frame: column names, non-null counts and dtypes
merged_data.info()

In [None]:
# Summary statistics, transposed so each row describes one column
summary_stats = merged_data.describe()
summary_stats.transpose()

In [None]:
# Number of missing values in each column
merged_data.isna().sum()

In [None]:
# Count rows that are exact repeats of an earlier row
duplicate_row_mask = merged_data.duplicated()
duplicate_row_mask.sum()

In [None]:
# One-hot encode the categorical 'Location' column. With drop_first=True the
# first category gets no dummy column of its own.
# The guard keeps the cell safe to re-run: once encoded, 'Location' no longer
# exists and asking get_dummies for it again would raise a KeyError.
if 'Location' in merged_data.columns:
    merged_data = pd.get_dummies(merged_data, columns=['Location'], drop_first=True)
merged_data.head()

In [None]:
# List the column labels of the merged frame
merged_data.columns

In [None]:
# Remove the 'Time' column. Rebinding the name (instead of inplace=True) together
# with errors='ignore' keeps this cell idempotent: re-running it after the column
# is already gone is a no-op rather than a KeyError.
merged_data = merged_data.drop(columns=['Time'], errors='ignore')

In [None]:
# Preview the first rows of the merged frame
merged_data.head()

In [None]:
# Week 2: exploratory data analysis (EDA)


In [None]:
# Collect the labels of all numeric-dtype columns
numeric_only = merged_data.select_dtypes(include='number')
numerical_columns = numeric_only.columns
numerical_columns

In [None]:
# (rows, columns) of the merged frame
merged_data.shape

In [None]:
# Histogram of every numeric column, one panel per column.
# The grid is sized from the number of columns, so the loop can never index past
# the available axes (a fixed 3x3 grid fails as soon as there are more than nine).
n_plot_cols = 3
# Ceiling division; always keep at least one row so plt.subplots gets a valid grid.
n_plot_rows = max(1, (len(numerical_columns) + n_plot_cols - 1) // n_plot_cols)

fig, axes = plt.subplots(n_plot_rows, n_plot_cols, figsize=(15, 16 * n_plot_rows / 3))
axes = axes.flatten()
for ax, col in zip(axes, numerical_columns):
    sns.histplot(merged_data[col], ax=ax)
    ax.set_title(col)
# Hide panels that have no column assigned to them.
for ax in axes[len(numerical_columns):]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()

In [None]:
# Box plot of every numeric column, one panel per column.
# The grid is sized from the number of columns, so the loop can never index past
# the available axes (a fixed 3x3 grid fails as soon as there are more than nine).
n_plot_cols = 3
# Ceiling division; always keep at least one row so plt.subplots gets a valid grid.
n_plot_rows = max(1, (len(numerical_columns) + n_plot_cols - 1) // n_plot_cols)

fig, axes = plt.subplots(n_plot_rows, n_plot_cols, figsize=(15, 5 * n_plot_rows))
axes = axes.flatten()
for ax, col in zip(axes, numerical_columns):
    sns.boxplot(y=merged_data[col], ax=ax)
    ax.set_title(f'Boxplot of {col}')
    ax.set_ylabel(col)
# Hide panels that have no column assigned to them.
for ax in axes[len(numerical_columns):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Scatter plot of the relationship between temperature and power
# pandas returns the Axes it drew on; label and decorate that Axes directly.
temp_power_ax = merged_data.plot.scatter(x='temperature_2m', y='Power', c='blue')
temp_power_ax.set_xlabel('Temperature (°C)')
temp_power_ax.set_ylabel('Power (W)')
temp_power_ax.set_title('Temperature vs Power')
temp_power_ax.grid(True)
plt.show()

In [None]:
# Print the column labels as a reminder of the available column names
print(merged_data.columns)


In [None]:
# Scatter plot of 'Power' against every other numeric column, one panel per feature.
feature_columns = numerical_columns.drop('Power')

# Size the grid from the number of features so no panel is missing and the loop
# cannot run past the available axes.
n_plot_cols = 6
# Ceiling division; always keep at least one row so plt.subplots gets a valid grid.
n_plot_rows = max(1, (len(feature_columns) + n_plot_cols - 1) // n_plot_cols)

fig, axes = plt.subplots(n_plot_rows, n_plot_cols, figsize=(15, 2 * n_plot_rows))
axes = axes.flatten()
for ax, col in zip(axes, feature_columns):
    sns.scatterplot(x=col, y='Power', data=merged_data, ax=ax)
    ax.set_title(f'power vs  {col}')
    ax.set_xlabel(col)
    # The y axis always shows the target, not the feature.
    ax.set_ylabel('Power')
# Hide panels that have no feature assigned to them.
for ax in axes[len(feature_columns):]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlation between the numeric columns, drawn as an annotated heatmap
correlation = merged_data.loc[:, numerical_columns].corr()

corr_fig, corr_ax = plt.subplots(figsize=(10, 10))
sns.heatmap(correlation, annot=True, ax=corr_ax)
corr_ax.set_title("correlation")
plt.show()

### Week 3: model training and evaluation

In [None]:
# Target is the 'Power' column; the features are every remaining column
y = merged_data['Power']
X = merged_data.drop(columns='Power')

In [None]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
# Standardise the features. The scaler learns its statistics from the training
# split only and then applies that same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Linear regression model: build and fit in one step (fit returns the estimator itself)
model = LinearRegression().fit(X_train, y_train)
model  # show the fitted estimator as the cell output

In [None]:
# Predict on the held-out split and score the linear model
y_pred = model.predict(X_test)

mae, mse, r2 = (
    score_fn(y_test, y_pred)
    for score_fn in (mean_absolute_error, mean_squared_error, r2_score)
)

print(
    f'Mean Absolute Error: {mae}',
    f'Mean Squared Error: {mse}',
    f'R Squared Score: {r2}',
    sep='\n',
)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 100 trees with a fixed seed; build and fit in one step
# (fit returns the estimator itself)
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)
rf_model  # show the fitted estimator as the cell output

In [None]:
# Predict on the held-out split and score the random forest
y_pred_rf = rf_model.predict(X_test)

rf_scorers = (mean_absolute_error, mean_squared_error, r2_score)
mae_rf, mse_rf, r2_rf = [scorer(y_test, y_pred_rf) for scorer in rf_scorers]

for rf_report_line in (
    f'RF MAE: {mae_rf}',
    f'RF MSE: {mse_rf}',
    f'RF R2 Score: {r2_rf}',
):
    print(rf_report_line)


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Hyper-parameter grid: 3 * 3 * 3 * 2 * 2 = 108 combinations.
param_grid = {
    'n_estimators': [100, 200, 300],
    'learning_rate': [0.01, 0.1, 0.2],
    'max_depth': [3, 5, 7],
    'subsample': [0.8, 1.0],
    'colsample_bytree': [0.8, 1.0]
}

# Exhaustive search scored by negated MAE with 3-fold cross-validation
# (108 combinations x 3 folds = 324 fits, plus the final refit).
# Parallelism is given to GridSearchCV (n_jobs=-1); each XGBoost model is limited
# to a single thread so the two levels do not compete for the same CPU cores.
grid_search = GridSearchCV(
    estimator=XGBRegressor(random_state=42, n_jobs=1),
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',  # Use MAE for evaluation
    cv=3,  # 3-fold cross-validation
    n_jobs=-1  # Use all available CPU cores
)

# Run the search on the training split
grid_search.fit(X_train, y_train)

# Best parameters and the matching cross-validated MAE
# (the score is stored negated, so flip the sign back)
print(f'Best Parameters: {grid_search.best_params_}')
print(f'Best MAE: {-grid_search.best_score_}')

# Evaluate the refitted best model on the held-out test split
best_model = grid_search.best_estimator_
y_pred_tuned = best_model.predict(X_test)

mae_tuned = mean_absolute_error(y_test, y_pred_tuned)
mse_tuned = mean_squared_error(y_test, y_pred_tuned)
r2_tuned = r2_score(y_test, y_pred_tuned)

print(f'Tuned Model Mean Absolute Error (MAE): {mae_tuned}')
print(f'Tuned Model Mean Squared Error (MSE): {mse_tuned}')
print(f'Tuned Model R^2 Score: {r2_tuned}')


In [None]:
pip install xgboost


In [None]:
from xgboost import XGBRegressor
