# hyperparameter tuning process

In [1]:
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score

In [3]:
# Load the datasets
plant_1_data = pd.read_csv('/Users/tech/Documents/Jupyter Notebook/DataSet/SolarPower/Plant_1_Generation_Data.csv')
plant_2_data = pd.read_csv('/Users/tech/Documents/Jupyter Notebook/DataSet/SolarPower/Plant_2_Generation_Data.csv')

In [4]:
# Function to preprocess data and perform hyperparameter tuning
def tune_hyperparameters(data, plant_name):
    # Handle missing values
    data.fillna(method='ffill', inplace=True)

    # Define a threshold for inverter failure
    threshold = 100  # Adjust as necessary based on domain knowledge
    data['target'] = (data['AC_POWER'] < threshold).astype(int)

    # Select features
    features = ['DC_POWER', 'DAILY_YIELD', 'TOTAL_YIELD']
    X = data[features]
    y = data['target']

    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Define the model
    model = RandomForestClassifier(random_state=42)

    # Set up the parameter grid for hyperparameter tuning
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4],
        'max_features': ['auto', 'sqrt']
    }

    # Initialize Grid Search
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, 
                               cv=5, n_jobs=-1, scoring='accuracy', verbose=2)

    # Fit Grid Search
    grid_search.fit(X_train, y_train)

    # Print the best parameters
    print(f"Best Parameters for {plant_name}:", grid_search.best_params_)
    print(f"Best Cross-Validation Score for {plant_name}:", grid_search.best_score_)

    # Evaluate the best model on the test set
    best_model = grid_search.best_estimator_
    y_pred = best_model.predict(X_test)

    # Print evaluation metrics
    print(f"Test Set Evaluation for {plant_name}:")
    print("Accuracy:", accuracy_score(y_test, y_pred))
    print(classification_report(y_test, y_pred))

In [5]:
# Tune hyperparameters for Plant 1
tune_hyperparameters(plant_1_data, "Plant 1")

  data.fillna(method='ffill', inplace=True)


Fitting 5 folds for each of 216 candidates, totalling 1080 fits


540 fits failed out of a total of 1080.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
217 fits failed with the following error:
Traceback (most recent call last):
  File "C:\anaconda\Lib\site-packages\sklearn\model_selection\_validation.py", line 895, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\anaconda\Lib\site-packages\sklearn\base.py", line 1467, in wrapper
    estimator._validate_params()
  File "C:\anaconda\Lib\site-packages\sklearn\base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "C:\anaconda\Lib\site-packages\sklearn\utils\_param_validation.py", line 95, in validate_parameter_constraints
    raise InvalidParameterError(
sklearn.utils._param_validation.InvalidPa

Best Parameters for Plant 1: {'max_depth': None, 'max_features': 'sqrt', 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}
Best Cross-Validation Score for Plant 1: 0.9999454776760374
Test Set Evaluation for Plant 1:
Accuracy: 1.0
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      6362
           1       1.00      1.00      1.00      7394

    accuracy                           1.00     13756
   macro avg       1.00      1.00      1.00     13756
weighted avg       1.00      1.00      1.00     13756



In [None]:
# Tune hyperparameters for Plant 2
tune_hyperparameters(plant_2_data, "Plant 2")