<a href="https://colab.research.google.com/github/RittikaDev/SolarPrediction/blob/main/rf_baseline.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, PowerTransformer, OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [3]:
# ----------------- Load Data -----------------
df = pd.read_excel("Solardata_final_1.xlsx")
df["Date and Time"] = pd.to_datetime(df["Date and Time"])
df["hour"] = df["Date and Time"].dt.hour
df["dayofweek"] = df["Date and Time"].dt.dayofweek
df["month"] = df["Date and Time"].dt.month

In [4]:
target = "Output Power (kW)"
X = df.drop(columns=["SL No.", "Date and Time", target])              # ALL FEATURES EXCEPT IDENTIFIERS AND THE TARGET.
y = df[target]                                                        # WHAT WE WANT TO PREDICT

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [6]:
num_cols = X.select_dtypes(include=["number"]).columns                # NUMERIC FEATURES LIKE GHI, TEMPERATURE, WIND SPEED.
cat_cols = X.select_dtypes(include=["object", "category"]).columns    # CATEGORICAL FEATURES LIKE "GRID SIDE OR OTHER FAULT".

In [37]:
# SimpleImputer(strategy="median") → FILLS MISSING NUMERIC VALUES WITH MEDIAN.
# PowerTransformer() → MAKES DATA MORE GAUSSIAN-LIKE, HELPS RANDOMFOREST HANDLE SKEWED DISTRIBUTIONS.
# StandardScaler() → SCALES FEATURES TO MEAN=0, STD=1 (HELPS IF MODEL RELIES ON DISTANCES, LESS CRITICAL FOR RANDOMFOREST BUT GOOD PRACTICE).
# OneHotEncoder() → CONVERTS CATEGORIES TO ONE-HOT ENCODING SO THE MODEL CAN USE THEM NUMERICALLY.
# CREATES TWO SMALL PIPELINES:
# ONE FOR NUMERIC COLUMNS (NUMERIC_TRANSFORMER)
# ONE FOR CATEGORICAL COLUMNS (CATEGORICAL_TRANSFORMER)
# BUT AT THIS POINT, THEY ARE NOT APPLIED TO THE DATAFRAME YET.

numeric_transformer = Pipeline([("imputer", SimpleImputer(strategy="median")),
                                ("power", PowerTransformer()),
                                ("scaler", StandardScaler())])

categorical_transformer = Pipeline([("imputer", SimpleImputer(strategy="most_frequent")),
                                    ("onehot", OneHotEncoder(handle_unknown="ignore", sparse_output=False))])

In [38]:
# COLUMNTRANSFORMER ACTUALLY APPLIES THE NUMERIC AND CATEGORICAL PIPELINES TO THE CORRECT COLUMNS.
# "num": THIS IS JUST A LABEL FOR THIS STEP INSIDE THE COLUMNTRANSFORMER.
# numeric_transformer: IT DEFINES WHAT TO DO WITH NUMERIC DATA
# num_cols: THIS IS A LIST OF COLUMN NAMES IN THE DATASET THAT ARE NUMERIC.

preprocessor = ColumnTransformer([
    ("num", numeric_transformer, num_cols),
    ("cat", categorical_transformer, cat_cols)
])

In [10]:
model = Pipeline([("preproc", preprocessor),
                  ("rf", RandomForestRegressor(random_state=42, n_jobs=-1))])

In [12]:
model.fit(X_train, y_train)
preds = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, preds))
print("RMSE:", mean_squared_error(y_test, preds))
print("R²:", r2_score(y_test, preds))

MAE: 0.3791866476365646
RMSE: 2.811996831610028
R²: 0.9999999285494083


In [53]:
!git add rf_baseline.ipynb
!git commit -m "doc: add comments for better clarity"
!git push origin main

On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
Everything up-to-date


In [56]:
!pwd
!ls -l

/content/SolarPrediction
total 8
drwxr-xr-x 2 root root 4096 Sep 20 04:14 RandomForest
-rw-r--r-- 1 root root  730 Sep 20 05:32 README.md


In [57]:
%cd /content/SolarPrediction


/content/SolarPrediction


On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
Everything up-to-date


In [61]:
!git add .
!git commit -m "doc: add comments for better clarity"
!git push origin main

On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
Everything up-to-date
