In [14]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


In [15]:
df=pd.read_csv("CCPP.csv")

In [16]:
x=df[['AT','V','AP','RH']]
y=df['PE']


In [26]:
x_train_raw, x_test_raw, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [None]:
#feature scaling is very important becauuse SVR is sensitive to scale

from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train_raw)  
x_test_scaled = scaler.transform(x_test_raw)        

In [None]:
#check which kernel is suitable for the training
from sklearn.svm import SVR
kernel_matrix=['rbf','linear','poly']
for i in range(len(kernel_matrix)):
    model =SVR(kernel=kernel_matrix[i])
    model.fit(x_train_scaled,y_train)
    print(f"The score for {kernel_matrix[i]} is {model.score(x_test_scaled,y_test)}")


The score for rbf is 0.9421736713419648
The score for linear is 0.9291214151716456
The score for poly is 0.7675349479888542


In [None]:
#Now we know 'rbf' is suitable..So we have to find the better c value

from sklearn.model_selection import cross_val_score

C_values = [0.1, 1, 10, 100, 1000]
cv_scores = []

for C in C_values:
    svr = SVR(kernel='rbf', C=C, epsilon=0.05, gamma='scale')
    scores = cross_val_score(svr, x_train_scaled, y_train, cv=5, scoring='r2')
    cv_scores.append(scores.mean())
    print(f"C={C:6} → Mean CV R²: {scores.mean():.4f} ± {scores.std():.4f}")

C=   0.1 → Mean CV R²: 0.9070 ± 0.0060
C=     1 → Mean CV R²: 0.9378 ± 0.0050
C=    10 → Mean CV R²: 0.9419 ± 0.0052
C=   100 → Mean CV R²: 0.9431 ± 0.0055
C=  1000 → Mean CV R²: 0.9433 ± 0.0055


In [31]:
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error

#Let the kernel is rbf using previous results
model=SVR(kernel='rbf',C=100)
model.fit(x_train_scaled,y_train)
y_pred= model.predict(x_test_scaled)
r2 =r2_score(y_test,y_pred)
rmse=np.sqrt(mean_squared_error(y_test,y_pred))
mae= mean_absolute_error(y_test,y_pred)

In [32]:
print("Super vector regressor Performance")
print("-----------------------------------")
print(f"MAE  : {mae}")
print(f"RMSE : {rmse}")
print(f"R²   : {r2}")

Super vector regressor Performance
-----------------------------------
MAE  : 2.951115749266169
RMSE : 3.9151710734319005
R²   : 0.9471534465663622


In [None]:
#We have to scale our tesing data set also.otherwise it will produce some errors

test_samples = np.array([[33.15,70.4,1003.72,53.98],
                         [18.27,65.94,1013.09,84.72],
                         [13.74,38.73,1004.34,82.82]])

test_scaled = scaler.transform(test_samples)  
predictions = model.predict(test_scaled)





In [36]:
print("\nPredictions for 3 test samples:")
for i, pred in enumerate(predictions):
    print(f"Sample {i+1}: {pred:.2f} MW")



Predictions for 3 test samples:
Sample 1: 430.16 MW
Sample 2: 451.48 MW
Sample 3: 470.63 MW
