In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR

In [None]:
!pip install numpy==1.23.5

Collecting numpy==1.23.5
  Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Downloading numpy-1.23.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.0.2
    Uninstalling numpy-2.0.2:
      Successfully uninstalled numpy-2.0.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
blosc2 3.3.2 requires numpy>=1.26, but you have numpy 1.23.5 which is incompatible.
imbalanced-learn 0.13.0 requires numpy<3,>=1.24.3, but you have numpy 1.23.5 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.23.5 which is incompatible.
treescope 0.

In [None]:
# Read the two raw tables: weather observations (CSV) and solar output (Excel)
weather_data = pd.read_csv("/content/Weather.csv")
solar_data = pd.read_excel("/content/Solar work shop.xlsx")

# Parse the date columns used later as join keys. errors='coerce' turns any
# value that cannot be parsed into NaT instead of raising.
solar_data = solar_data.assign(
    Date=pd.to_datetime(solar_data['Date'], errors='coerce')
)
weather_data = weather_data.assign(
    datetime=pd.to_datetime(weather_data['datetime'], dayfirst=True, errors='coerce')
)

In [None]:
# Merge datasets
# Both date columns were parsed with errors='coerce', so unparseable dates are
# NaT. pandas matches null join keys against each other, which would pair every
# NaT weather row with every NaT solar row; drop those rows before joining.
weather_dated = weather_data.dropna(subset=['datetime'])
solar_dated = solar_data.dropna(subset=['Date'])
data = pd.merge(weather_dated, solar_dated, left_on='datetime', right_on='Date', how='inner')

# Select the features from the weather data and KWH from the solar data
features = ['temp', 'humidity', 'dew', 'precip', 'cloudcover', 'solarradiation', 'solarenergy']

# Drop rows with missing values in the selected features and KWH
data = data.dropna(subset=features + ['KWH'])

# Display the cleaned dataset
print(data[features + ['KWH']])


      temp  humidity   dew  precip  cloudcover  solarradiation  solarenergy  \
0     34.1      30.6  13.8   0.000        44.9           299.1         25.7   
1     34.2      40.8  17.7   0.000        32.4           313.3         27.1   
2     33.3      48.8  20.6   0.000        32.9           304.5         26.3   
3     31.0      55.7  20.4   0.874        63.0           287.8         24.7   
4     30.1      52.5  18.0   0.000        23.0           301.2         25.9   
...    ...       ...   ...     ...         ...             ...          ...   
1091  32.5      22.0   5.3   0.000        37.5           263.4         22.8   
1092  33.3      19.0   5.5   0.000        28.6           312.0         27.0   
1093  33.1      28.0  11.5   0.000         2.6           316.4         27.2   
1094  30.6      42.5  16.0   0.000         0.2           312.8         27.0   
1095  31.3      45.2  17.7   0.000         2.4           299.3         26.0   

      KWH  
0     412  
1     438  
2     408  
3  

In [None]:
# Min-max scale the inputs and the target with two independent scalers, so the
# target scaler can later be used on its own to map predictions back to KWH.
scaler_X, scaler_y = MinMaxScaler(), MinMaxScaler()

# Fit each scaler on the cleaned frame, then transform that same frame
X = scaler_X.fit(data[features]).transform(data[features])
y = scaler_y.fit(data[['KWH']]).transform(data[['KWH']])

In [None]:
# Split data
# Split the unscaled rows first, then refit both scalers (created in the
# previous cell) on the training rows only, so the min/max of the held-out rows
# never influences preprocessing. Splitting `data` with the same test_size and
# random_state selects the same rows as splitting the scaled arrays would.
train_rows, test_rows = train_test_split(data, test_size=0.2, random_state=42)

X_train = scaler_X.fit_transform(train_rows[features])
X_test = scaler_X.transform(test_rows[features])
y_train = scaler_y.fit_transform(train_rows[['KWH']])
y_test = scaler_y.transform(test_rows[['KWH']])

# Train SVR model (SVR expects a 1-D target, hence ravel())
model = SVR()
model.fit(X_train, y_train.ravel())


In [None]:
# Predict on the held-out split
y_pred_scaled = model.predict(X_test)

# Undo the target scaling on both the true and the predicted values so that the
# metrics below are expressed in KWH rather than in scaled units.
y_true = scaler_y.inverse_transform(y_test.reshape(-1, 1))
y_pred = scaler_y.inverse_transform(y_pred_scaled[:, np.newaxis])

# Evaluate
r2 = r2_score(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

# "Accuracy" here is 1 minus the RMSE relative to the mean actual value;
# it is a rough convenience figure, not a classification accuracy.
mean_actual = y_true.mean()
accuracy = 1 - (rmse / mean_actual)

print(
    f"SVR RMSE: {rmse:.2f}",
    f"SVR R² Score: {r2:.2f}",
    f"SVR Accuracy (approx.): {accuracy * 100:.2f}%",
    sep="\n",
)


SVR RMSE: 123.40
SVR R² Score: 0.35
SVR Accuracy (approx.): 61.30%


In [None]:
import joblib
from IPython.display import FileLink

# Persist the trained SVR model.
joblib.dump(model, "svr_model.pkl")

# The model was fitted on MinMax-scaled features and a MinMax-scaled target, so
# the pickle alone cannot turn raw inputs into KWH. Save the two fitted scalers
# next to it so whoever loads the model can reproduce the same transforms.
joblib.dump(scaler_X, "svr_scaler_X.pkl")
joblib.dump(scaler_y, "svr_scaler_y.pkl")

# To download it in a notebook
FileLink("svr_model.pkl")

In [None]:
# Single-observation prediction helper
def predict_custom_input(input_dict):
    """Predict KWH for one observation supplied as a dict.

    Reads the notebook globals ``features``, ``scaler_X``, ``model`` and
    ``scaler_y`` at call time.

    Args:
        input_dict: mapping that contains a value for every name in
            ``features``; extra keys are ignored by the column selection.

    Returns:
        The single predicted value after the target scaling has been inverted.
    """
    # One-row frame, restricted to (and ordered by) the training feature list
    row = pd.DataFrame([input_dict]).loc[:, features]
    scaled_prediction = model.predict(scaler_X.transform(row))
    # inverse_transform needs a 2-D column, so reshape the 1-D prediction
    kwh = scaler_y.inverse_transform(scaled_prediction.reshape(-1, 1))
    return kwh[0, 0]

# Sample observation: one value per entry of `features`
user_input = dict(
    temp=32.4,
    humidity=43.4,
    dew=17.3,
    precip=0,
    cloudcover=40,
    solarradiation=289.3,
    solarenergy=25.0,
)

# Run the helper on the sample and report the result with two decimals
predicted_kwh = predict_custom_input(user_input)
print(f"Predicted Solar Energy (KWH): {predicted_kwh:.2f}")

Predicted Solar Energy (KWH): 330.03


In [None]:
# Sanity check on the fitted estimator: how many input columns it was trained on
print("Expected features:", model.n_features_in_)

# feature_names_in_ (scikit-learn ≥ 1.0) is not always present on a fitted
# estimator, so print it only when the attribute lookup succeeds.
try:
    _feature_names = model.feature_names_in_
except AttributeError:
    pass
else:
    print("Feature names:", _feature_names)

Expected features: 7


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import RandomForestRegressor
import joblib

# Read the raw weather (CSV) and solar-output (Excel) tables
weather_data = pd.read_csv("/content/Weather.csv")
solar_data = pd.read_excel("/content/Solar work shop.xlsx")

# Convert the join-key columns to datetimes; values that fail to parse become
# NaT because of errors='coerce'. The weather dates are read day-first.
solar_data = solar_data.assign(
    Date=pd.to_datetime(solar_data['Date'], errors='coerce')
)
weather_data = weather_data.assign(
    datetime=pd.to_datetime(weather_data['datetime'], dayfirst=True, errors='coerce')
)

# Merge datasets
# The date columns were parsed with errors='coerce', so bad dates are NaT, and
# pandas matches null join keys against each other. Drop NaT keys on both sides
# first so unparseable rows are not cross-joined into the training data.
data = pd.merge(
    weather_data.dropna(subset=['datetime']),
    solar_data.dropna(subset=['Date']),
    left_on='datetime',
    right_on='Date',
    how='inner',
)

# Features (exclude KWH)
features = ['temp', 'humidity', 'dew', 'precip', 'cloudcover', 'solarradiation', 'solarenergy']

# Drop rows with missing values
data = data.dropna(subset=features + ['KWH'])

# Train-test split
# Split the unscaled rows first so the scalers below are fitted on training
# rows only; fitting them on all rows would leak the held-out rows' min/max
# into preprocessing. Same test_size/random_state => same row partition.
train_rows, test_rows = train_test_split(data, test_size=0.2, random_state=42)

# Scale features
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(train_rows[features])
y_train = scaler_y.fit_transform(train_rows[['KWH']])
X_test = scaler_X.transform(test_rows[features])
y_test = scaler_y.transform(test_rows[['KWH']])

# Full scaled arrays, kept under their original names
X = scaler_X.transform(data[features])
y = scaler_y.transform(data[['KWH']])

# Train Random Forest
# random_state pins the forest's internal randomness so the metrics printed
# below are reproducible from run to run.
model = RandomForestRegressor(random_state=42)
model.fit(X_train, y_train.ravel())

# Predict on the held-out split, then map predictions and targets back to KWH
y_pred_scaled = model.predict(X_test)
y_true = scaler_y.inverse_transform(y_test.reshape(-1, 1))
y_pred = scaler_y.inverse_transform(y_pred_scaled[:, np.newaxis])

# Metrics (all in KWH units). "Accuracy" is 1 minus RMSE relative to the mean
# actual value - a rough convenience figure, not a classification accuracy.
r2 = r2_score(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))
accuracy = 1 - (rmse / y_true.mean())

print(
    f"Random Forest RMSE: {rmse:.2f}",
    f"Random Forest R² Score: {r2:.2f}",
    f"Random Forest Accuracy (approx.): {accuracy * 100:.2f}%",
    sep="\n",
)

# Save model
joblib.dump(model, "rf_model.pkl")

# The forest was trained on MinMax-scaled features and a MinMax-scaled target,
# so the fitted scalers are saved too; without them a loaded model cannot turn
# raw inputs into KWH.
joblib.dump(scaler_X, "rf_scaler_X.pkl")
joblib.dump(scaler_y, "rf_scaler_y.pkl")

# Single-observation prediction helper (uses whatever `model` and scalers are
# currently bound in the notebook, i.e. the Random Forest fitted in this cell)
def predict_custom_input(input_dict):
    """Predict KWH for one observation supplied as a dict.

    Reads the notebook globals ``features``, ``scaler_X``, ``model`` and
    ``scaler_y`` at call time.

    Args:
        input_dict: mapping that contains a value for every name in
            ``features``; extra keys are ignored by the column selection.

    Returns:
        The single predicted value after the target scaling has been inverted.
    """
    # One-row frame, restricted to (and ordered by) the training feature list
    row = pd.DataFrame([input_dict]).loc[:, features]
    scaled_prediction = model.predict(scaler_X.transform(row))
    # inverse_transform needs a 2-D column, so reshape the 1-D prediction
    kwh = scaler_y.inverse_transform(scaled_prediction.reshape(-1, 1))
    return kwh[0, 0]

# Sample observation: one value per entry of `features`.
# NOTE(review): the rows printed earlier in this notebook pair solarradiation
# of roughly 300 with solarenergy of roughly 25; 450.3 with 1.8 does not follow
# that pattern - confirm these sample values are intended.
user_input = dict(
    temp=25.5,
    humidity=65.2,
    dew=18.7,
    precip=0,
    cloudcover=30.1,
    solarradiation=450.3,
    solarenergy=1.8,
)

# Run the helper on the sample and report the result with two decimals
predicted_kwh = predict_custom_input(user_input)
print(f"Predicted Solar Energy (KWH): {predicted_kwh:.2f}")


Random Forest RMSE: 130.04
Random Forest R² Score: 0.27
Random Forest Accuracy (approx.): 59.22%
Predicted Solar Energy (KWH): 412.35


In [None]:
import joblib

# Write the current `model` to rf_model.pkl; as the cell's last expression,
# the list of written filenames returned by joblib.dump is displayed.
joblib.dump(value=model, filename="rf_model.pkl")


['rf_model.pkl']

In [None]:
import joblib
from google.colab import files

# Write the current `model` under a .sav name (same joblib format, different
# file extension).
# NOTE(review): the model was trained on scaler-transformed features and
# target; a consumer of this file presumably also needs the fitted
# scaler_X / scaler_y - confirm how the download is used.
joblib.dump(value=model, filename="rf_model.sav")

# Trigger a browser download of the file from the Colab runtime
files.download("rf_model.sav")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>