In [None]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

In [None]:
# Read the raw inputs: weather observations (CSV) and solar generation readings (Excel).
weather_data = pd.read_csv("/content/Manesar, HR, India 2024-02-01 to 2025-02-28.csv")

# The first spreadsheet row is skipped on read; the two columns used downstream
# are then renamed to the short names 'Date' and 'KWH'.
solar_data = pd.read_excel("/content/Solar generation.xlsx", skiprows=1).rename(
    columns={'Solar energy generation': 'Date', 'Workshop (156KWp)': 'KWH'}
)

# Parse the timestamp columns on both sides. Solar dates that cannot be parsed
# become NaT (errors='coerce'); weather timestamps are parsed without coercion.
solar_data['Date'] = pd.to_datetime(solar_data['Date'], errors='coerce')
weather_data['datetime'] = pd.to_datetime(weather_data['datetime'])

In [None]:
# Model inputs; the prediction target is the 'KWH' column.
features = ['temp', 'humidity', 'dew', 'cloudcover', 'solarradiation', 'solarenergy']

# Inner-join weather rows to solar rows whose 'Date' equals the weather
# 'datetime', then discard any row missing a feature or the target.
data = (
    weather_data
    .merge(solar_data, how='inner', left_on='datetime', right_on='Date')
    .dropna(subset=[*features, 'KWH'])
)

In [None]:
# Min-max scale the inputs and the target with separate scalers so that
# predictions can later be mapped back to KWH through scaler_y alone.
# Note: both scalers are fitted on every row of `data`, i.e. before any
# train/test split is made.
scaler_X = MinMaxScaler()
X = scaler_X.fit_transform(data[features])

# Double brackets keep 'KWH' as a one-column frame, so y is 2-D.
scaler_y = MinMaxScaler()
y = scaler_y.fit_transform(data[['KWH']])

In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle
# so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Gradient-boosted regressor with a squared-error objective.
model = xgb.XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    objective='reg:squarederror',
)
# y_train is a 2-D column; flatten it to 1-D for fitting.
model.fit(X_train, y_train.reshape(-1))

In [None]:
# Predict on the held-out rows, then undo the target scaling on both the
# predictions and the true values so the metrics are in original KWH units.
y_pred_scaled = model.predict(X_test)
y_true = scaler_y.inverse_transform(y_test)
y_pred = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))

# Standard regression metrics.
r2 = r2_score(y_true, y_pred)
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

# Ad-hoc "accuracy": one minus RMSE relative to the mean of the true values.
# This is not a standard metric; read it alongside R².
mean_actual = y_true.mean()
accuracy = 1 - rmse / mean_actual

print(
    f"XGBoost RMSE: {rmse:.2f}",
    f"XGBoost R² Score: {r2:.2f}",
    f"XGBoost Accuracy (approx.): {accuracy * 100:.2f}%",
    sep="\n",
)


XGBoost RMSE: 131.91
XGBoost R² Score: -0.04
XGBoost Accuracy (approx.): 51.47%


In [None]:
# Single-observation prediction helper
def predict_custom_input(input_dict):
    """Predict KWH for one observation supplied as a mapping.

    Args:
        input_dict: Mapping from column name to value. It must contain every
            name listed in the module-level ``features``; only those columns
            are selected, in the order ``features`` defines.

    Returns:
        The single predicted value after the model output has been passed
        through ``scaler_y.inverse_transform``.
    """
    # One-row frame restricted to (and ordered by) the model's feature columns.
    row = pd.DataFrame([input_dict])[features]

    # Apply the same feature scaling used for training, then predict.
    scaled_prediction = model.predict(scaler_X.transform(row))

    # The scaler expects a 2-D column, so reshape before undoing the scaling.
    kwh = scaler_y.inverse_transform(scaled_prediction.reshape(-1, 1))
    return kwh[0][0]

# Sample observation: one value for each model feature.
user_input = dict(
    temp=12.8,
    humidity=94.4,
    dew=11.9,
    cloudcover=64.8,
    solarradiation=177.2,
    solarenergy=15.2,
)

# Run the sample through the prediction helper and report the result.
predicted_kwh = predict_custom_input(user_input)
print(f"Predicted Solar Energy (KWH): {predicted_kwh:.2f}")

Predicted Solar Energy (KWH): 104.11
