In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

In [2]:
file_path = "C:/Users/bhumi/Downloads/londor smart meter.csv"  
data = pd.read_csv(file_path)

In [3]:
print(data.isnull().sum())

LCLid             0
day               0
energy_median     0
energy_mean       0
energy_max        0
energy_count      0
energy_std       41
energy_sum        0
energy_min        0
dtype: int64


In [5]:
from sklearn.impute import SimpleImputer
imputer = SimpleImputer(strategy='mean')
data['energy_std'] = imputer.fit_transform(data[['energy_std']])


In [6]:
print(data.isnull().sum())

LCLid            0
day              0
energy_median    0
energy_mean      0
energy_max       0
energy_count     0
energy_std       0
energy_sum       0
energy_min       0
dtype: int64


In [7]:
# Parse the 'day' column to datetime
data['day'] = pd.to_datetime(data['day'], format='%d-%m-%Y')

In [8]:
data['day_of_week'] = data['day'].dt.dayofweek  # 0=Monday, 6=Sunday
data['month'] = data['day'].dt.month
data['is_weekend'] = data['day_of_week'].apply(lambda x: 1 if x >= 5 else 0)

In [9]:
data = data.drop(['LCLid', 'day'], axis=1)  

In [10]:
X = data.drop('energy_sum', axis=1)  
y = data['energy_sum']  


In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [12]:
imputer = SimpleImputer(strategy='mean')
X_train = pd.DataFrame(imputer.fit_transform(X_train), columns=X.columns)
X_test = pd.DataFrame(imputer.transform(X_test), columns=X.columns)

In [13]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [14]:
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)


In [15]:
y_pred = model.predict(X_test)

In [16]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

Mean Squared Error: 0.08381745886541139
R-squared Score: 0.9991975770100637


In [17]:
#prediction with new data

In [18]:
new_data = pd.DataFrame({
    'energy_median': [0.5, 0.2],
    'energy_mean': [0.4, 0.3],
    'energy_max': [0.8, 0.7],
    'energy_count': [48, 36],
    'energy_std': [0.15, 0.12],  
    'energy_min': [0.1, 0.05],
    'day_of_week': [2, 5],
    'month': [12, 1],
    'is_weekend': [0, 1]
})


new_data = pd.DataFrame(imputer.transform(new_data), columns=new_data.columns)


new_data = scaler.transform(new_data)


predictions = model.predict(new_data)

print("Predicted energy_sum values:", predictions)


Predicted energy_sum values: [19.20102002 13.72816001]
