In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score


In [None]:

# Load the raw dataset into `df`.
# The path is relative to the notebook's working directory.
DATA_PATH = "data/electricity_cost_dataset.csv"

try:
    df = pd.read_csv(DATA_PATH)
except FileNotFoundError as err:
    # Raise instead of calling exit(): in a notebook, exit() asks the kernel to
    # shut down, whereas a raised exception stops "Run All" at this cell and
    # keeps the original traceback available for debugging.
    raise FileNotFoundError(
        f"Error: dataset not found at '{DATA_PATH}'. "
        "Please ensure electricity_cost_dataset.csv is inside the 'data' "
        "directory next to this notebook."
    ) from err

In [None]:
# Separate the regression target from the predictor columns.
TARGET_COLUMN = "electricity cost"

y = df[TARGET_COLUMN]
X = df.drop(columns=[TARGET_COLUMN])


In [None]:
# One-hot encode the categorical "structure type" column.
# X is rebuilt from `df` rather than from the previous value of X, so this cell
# can be re-run safely: encoding an already-encoded X would raise KeyError
# because the "structure type" column no longer exists.
# drop_first=True keeps k-1 indicator columns for k categories.
X = pd.get_dummies(
    df.drop(columns=["electricity cost"]),
    columns=["structure type"],
    drop_first=True,
)

# Hold out 20% of the rows for evaluation; the fixed seed makes the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
# Fit a linear regression on the training split (fit() returns the estimator
# itself), then predict the target for the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

In [None]:

# Compare held-out predictions with the true targets and report both metrics.
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

evaluation_report = [
    "\nModel Evaluation:",
    f"Mean Absolute Error (MAE): {mae:.2f}",
    f"R-squared (R2): {r2:.2f}",
]
print("\n".join(evaluation_report))


Model Evaluation:
Mean Absolute Error (MAE): 245.65
R-squared (R2): 0.92


In [None]:
# Spot-check the model on the first held-out row. The row is kept as a one-row
# DataFrame because predict() is given the same 2-D layout it was fitted on.
new_data_point = X_test.head(1)
predicted_cost = model.predict(new_data_point)[0]

example_features = new_data_point.iloc[0]
actual_cost = y_test.iloc[0]

print(
    "\nExample Prediction:",
    f"Features of example data point:\n{example_features}",
    f"Predicted Electricity Cost: {predicted_cost:.2f} USD",
    f"Actual Electricity Cost (for comparison): {actual_cost:.2f} USD",
    sep="\n",
)


Example Prediction:
Features of example data point:
site area                       1447
water consumption             1000.0
recycling rate                    22
utilisation rate                  69
air qality index                 179
issue reolution time              65
resident count                     0
structure type_Industrial       True
structure type_Mixed-use       False
structure type_Residential     False
Name: 6252, dtype: object
Predicted Electricity Cost: 2655.23 USD
Actual Electricity Cost (for comparison): 2027.00 USD
