In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder

# Read data from a CSV file (Update the correct file path)
df = pd.read_csv('house_price.csv')

# Display the first few rows to understand the dataset
print(df.head())

# Encode categorical features before splitting
label_encoders = {}
categorical_cols = ['Insulation', 'Windows', 'Energy_Efficiency_Rating']

for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Save encoder for later use if needed

# Features and target
X = df[['Insulation', 'Windows', 'Floor_Area_in_SqFt']]
y = df['Energy_Efficiency_Rating']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Ridge Regression Model
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)

# Predictions
y_pred = ridge.predict(X_test)

# Evaluation
mse = mean_squared_error(y_test, y_pred)
print("MSE for Energy Efficiency Prediction:", mse)

   House_ID Insulation        Windows  Floor_Area_in_SqFt  \
0         1       High  Double-Glazed                 800   
1         2       High  Double-Glazed                 810   
2         3       High  Double-Glazed                 820   
3         4       High  Double-Glazed                 830   
4         5       High  Double-Glazed                 840   

  Energy_Efficiency_Rating  Price_in_USD  
0                        A        200000  
1                        A        201500  
2                        A        203000  
3                        A        204500  
4                        A        206000  
MSE for Energy Efficiency Prediction: 0.10239999917110125
