In [58]:
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [60]:
train_df = pd.read_csv('preprocessed_train_complete_feature_engineering.csv')

In [62]:
X = train_df.drop('SalePrice', axis=1)  
y = train_df['SalePrice']  

In [64]:
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

In [66]:
elastic_net_cv = ElasticNetCV(
    l1_ratio=[.1, .5, .7, .9, .95, 1], 
    alphas=np.logspace(-4, 0, 50), 
    cv=5, 
    random_state=42
)

In [68]:
elastic_net_cv.fit(X_train, y_train)

In [70]:
y_pred = elastic_net_cv.predict(X_val)

In [72]:
rmse = np.sqrt(mean_squared_error(y_val, y_pred))

In [74]:
print("Best alpha (regularization strength):", elastic_net_cv.alpha_)
print("Best L1 ratio (mixing Lasso and Ridge):", elastic_net_cv.l1_ratio_)
print("Validation RMSE:", rmse)

Best alpha (regularization strength): 0.0007906043210907702
Best L1 ratio (mixing Lasso and Ridge): 1.0
Validation RMSE: 0.1311937874163855


In [76]:
y_train_pred = elastic_net_cv.predict(X_train)
train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
print("Training RMSE:", train_rmse)

Training RMSE: 0.11493243524260977


In [78]:
print("Validation Predictions:", y_pred)

Validation Predictions: [11.92488171 12.70476955 11.53754073 12.00436091 12.70403754 11.27320399
 12.39886858 11.84494326 11.26393436 11.81516962 11.88553117 11.67803952
 11.42356476 12.31407494 12.06991535 11.79721167 12.17948986 11.79337389
 11.62742728 12.28804862 11.99389404 12.20945589 12.13011854 11.79424789
 12.21272412 11.95256197 12.20877408 11.63871245 12.0944986  12.15542426
 11.79964279 12.54917925 12.23438058 11.63275902 12.49236044 11.90108494
 11.82210487 12.2280681  12.68642334 11.53129223 11.78872964 12.34590521
 11.60676905 12.66255326 11.73720738 11.8095822  11.58308325 11.73470151
 12.9254819  11.75951432 11.68175749 12.21658082 11.63885343 12.55349785
 11.95572055 12.36089793 12.23430656 11.97981378 11.78190542 11.51058761
 11.15033802 12.0325729  12.60159066 12.42507042 12.6529812  12.24154777
 11.52316026 12.70390922 11.67621258 12.03394946 11.74837996 11.70048667
 11.59678144 11.33879311 12.97214023 12.103894   12.61157718 12.69438253
 11.88285071 11.70011689 11

In [80]:
test_df = pd.read_csv('preprocessed_test_complete_feature_engineering.csv')

if 'SalePrice' in test_df.columns:
    test_df = test_df.drop(columns=['SalePrice'])

test_predictions = elastic_net_cv.predict(test_df)

test_predictions = np.expm1(test_predictions)

submission = pd.DataFrame({
    'Id': pd.read_csv('test.csv')['Id'],  
    'SalePrice': test_predictions
})

submission.to_csv('submission.csv', index=False)

print("Test predictions saved to 'submission.csv'.")

Test predictions saved to 'submission.csv'.


In [82]:
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

y_train_pred = elastic_net_cv.predict(X_train)
y_val_pred = elastic_net_cv.predict(X_val)


train_rmse = np.sqrt(mean_squared_error(y_train, y_train_pred))
val_rmse = np.sqrt(mean_squared_error(y_val, y_val_pred))


train_r2 = r2_score(y_train, y_train_pred)
val_r2 = r2_score(y_val, y_val_pred)


print("Training RMSE: ", train_rmse)
print("Validation RMSE: ", val_rmse)

print("Training R-squared: ", train_r2)
print("Validation R-squared: ", val_r2)

Training RMSE:  0.11493243524260977
Validation RMSE:  0.1311937874163855
Training R-squared:  0.9133478507560168
Validation R-squared:  0.9077663647419987


In [84]:
import joblib

joblib.dump(elastic_net_cv, 'elastic_net_model.pkl')
print("Model saved to 'elastic_net_model.pkl'.") 

feature_names = X_train.columns  
joblib.dump(feature_names, 'feature_names.pkl')


Model saved to 'elastic_net_model.pkl'.


['feature_names.pkl']

In [86]:

loaded_model = joblib.load('elastic_net_model.pkl')


y_val_pred_loaded = loaded_model.predict(X_val)


val_rmse_loaded = np.sqrt(mean_squared_error(y_val, y_val_pred_loaded))
print("Validation RMSE after loading the model:", val_rmse_loaded)

Validation RMSE after loading the model: 0.1311937874163855
