In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score


train_file_path = r"train.csv"
train_data = pd.read_csv(train_file_path)


test_file_path = r"test.csv"
test_data = pd.read_csv(test_file_path)


X = train_data.drop(columns=['ID', 'medv'])
y = train_data['medv']


X_test = test_data.drop(columns=['ID'], errors='ignore')


X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  
X_valid_scaled = scaler.transform(X_valid)      
X_test_scaled = scaler.transform(X_test)       


model = LinearRegression()
model.fit(X_train_scaled, y_train)


y_valid_pred = model.predict(X_valid_scaled)
mse = mean_squared_error(y_valid, y_valid_pred)  
r2 = r2_score(y_valid, y_valid_pred)            

print(f"Linear Regression - MSE: {mse}, R2 Score: {r2}")


y_test_pred = model.predict(X_test_scaled)


output = pd.DataFrame({'ID': test_data['ID'], 'medv': y_test_pred})
output_file_path = r"linear_regression_predictions.csv"
output.to_csv(output_file_path, index=False)

print(f"Predictions saved to {output_file_path}")

Linear Regression - MSE: 23.486735195425798, R2 Score: 0.739031586042544
Predictions saved to linear_regression_predictions.csv


In [2]:
import pandas as pd 
from sklearn.model_selection import train_test_split  
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import Ridge, Lasso, ElasticNet 
from sklearn.metrics import mean_squared_error, r2_score  


train_file_path = r"train.csv"
train_data = pd.read_csv(train_file_path)


test_file_path = r"test.csv"
test_data = pd.read_csv(test_file_path)


X = train_data.drop(columns=['ID', 'medv'])  
y = train_data['medv']  


X_test = test_data.drop(columns=['ID'], errors='ignore')


X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=42)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)  
X_valid_scaled = scaler.transform(X_valid)     
X_test_scaled = scaler.transform(X_test)       


models = {
    'Ridge Regression': Ridge(alpha=1.0),
    'Lasso Regression': Lasso(alpha=0.1),
    'ElasticNet': ElasticNet(alpha=0.1, l1_ratio=0.5)
}



def evaluate_model(model, X_train, y_train, X_valid, y_valid):
    model.fit(X_train, y_train)  
    y_pred = model.predict(X_valid)  
    mse = mean_squared_error(y_valid, y_pred)  
    r2 = r2_score(y_valid, y_pred)  
    return mse, r2


predictions_df = pd.DataFrame()

for name, model in models.items():
    model.fit(X_train_scaled, y_train) 
    pred_test = model.predict(X_test_scaled) 
    predictions_df[name] = pred_test 


average_prediction = predictions_df.mean(axis=1)


output = pd.DataFrame({'ID': test_data['ID'], 'medv': average_prediction})
output_file_path = r"average_predictions.csv"
output.to_csv(output_file_path, index=False)


print(f"Average of all 3 - MSE: {mse}, R2 Score: {r2}")


print(f"Predictions saved to {output_file_path}")

Average of all 3 - MSE: 23.486735195425798, R2 Score: 0.739031586042544
Predictions saved to average_predictions.csv
