In [33]:
# Linear Regression Model

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the Boston Housing dataset
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, sep="\s+")
data.columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Select the input and output features
X = data[["RM", "LSTAT", "PTRATIO"]]
y = data["MEDV"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the R-squared score (a common accuracy metric for regression models)
r2 = r2_score(y_test, y_pred)

# Print the R-squared score
print("R-squared score:", r2)


R-squared score: 0.650904156861472


In [34]:
# support vector regression (SVR) model

import pandas as pd
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the Boston Housing dataset
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, sep="\s+")
data.columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Select the input and output features
X = data[["RM", "LSTAT", "PTRATIO"]]
y = data["MEDV"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train an SVR model
model = SVR(kernel="linear", C=1.0, epsilon=0.1)
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the R-squared score (a common accuracy metric for regression models)
r2 = r2_score(y_test, y_pred)

# Print the R-squared score
print("R-squared score:", r2)


R-squared score: 0.6493482803030897


In [35]:
# random forest regression model

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the Boston Housing dataset
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, sep="\s+")
data.columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Select the input and output features
X = data[["RM", "LSTAT", "PTRATIO"]]
y = data["MEDV"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a random forest regression model
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the R-squared score (a common accuracy metric for regression models)
r2 = r2_score(y_test, y_pred)

# Print the R-squared score
print("R-squared score:", r2)


R-squared score: 0.779597922742391


In [36]:
# gradient boosting regression model

import pandas as pd
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

# Load the Boston Housing dataset
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, sep="\s+")
data.columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Select the input and output features
X = data[["RM", "LSTAT", "PTRATIO"]]
y = data["MEDV"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Train a gradient boosting regression model
model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the R-squared score (a common accuracy metric for regression models)
r2 = r2_score(y_test, y_pred)

# Print the R-squared score
print("R-squared score:", r2)


R-squared score: 0.756325877583729


In [37]:
# neural network regression model

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from sklearn.metrics import r2_score

# Load the Boston Housing dataset
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, sep="\s+")
data.columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Select the input and output features
X = data[["RM", "LSTAT", "PTRATIO"]]
y = data["MEDV"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build a neural network regression model
model = Sequential()
model.add(Dense(64, input_dim=3, activation="relu"))
model.add(Dense(32, activation="relu"))
model.add(Dense(1, activation="linear"))
model.compile(loss="mean_squared_error", optimizer="adam")

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=32, verbose=0)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the R-squared score (a common accuracy metric for regression models)
r2 = r2_score(y_test, y_pred)

# Print the R-squared score
print("R-squared score:", r2)

R-squared score: 0.7676306330090721


In [38]:
# K-nearest neighbors regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import r2_score

# Load the Boston Housing dataset
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, sep="\s+")
data.columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Select the input and output features
X = data[["RM", "LSTAT", "PTRATIO"]]
y = data["MEDV"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build a K-nearest neighbors regression model
model = KNeighborsRegressor(n_neighbors=5, weights="distance")

# Train the model
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the R-squared score (a common accuracy metric for regression models)
r2 = r2_score(y_test, y_pred)

# Print the R-squared score
print("R-squared score:", r2)


R-squared score: 0.7694539011050399


In [39]:
# Decision tree regression

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import r2_score

# Load the Boston Housing dataset
data = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/housing/housing.data", header=None, sep="\s+")
data.columns = ["CRIM", "ZN", "INDUS", "CHAS", "NOX", "RM", "AGE", "DIS", "RAD", "TAX", "PTRATIO", "B", "LSTAT", "MEDV"]

# Select the input and output features
X = data[["RM", "LSTAT", "PTRATIO"]]
y = data["MEDV"]

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Scale the input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build a Decision tree regression model
model = DecisionTreeRegressor(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Calculate the R-squared score (a common accuracy metric for regression models)
r2 = r2_score(y_test, y_pred)

# Print the R-squared score
print("R-squared score:", r2)


R-squared score: 0.5280986693762859


In [43]:
#                                                                          Position
# Linear Regression : R-squared score: 0.650904156861472                  | 5th
# Support Vector Regression (SVR) : R-squared score: 0.6493482803030897   | 6th
# Random Forest Regression : R-squared score: 0.779597922742391           | 1st
# Gradient Boosting Regression : R-squared score: 0.756325877583729       | 4th
# Neural Networks : R-squared score: 0.7676306330090721                   | 3rd
# K-Nearest Neighbors Regression : R-squared score: 0.7694539011050399    | 2nd
# Decision Tree Regression : R-squared score: 0.5280986693762859          | 7th

In [41]:
# Highest was random forrest regression model

# performing Test on random forrest regression model
# Ask the user to input values for the features
rm = float(input("Enter the average number of rooms per dwelling (RM): "))
lstat = float(input("Enter the percentage of lower status of the population (LSTAT): "))
ptratio = float(input("Enter the pupil-teacher ratio by town (PTRATIO): "))

# Create a dataframe with the user inputs
user_inputs = pd.DataFrame({"RM": [rm], "LSTAT": [lstat], "PTRATIO": [ptratio]})

# Scale the user inputs using the same scaler used for training the model
user_inputs_scaled = scaler.transform(user_inputs)

# Use the trained random forest regression model to make a prediction on the user inputs
predicted_price = model.predict(user_inputs_scaled)

# Print the predicted MEDV value for the user inputs
print("Predicted MEDV value:", predicted_price[0])


Enter the average number of rooms per dwelling (RM): 6.5
Enter the percentage of lower status of the population (LSTAT): 5
Enter the pupil-teacher ratio by town (PTRATIO): 15
Predicted MEDV value: 24.8
