## Download the Dataset

In [None]:
# Install the Kaggle package
!pip install kaggle

# Download the dataset
!kaggle datasets download -d yasserh/housing-prices-dataset

# Unzip the dataset
!unzip housing-prices-dataset.zip -d housing-prices-dataset

## Load and Preprocess the Dataset

### Load the dataset

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Read the CSV from the folder the download cell unzips into. The path is
# relative to the working directory (the same base the unzip command uses),
# so the notebook is not tied to one machine's absolute filesystem layout.
dataset = pd.read_csv('housing-prices-dataset/Housing.csv')
dataset.head()

In [3]:
# The first column is the regression target; every remaining column is a feature.
y = dataset.iloc[:, 0].to_numpy()
X = dataset.iloc[:, 1:].to_numpy()

In [None]:
# Show the first feature row to check the column layout after dropping the target column.
print(X[0])

In [None]:
# Show the second feature row for comparison with the first.
print(X[1])

### Splitting the dataset into training and test sets

In [6]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Show the first training row as produced by the split.
print(X_train[0])

In [None]:
# Show the target value that belongs to the first training row.
print(y_train[0])

### Encoding categorical variables

In [9]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Positional indices (within X) of the columns to one-hot encode.
# NOTE(review): presumably these are the string-valued columns — verify against dataset.head().
categorical_columns = [4, 5, 6, 7, 8, 10, 11]

# sparse_threshold=0 forces a dense result. Without it the transformer may
# return a scipy sparse matrix when the stacked output is sparse enough, and
# np.array() on a sparse matrix yields a 0-d object array instead of a 2-D table.
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
    sparse_threshold=0,
)

# Fit the encoder on the training rows only; the encoded columns come first,
# followed by the untouched (passthrough) columns.
# NOTE: this overwrites X_train, so the cell must not be re-run without re-running the split.
X_train = np.array(ct.fit_transform(X_train))

In [10]:
# Encode the test rows with the transformer fitted on the training rows
# (transform only, no refit). This overwrites X_test, so re-running the cell
# on already-encoded data is not valid — re-run from the split instead.
X_test = np.array(ct.transform(X_test))

In [None]:
# Show the first training row after encoding.
print(X_train[0])

In [None]:
# Show the first test row after encoding; it should have the same width as the training rows.
print(X_test[0])

### Checking for missing data

In [None]:
# Cast to float64 first so np.isnan can be applied, then flag every training
# row that contains at least one NaN.
nan_rows = np.isnan(X_train.astype(np.float64)).any(axis=1)
print(X_train[nan_rows])
# An empty printout means no training row has missing values.

## Training Models

### Linear Regression

In [None]:
from sklearn.linear_model import LinearRegression

# Linear baseline fitted on the encoded training data.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

In [None]:
# Predict on the held-out rows and print predictions next to the true values
# (left column: predicted, right column: actual), with two decimals.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination of the linear model on the test set.
r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
import pickle

# Persist the fitted linear model. The context manager closes (and flushes)
# the file even if pickling fails; the previous inline open() never closed it.
# NOTE: only unpickle this file from a trusted location — pickle.load can execute arbitrary code.
filename = 'linear_regression_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(regressor, model_file)

### Support Vector Regression

In [None]:
# Turn the targets into column vectors: the scaler used below works on 2-D input.
y_train_svr = y_train.reshape(-1, 1)
y_test_svr = y_test.reshape(-1, 1)

In [None]:
from sklearn.preprocessing import StandardScaler

# Separate scalers for features and target so predictions can later be mapped
# back to the original target units with sc_y.inverse_transform.
sc_X = StandardScaler()
sc_y = StandardScaler()
X_train_svr = sc_X.fit_transform(X_train)

# Scale from the raw y_train rather than from y_train_svr itself. Overwriting
# y_train_svr with its own scaled version made the cell non-idempotent: a
# second run would refit sc_y on already-standardised values, and
# inverse_transform would no longer return values in the original units.
y_train_svr = sc_y.fit_transform(y_train.reshape(-1, 1))

In [None]:
from sklearn.svm import SVR

# RBF-kernel support vector regressor trained on the standardised data.
regressor = SVR(kernel = 'rbf')

# SVR.fit expects a 1-D target. y_train_svr is a column vector (the scaler
# needs 2-D input), so flatten it here to avoid a DataConversionWarning.
regressor.fit(X_train_svr, y_train_svr.ravel())

In [None]:
# Predict in the standardised space, then map the result back to the original
# target units before comparing with the unscaled test targets.
scaled_pred = regressor.predict(sc_X.transform(X_test))
y_pred = sc_y.inverse_transform(scaled_pred.reshape(-1, 1))
np.set_printoptions(precision=2)
# Left column: predicted, right column: actual.
print(np.column_stack((y_pred, y_test)))

In [None]:
from sklearn.metrics import r2_score

# R^2 of the SVR predictions (already mapped back to original units) on the test set.
r2_score(y_true=y_test, y_pred=y_pred)

### Decision Tree Regression

In [None]:
from sklearn.tree import DecisionTreeRegressor

# Single regression tree; the fixed seed makes the fitted tree reproducible.
regressor = DecisionTreeRegressor(random_state=0)
regressor.fit(X=X_train, y=y_train)

In [None]:
# Predict with the decision tree and print predicted vs. actual values side by side.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

In [None]:
from sklearn.metrics import r2_score

# R^2 of the decision tree on the test set.
r2_score(y_true=y_test, y_pred=y_pred)

### Random Forest Regression

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Ensemble of 10 trees with a fixed seed for reproducibility.
regressor = RandomForestRegressor(
    n_estimators=10,
    random_state=0,
)
regressor.fit(X=X_train, y=y_train)

In [None]:
# Predict with the random forest and print predicted vs. actual values side by side.
y_pred = regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred, y_test)))

In [None]:
from sklearn.metrics import r2_score

# R^2 of the random forest on the test set.
r2_score(y_true=y_test, y_pred=y_pred)

### CatBoost

In [None]:
!pip install catboost
from catboost import CatBoostClassifier
classifier = CatBoostClassifier()
classifier.fit(X_train, y_train)

In [None]:
# NOTE(review): confusion_matrix is imported here but not used in the visible
# cells; the score computed afterwards is R^2 — consider dropping the import.
from sklearn.metrics import confusion_matrix
# Predict on the held-out rows with the fitted CatBoost model.
y_pred = classifier.predict(X_test)

In [None]:
from sklearn.metrics import r2_score

# R^2 of the CatBoost predictions on the test set.
r2_score(y_true=y_test, y_pred=y_pred)

### Artificial Neural Network

In [25]:
import tensorflow as tf

# The encoded feature matrices mix encoder output with passthrough columns and
# need an explicit cast to be numeric (the missing-data check casts them too).
# Cast to float32 with NumPy first so the tensor conversion receives a plain
# numeric array rather than an array of Python objects.
X_train_ann = tf.convert_to_tensor(np.asarray(X_train, dtype=np.float32), dtype=tf.float32)
y_train_ann = tf.convert_to_tensor(np.asarray(y_train, dtype=np.float32), dtype=tf.float32)
y_test_ann = tf.convert_to_tensor(np.asarray(y_test, dtype=np.float32), dtype=tf.float32)
X_test_ann = tf.convert_to_tensor(np.asarray(X_test, dtype=np.float32), dtype=tf.float32)

In [26]:
# Compact fully connected regressor. The previous stack doubled the layer
# width up to 65536 units; the 32768 -> 65536 layer alone holds about 2.1e9
# weights, which is not trainable in practice.
# NOTE(review): the widths below are a reasonable starting point, not tuned values.
ann = tf.keras.models.Sequential()
ann.add(tf.keras.layers.Dense(units=64, activation='relu'))
ann.add(tf.keras.layers.Dense(units=32, activation='relu'))
# Single linear output unit: one unbounded value per row (regression).
ann.add(tf.keras.layers.Dense(units=1))

In [27]:
# Regression setup: minimise mean squared error with the Adam optimiser.
ann.compile(
    loss='mean_squared_error',
    optimizer='adam',
)

In [None]:
# Train for 5000 epochs in mini-batches of 32 rows.
ann.fit(
    x=X_train_ann,
    y=y_train_ann,
    epochs=5000,
    batch_size=32,
)

In [None]:
# Run the trained network on the held-out feature tensor.
y_pred = ann.predict(X_test_ann)

In [None]:
from sklearn.metrics import r2_score

# R^2 of the neural network on the test set (true values passed as the float32 tensor).
r2_score(y_true=y_test_ann, y_pred=y_pred)