# Linear Regression with Housing Data (HousingData.csv)

In [None]:
#LINEAR REGRESSION
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from tqdm.notebook import tqdm

In [None]:
# Load the housing dataset; the CSV is read from the current working directory.
df = pd.read_csv('HousingData.csv')
df.info()

# A notebook cell only echoes its final expression, so the pre-cleaning
# missing-value counts must be printed explicitly or they are discarded.
print(df.isna().sum())

# Drop every row that contains at least one missing value.
df = df.dropna()

# Final expression of the cell: echo the counts again to confirm that no
# missing values remain.
df.isna().sum()

In [None]:
# Separate the predictors from the target column (MEDV).
x = df.drop(columns=['MEDV'])
y = df['MEDV']

# Hold out 20% of the rows for testing. A fixed random_state makes the split,
# and therefore every downstream metric, reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Print the shapes explicitly: a bare expression in the middle of a cell is
# evaluated but never displayed.
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

# Standardize the features. The scaler is fitted on the training split only
# and then reused on the test split, so test statistics do not influence
# the transformation.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)


In [None]:
# Feed-forward regressor: two ReLU hidden layers with batch normalization
# between them, followed by a single output unit with no activation.
# The input width is read from the scaled training matrix instead of being
# hard-coded, so the model keeps working if the feature columns change.
n_features = x_train_scaled.shape[1]
model = tf.keras.models.Sequential([
    tf.keras.layers.Input(shape=(n_features,), name='input-layer'),
    tf.keras.layers.Dense(100, activation='relu', name='hidden-layer-2'),
    tf.keras.layers.BatchNormalization(name='hidden-layer-3'),
    tf.keras.layers.Dense(50, activation='relu', name='hidden-layer-4'),
    tf.keras.layers.Dense(1, name='output-layer'),
])


In [None]:
# Print the layer-by-layer text summary first; it writes to stdout and
# therefore shows up regardless of its position in the cell.
model.summary()

# plot_model returns an image object that the notebook renders only when it
# is the cell's final expression, so this call has to come last for the
# architecture diagram to be displayed.
tf.keras.utils.plot_model(model, show_shapes=True)


In [None]:
# Optimize mean squared error with Adam and track mean absolute error.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Stop once val_loss has not improved for 10 consecutive epochs and roll the
# model back to the weights of its best epoch.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True
)

# Validate on a held-out slice of the training data rather than on the test
# set. Early stopping selects the weights using the validation loss, so
# validating on the test set would leak it into model selection and make the
# final test metrics optimistically biased.
history = model.fit(
    x_train_scaled,
    np.asarray(y_train),  # plain array so Keras can slice off the validation part
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stop],
    verbose=1,
)


In [None]:
# Plot every quantity recorded during training, one curve per history key.
training_curves = pd.DataFrame(history.history)
training_curves.plot(figsize=(10, 7))
plt.title("Metrics graph")
plt.show()

# Predict on the scaled test features and collapse the output to 1-D so it
# can be passed to the plot below alongside y_test.
raw_predictions = model.predict(x_test_scaled)
y_pred = raw_predictions.flatten()

# Scatter of actual vs. predicted targets with a fitted regression line.
sns.regplot(x=y_test, y=y_pred)
plt.title("Regression Line for Predicted values")
plt.show()


In [None]:
def regression_metrics_display(y_test, y_pred):
    """Print MAE, MSE and R2 score for a set of predictions.

    Args:
        y_test: True target values.
        y_pred: Predicted target values, aligned with ``y_test``.
    """
    # Each score is computed and printed in turn, in the order MAE, MSE, R2.
    mae = metrics.mean_absolute_error(y_test, y_pred)
    print(f"MAE is {mae}")

    mse = metrics.mean_squared_error(y_test, y_pred)
    print(f"MSE is {mse}")

    r2 = metrics.r2_score(y_test, y_pred)
    print(f"R2 score is {r2}")


# Report the scores for the test-set predictions.
regression_metrics_display(y_test, y_pred)
