In [1]:
%pip install plotly

Note: you may need to restart the kernel to use updated packages.


In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns

import matplotlib.pyplot as plt

In [47]:
new = pd.read_csv('housing.csv')
new.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
0,-122.23,37.88,41,880,129.0,322,126,8.3252,NEAR BAY,452600
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,NEAR BAY,358500
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,NEAR BAY,352100
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,NEAR BAY,341300
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,NEAR BAY,342200


In [49]:
new.shape

(20640, 10)

In [50]:
new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 10 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   longitude           20640 non-null  float64
 1   latitude            20640 non-null  float64
 2   housing_median_age  20640 non-null  int64  
 3   total_rooms         20640 non-null  int64  
 4   total_bedrooms      20433 non-null  float64
 5   population          20640 non-null  int64  
 6   households          20640 non-null  int64  
 7   median_income       20640 non-null  float64
 8   ocean_proximity     20640 non-null  object 
 9   median_house_value  20640 non-null  int64  
dtypes: float64(4), int64(5), object(1)
memory usage: 1.6+ MB


In [51]:
new.describe()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value
count,20640.0,20640.0,20640.0,20640.0,20433.0,20640.0,20640.0,20640.0,20640.0
mean,-119.569704,35.631861,28.639486,2635.763081,537.870553,1425.476744,499.53968,3.870671,206855.816909
std,2.003532,2.135952,12.585558,2181.615252,421.38507,1132.462122,382.329753,1.899822,115395.615874
min,-124.35,32.54,1.0,2.0,1.0,3.0,1.0,0.4999,14999.0
25%,-121.8,33.93,18.0,1447.75,296.0,787.0,280.0,2.5634,119600.0
50%,-118.49,34.26,29.0,2127.0,435.0,1166.0,409.0,3.5348,179700.0
75%,-118.01,37.71,37.0,3148.0,647.0,1725.0,605.0,4.74325,264725.0
max,-114.31,41.95,52.0,39320.0,6445.0,35682.0,6082.0,15.0001,500001.0


In [52]:
new.isnull().sum()

longitude               0
latitude                0
housing_median_age      0
total_rooms             0
total_bedrooms        207
population              0
households              0
median_income           0
ocean_proximity         0
median_house_value      0
dtype: int64

In [None]:
#Null in total_bedrooms. We can either drop the rows or fill the null values with the mean or median of the column.

In [53]:
import plotly.graph_objects as go

fig = go.Figure()

# Add histogram trace
fig.add_trace(go.Histogram(
    x=new['total_bedrooms'],
    nbinsx=50,
    name='Total Bedrooms'
))

# Add mean line
fig.add_vline(x=new['total_bedrooms'].mean(), 
              line_dash="dash", 
              line_color="red",
              annotation_text="Mean",
              annotation_position="top right")

# Add median line
fig.add_vline(x=new['total_bedrooms'].median(), 
              line_dash="dash", 
              line_color="green",
              annotation_text="Median",
              annotation_position="top left")

# Update layout
fig.update_layout(
    title='Distribution of Total Bedrooms',
    xaxis_title='Total Bedrooms',
    yaxis_title='Count',
    width=1000,
    height=600
)

fig.show()

In [54]:
skewness = new['total_bedrooms'].skew()
kurtosis = new['total_bedrooms'].kurtosis()

fig = go.Figure()

# Add histogram trace
fig.add_trace(go.Histogram(
    x=new['total_bedrooms'],
    nbinsx=50,
    name='Total Bedrooms'
))

# Add stats annotations
fig.add_annotation(
    text=f'Skewness: {skewness:.2f}<br>Kurtosis: {kurtosis:.2f}',
    xref="paper", yref="paper",
    x=0.98, y=0.95,
    showarrow=False,
    bgcolor='white',
    bordercolor='black',
    borderwidth=1
)

# Update layout
fig.update_layout(
    title='Distribution of Total Bedrooms with Skewness and Kurtosis',
    xaxis_title='Total Bedrooms',
    yaxis_title='Count',
    width=1000,
    height=600
)

fig.show()

Positively skewed and High kurtosis. We can use median since its not affected majorly by outliers

In [55]:
median_val = new['total_bedrooms'].median()
new['total_bedrooms'].fillna(median_val, inplace=True)



A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





In [56]:
new.isnull().sum()

longitude             0
latitude              0
housing_median_age    0
total_rooms           0
total_bedrooms        0
population            0
households            0
median_income         0
ocean_proximity       0
median_house_value    0
dtype: int64

In [None]:
new.to_csv('housing_cleaned.csv', index=False)

In [3]:
df = pd.read_csv('housing_cleaned.csv')
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,ocean_proximity,median_house_value
0,-122.23,37.88,41,880,129.0,322,126,8.3252,NEAR BAY,452600
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,NEAR BAY,358500
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,NEAR BAY,352100
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,NEAR BAY,341300
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,NEAR BAY,342200


One hot encode the categorical data ocean proximity

In [4]:
# Get one hot encoding of ocean_proximity
encoded_columns = pd.get_dummies(df['ocean_proximity'], dtype=int)

# Drop the original column and concatenate the encoded columns
df = pd.concat([df.drop('ocean_proximity', axis=1), encoded_columns], axis=1)

In [5]:
df.head()

Unnamed: 0,longitude,latitude,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,<1H OCEAN,INLAND,ISLAND,NEAR BAY,NEAR OCEAN
0,-122.23,37.88,41,880,129.0,322,126,8.3252,452600,0,0,0,1,0
1,-122.22,37.86,21,7099,1106.0,2401,1138,8.3014,358500,0,0,0,1,0
2,-122.24,37.85,52,1467,190.0,496,177,7.2574,352100,0,0,0,1,0
3,-122.25,37.85,52,1274,235.0,558,219,5.6431,341300,0,0,0,1,0
4,-122.25,37.85,52,1627,280.0,565,259,3.8462,342200,0,0,0,1,0


In [6]:
#Feature combine lat and long to get location
# Calculate Euclidean distance from a reference point in California (using state center approx)
# California's approximate center: 36.7783° N, 119.4179° W
CA_LAT = 36.7783
CA_LONG = -119.4179
df['location'] = np.sqrt((df['latitude'] - CA_LAT)**2 + (df['longitude'] - CA_LONG)**2)
df.drop(['latitude', 'longitude'], axis=1, inplace=True)

df.head()

Unnamed: 0,housing_median_age,total_rooms,total_bedrooms,population,households,median_income,median_house_value,<1H OCEAN,INLAND,ISLAND,NEAR BAY,NEAR OCEAN,location
0,41,880,129.0,322,126,8.3252,452600,0,0,0,1,0,3.020207
1,21,7099,1106.0,2401,1138,8.3014,358500,0,0,0,1,0,3.003638
2,52,1467,190.0,496,177,7.2574,352100,0,0,0,1,0,3.01874
3,52,1274,235.0,558,219,5.6431,341300,0,0,0,1,0,3.02809
4,52,1627,280.0,565,259,3.8462,342200,0,0,0,1,0,3.02809


In [7]:

X = df.drop(columns=["median_house_value"])  # Drop the target column to get features
y = df["median_house_value"]  # Target variable


In [8]:
#Split data into training, evaluation, and test sets
from sklearn.model_selection import train_test_split

#split into train and test 80% and 20% on test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#split train to evaluation
X_train, X_eval, y_train, y_eval = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

print(f"Train set size: {X_train.shape}, {y_train.shape}")
print(f"Validation set size: {X_eval.shape}, {y_eval.shape}")
print(f"Test set size: {X_test.shape}, {y_test.shape}")


Train set size: (13209, 12), (13209,)
Validation set size: (3303, 12), (3303,)
Test set size: (4128, 12), (4128,)


Scale the dataset. Scale X_train and transform the eval and test.

In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_eval_scaled = scaler.transform(X_eval)
X_test_scaled = scaler.transform(X_test)

In [10]:
# we will have to scale y as well since the ranges are high which will make the optimization harder
scaler_y = StandardScaler()
y_train_scaled = scaler_y.fit_transform(y_train.values.reshape(-1, 1))

#transform evaluation and test set
y_eval_scaled = scaler_y.transform(y_eval.values.reshape(-1, 1))
y_test_scaled = scaler_y.transform(y_test.values.reshape(-1, 1))

#y_pred_original = scaler_y.inverse_transform(y_pred_scaled.reshape(-1, 1))


In [14]:

from sklearn.linear_model import LinearRegression

from sklearn.metrics import root_mean_squared_error  as rmse

from sklearn.metrics import r2_score as r2_score

from sklearn.metrics import mean_absolute_error as mae



# Train the model

model = LinearRegression()

model.fit(X_train_scaled, y_train_scaled)



y_pred_eval_scaled = model.predict(X_eval_scaled)



# 3. Inverse transform the predictions to original scale (if needed)

y_pred_eval_original = scaler_y.inverse_transform(y_pred_eval_scaled.reshape(-1, 1))



# 4. Calculate evaluation metrics like MAE, MSE, and R² for evaluation data

mae_eval = mae(y_eval, y_pred_eval_original)

mse_eval = rmse(y_eval, y_pred_eval_original)

r2_eval = r2_score(y_eval, y_pred_eval_original)



# Print the evaluation metrics

print(f"Evaluation MAE: {mae_eval}")

print(f"Evaluation MSE: {mse_eval}")

print(f"Evaluation R²: {r2_eval}")


Evaluation MAE: 51396.51205226143
Evaluation MSE: 70437.50081587934
Evaluation R²: 0.6404720922155447


In [39]:
from plotly.subplots import make_subplots

import plotly.graph_objects as go

# Create a figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add scatter plot for predictions
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_pred_eval_original.flatten(),
        mode='markers',
        name='Predicted vs Actual',
        marker=dict(color='blue', size=8, opacity=0.7)
    ),
    secondary_y=False
)

# Add perfect prediction line
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_eval.values,
        mode='lines',
        name='Perfect Prediction Line',
        line=dict(color='red', dash='dash')
    ),
    secondary_y=False
)

# Calculate RMSE from MSE


# Add MSE loss line
fig.add_trace(
    go.Scatter(
        x=[y_eval.values.min(), y_eval.values.max()],
        y=[mae_eval, mae_eval],
        mode='lines',
        name=f'MAE: {mae_eval:.2f}',
        line=dict(color='black', dash='solid')
    ),
    secondary_y=True
)

# Update layout
fig.update_layout(
    title="Linear Regression: Predicted vs Actual (Evaluation Data)",
    xaxis_title="Actual Values",
    yaxis_title="Predicted Values",
    yaxis2_title="MAE",
    showlegend=True
)

# Show the plot
fig.show()


In [19]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error as mse 
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

# Initialize the KNN model
knn = KNeighborsRegressor(n_neighbors=7)

# Train the model with scaled data
knn.fit(X_train_scaled, y_train_scaled)

# Predict on the evaluation set (scaled)
y_pred_eval_scaled_knn = knn.predict(X_eval_scaled)

# Inverse transform the predictions to the original scale
y_pred_eval_original_knn = scaler_y.inverse_transform(y_pred_eval_scaled_knn.reshape(-1, 1))

# Calculate evaluation metrics
mae_eval_knn = mae(y_eval, y_pred_eval_original_knn)
mse_eval_knn = mse(y_eval, y_pred_eval_original_knn)
rmse_eval_knn = mse_eval_knn ** 0.5  # Taking square root of MSE to get RMSE
r2_eval_knn = r2_score(y_eval, y_pred_eval_original_knn)

# Print the evaluation metrics
print(f"KNN Evaluation MAE: {mae_eval_knn}")
print(f"KNN Evaluation MSE: {mse_eval_knn}")
print(f"KNN Evaluation RMSE: {rmse_eval_knn}")
print(f"KNN Evaluation R²: {r2_eval_knn}")


KNN Evaluation MAE: 41996.97426581895
KNN Evaluation MSE: 3769613342.4830737
KNN Evaluation RMSE: 61397.176991153865
KNN Evaluation R²: 0.7268372120498032


In [43]:
from plotly.subplots import make_subplots

import plotly.graph_objects as go

# Create a figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add scatter plot for predictions
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_pred_eval_original_knn.flatten(),
        mode='markers',
        name='Predicted vs Actual',
        marker=dict(color='orange', size=8, opacity=0.7)
    ),
    secondary_y=False
)

# Add perfect prediction line
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_eval.values,
        mode='lines',
        name='Perfect Prediction Line',
        line=dict(color='red', dash='dash')
    ),
    secondary_y=False
)

# Calculate RMSE from MSE


# Add MSE loss line
fig.add_trace(
    go.Scatter(
        x=[y_eval.values.min(), y_eval.values.max()],
        y=[mae_eval_knn, mae_eval_knn],
        mode='lines',
        name=f'MAE: {mae_eval_knn:.2f}',
        line=dict(color='black', dash='solid')
    ),
    secondary_y=True
)

# Update layout
fig.update_layout(
    title="K-Nearest Regression: Predicted vs Actual (Evaluation Data)",
    xaxis_title="Actual Values",
    yaxis_title="Predicted Values",
    yaxis2_title="MAE",
    showlegend=True
)

# Show the plot
fig.show()


In [24]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

# Initialize the Random Forest Regressor
rf = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model with scaled data
rf.fit(X_train_scaled, y_train_scaled)

# Predict on the evaluation set (scaled)
y_pred_eval_scaled_rf = rf.predict(X_eval_scaled)

# Inverse transform the predictions to the original scale
y_pred_eval_original_rf = scaler_y.inverse_transform(y_pred_eval_scaled_rf.reshape(-1, 1))

# Calculate evaluation metrics
mae_eval_rf = mae(y_eval, y_pred_eval_original_rf)
mse_eval_rf = mse(y_eval, y_pred_eval_original_rf)
rmse_eval_rf = mse_eval_rf ** 0.5  # Taking square root of MSE to get RMSE
r2_eval_rf = r2_score(y_eval, y_pred_eval_original_rf)

# Print the evaluation metrics
print(f"Random Forest Evaluation MAE: {mae_eval_rf}")
print(f"Random Forest Evaluation MSE: {mse_eval_rf}")
print(f"Random Forest Evaluation RMSE: {rmse_eval_rf}")
print(f"Random Forest Evaluation R²: {r2_eval_rf}")


  return fit_method(estimator, *args, **kwargs)


Random Forest Evaluation MAE: 38550.08590977899
Random Forest Evaluation MSE: 3227400904.125896
Random Forest Evaluation RMSE: 56810.21830732475
Random Forest Evaluation R²: 0.7661283137799765


In [44]:
from plotly.subplots import make_subplots

import plotly.graph_objects as go

# Create a figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add scatter plot for predictions
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_pred_eval_original_rf.flatten(),
        mode='markers',
        name='Predicted vs Actual',
        marker=dict(color='purple', size=8, opacity=0.7)
    ),
    secondary_y=False
)

# Add perfect prediction line
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_eval.values,
        mode='lines',
        name='Perfect Prediction Line',
        line=dict(color='red', dash='dash')
    ),
    secondary_y=False
)

# Calculate RMSE from MSE


# Add MSE loss line
fig.add_trace(
    go.Scatter(
        x=[y_eval.values.min(), y_eval.values.max()],
        y=[mae_eval_rf, mae_eval_rf],
        mode='lines',
        name=f'MAE: {mae_eval_rf:.2f}',
        line=dict(color='black', dash='solid')
    ),
    secondary_y=True
)

# Update layout
fig.update_layout(
    title="Random Forest Regression: Predicted vs Actual (Evaluation Data)",
    xaxis_title="Actual Values",
    yaxis_title="Predicted Values",
    yaxis2_title="MAE",
    showlegend=True
)

# Show the plot
fig.show()


In [None]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error as mse
from sklearn.metrics import mean_absolute_error as mae
from sklearn.metrics import r2_score

# Define the model
gbr = GradientBoostingRegressor(random_state=42)

# Define the parameter grid for grid search
param_grid = {
    'n_estimators': [50, 100, 150],          # Number of trees
    'learning_rate': [0.01, 0.05, 0.1],      # Learning rate
    'max_depth': [3, 5, 7],                  # Depth of the trees
    'subsample': [0.8, 1.0],                 # Fraction of samples used to fit each tree
    'min_samples_split': [2, 5],              # Minimum samples to split a node
}

# Set up GridSearchCV
grid_search = GridSearchCV(estimator=gbr, param_grid=param_grid, cv=5, n_jobs=-1, scoring='neg_mean_squared_error')

# Fit GridSearchCV to the data
grid_search.fit(X_train_scaled, y_train_scaled)

# Get the best hyperparameters from the grid search
best_params = grid_search.best_params_
print(f"Best hyperparameters: {best_params}")

# Use the best model to predict on the evaluation set
best_gbr = grid_search.best_estimator_
y_pred_eval_scaled_gbr = best_gbr.predict(X_eval_scaled)

# Inverse transform the predictions to the original scale
y_pred_eval_original_gbr = scaler_y.inverse_transform(y_pred_eval_scaled_gbr.reshape(-1, 1))

# Calculate evaluation metrics
mae_eval_gbr = mae(y_eval, y_pred_eval_original_gbr)
mse_eval_gbr = mse(y_eval, y_pred_eval_original_gbr)
rmse_eval_gbr = mse_eval_gbr ** 0.5  # Taking square root of MSE to get RMSE
r2_eval_gbr = r2_score(y_eval, y_pred_eval_original_gbr)

# Print the evaluation metrics
print(f"Gradient Boosting Evaluation MAE: {mae_eval_gbr}")
print(f"Gradient Boosting Evaluation MSE: {mse_eval_gbr}")
print(f"Gradient Boosting Evaluation RMSE: {rmse_eval_gbr}")
print(f"Gradient Boosting Evaluation R²: {r2_eval_gbr}")


  y = column_or_1d(y, warn=True)  # TODO: Is this still required?


Best hyperparameters: {'learning_rate': 0.1, 'max_depth': 7, 'min_samples_split': 5, 'n_estimators': 150, 'subsample': 1.0}
Gradient Boosting Evaluation MAE: 37866.2134089896
Gradient Boosting Evaluation MSE: 3043951221.994419
Gradient Boosting Evaluation RMSE: 55172.01484443376
Gradient Boosting Evaluation R²: 0.7794218858434185


In [45]:
from plotly.subplots import make_subplots

import plotly.graph_objects as go

# Create a figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add scatter plot for predictions
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_pred_eval_original_gbr.flatten(),
        mode='markers',
        name='Predicted vs Actual',
        marker=dict(color='green', size=8, opacity=0.7)
    ),
    secondary_y=False
)

# Add perfect prediction line
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_eval.values,
        mode='lines',
        name='Perfect Prediction Line',
        line=dict(color='red', dash='dash')
    ),
    secondary_y=False
)

# Calculate RMSE from MSE


# Add MSE loss line
fig.add_trace(
    go.Scatter(
        x=[y_eval.values.min(), y_eval.values.max()],
        y=[mae_eval_gbr, mae_eval_gbr],
        mode='lines',
        name=f'MAE: {mae_eval_gbr:.2f}',
        line=dict(color='black', dash='solid')
    ),
    secondary_y=True
)

# Update layout
fig.update_layout(
    title="Gradient Boosting Regression: Predicted vs Actual (Evaluation Data)",
    xaxis_title="Actual Values",
    yaxis_title="Predicted Values",
    yaxis2_title="MAE",
    showlegend=True
)

# Show the plot
fig.show()


In [27]:

%pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [28]:

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np

# Define the neural network architecture
def create_model(input_shape):
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=input_shape),
        layers.Dropout(0.2),
        layers.Dense(32, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(16, activation='relu'),
        layers.Dense(1)
    ])
    
    model.compile(optimizer='adam', loss='mse', metrics=['mae'])
    return model

# Create and train the model
nn_model = create_model((X_train_scaled.shape[1],))

# Train the model with early stopping to prevent overfitting
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

history = nn_model.fit(
    X_train_scaled,
    y_train_scaled,
    epochs=100,
    batch_size=32,
    validation_data=(X_eval_scaled, y_eval_scaled),
    callbacks=[early_stopping],
    verbose=1
)

# Make predictions on evaluation set
y_pred_eval_scaled_nn = nn_model.predict(X_eval_scaled)
y_pred_eval_original_nn = scaler_y.inverse_transform(y_pred_eval_scaled_nn)

# Calculate evaluation metrics
mae_eval_nn = mae(y_eval, y_pred_eval_original_nn)
mse_eval_nn = mse(y_eval, y_pred_eval_original_nn)
rmse_eval_nn = np.sqrt(mse_eval_nn)
r2_eval_nn = r2_score(y_eval, y_pred_eval_original_nn)

# Print the evaluation metrics
print(f"Neural Network Evaluation MAE: {mae_eval_nn}")
print(f"Neural Network Evaluation MSE: {mse_eval_nn}")
print(f"Neural Network Evaluation RMSE: {rmse_eval_nn}")
print(f"Neural Network Evaluation R²: {r2_eval_nn}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4ms/step - loss: 0.8121 - mae: 0.6167 - val_loss: 0.3562 - val_mae: 0.4408
Epoch 2/100
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.4240 - mae: 0.4497 - val_loss: 0.3229 - val_mae: 0.4186
Epoch 3/100
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.3286 - mae: 0.4147 - val_loss: 0.3048 - val_mae: 0.4067
Epoch 4/100
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.3135 - mae: 0.4016 - val_loss: 0.3149 - val_mae: 0.3994
Epoch 5/100
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.3108 - mae: 0.3912 - val_loss: 0.2981 - val_mae: 0.4009
Epoch 6/100
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.2944 - mae: 0.3854 - val_loss: 0.2810 - val_mae: 0.3871
Epoch 7/100
[1m413/413[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/

In [46]:
from plotly.subplots import make_subplots

import plotly.graph_objects as go

# Create a figure with secondary y-axis
fig = make_subplots(specs=[[{"secondary_y": True}]])

# Add scatter plot for predictions
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_pred_eval_original_nn.flatten(),
        mode='markers',
        name='Predicted vs Actual',
        marker=dict(color='gray', size=8, opacity=0.7)
    ),
    secondary_y=False
)

# Add perfect prediction line
fig.add_trace(
    go.Scatter(
        x=y_eval.values,
        y=y_eval.values,
        mode='lines',
        name='Perfect Prediction Line',
        line=dict(color='red', dash='dash')
    ),
    secondary_y=False
)

# Calculate RMSE from MSE


# Add MSE loss line
fig.add_trace(
    go.Scatter(
        x=[y_eval.values.min(), y_eval.values.max()],
        y=[mae_eval_nn, mae_eval_nn],
        mode='lines',
        name=f'MAE: {mae_eval_nn:.2f}',
        line=dict(color='black', dash='solid')
    ),
    secondary_y=True
)

# Update layout
fig.update_layout(
    title="Sequential Neural Network : Predicted vs Actual (Evaluation Data)",
    xaxis_title="Actual Values",
    yaxis_title="Predicted Values",
    yaxis2_title="MAE",
    showlegend=True
)

# Show the plot
fig.show()
