In [1]:
# Standard imports
import pandas as pd
import sqlite3 as sql3
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import numpy as np
from sklearn.metrics import r2_score
import keras_tuner as kt

pd.set_option("display.max_colwidth", None) 

In [2]:
# Importing data from sqlite file
database_path = './FlaskSQLBackend/laptop_data_database.sqlite'

laptop_df = ''

with sql3.connect(database_path) as conn:
    laptop_df = pd.read_sql("SELECT * FROM Laptops", conn)

    laptop_df = pd.get_dummies(laptop_df)

In [3]:
print(len(laptop_df.columns))

X = laptop_df.drop('PRICE', axis=1)  # Assuming 'PRICE' is the target variable
y = laptop_df['PRICE']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # Adjust test size as needed

# Scale numerical features using StandardScaler
numerical_features = ['INCHES', 'MEMORY', 'WEIGHT', 'SCREENWIDTH', 'SCREENHEIGHT', 'CPUSPEED', 'PRIMARYSTORAGEAMOUNT', 'SECONDARYSTORAGEAMOUNT']
X[numerical_features].apply(pd.to_numeric)
X = X.dropna(how='any',axis=0) 

X_scaler = StandardScaler()
X_train[numerical_features] = X_scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = X_scaler.transform(X_test[numerical_features]) # Use the same scaler fitted on training data

y_scaler = StandardScaler()
y_train = y_scaler.fit_transform(pd.DataFrame(y_train))
y_test = y_scaler.transform(pd.DataFrame(y_test)) # Use the same scaler fitted on training data

X_train = X_train.to_numpy().astype(np.float32)
X_test = X_test.to_numpy().astype(np.float32)

63


In [None]:
def create_model(hp):
	# Creating the model
	nn_model = tf.keras.models.Sequential()

	# Allowing the kerastuner to decide which activation function to use in the hidden layers
	activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

	nn_model.add(tf.keras.layers.Dense(
		units = hp.Int(
			'first_units',
			min_value=30,
			max_value=90,
			step=2
		),
		activation = activation,
		input_dim=62
	))

	for i in range(hp.Int('num_layers', 1, 5)):
		nn_model.add(tf.keras.layers.Dense(
			units = hp.Int(
				'units_' + str(i),
				min_value=10,
				max_value=80,
				step=2
			),
			activation = activation
		))

	nn_model.add(tf.keras.layers.Dense(1))

	nn_model.compile(loss='mse', optimizer='adam', metrics=['mae'])

	return nn_model

tuner = kt.Hyperband(
	create_model,
	objective='val_loss', # Validation accuracy
	max_epochs=80,
	hyperband_iterations=2
)

# Tuning for the best hyperparameters
tuner.search(X_train, y_train, epochs=80, validation_data=(X_test, y_test))

best_hyper = tuner.get_best_hyperparameters(1)[0]

nn_model = tuner.get_best_models(1)[0]

#nn_model = tf.keras.Sequential([
#    tf.keras.layers.Dense(64, activation='relu', input_shape=(X_train.shape[1],)), # Input layer with correct shape
#    tf.keras.layers.Dense(32, activation='relu'),
#    tf.keras.layers.Dense(16, activation='relu'),
#    tf.keras.layers.Dense(1) # Output layer with a single neuron for regression (price)
#])
# fit_model = nn_model.fit(X_train, y_train, epochs=65, batch_size=32)

nn_model.summary()

nn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

Trial 29 Complete [00h 00m 05s]
val_loss: 0.2731573283672333

Best val_loss So Far: 0.14346586167812347
Total elapsed time: 00h 01m 12s

Search: Running Trial #30

Value             |Best Value So Far |Hyperparameter
relu              |relu              |activation
56                |50                |first_units
1                 |2                 |num_layers
46                |32                |units_0
62                |46                |units_1
52                |76                |units_2
12                |46                |units_3
76                |50                |units_4
20                |20                |tuner/epochs
0                 |7                 |tuner/initial_epoch
0                 |2                 |tuner/bracket
0                 |2                 |tuner/round

Epoch 1/20
Epoch 2/20
 4/33 [==>...........................] - ETA: 0s - loss: 0.3409 - mae: 0.3957

In [None]:
loss, mae = nn_model.evaluate(X_test, y_test, verbose=0)

predicted_y = [x[0] for x in y_scaler.inverse_transform(nn_model.predict(X_test))]
y_test = [x[0] for x in y_scaler.inverse_transform(y_test)]
r_square = r2_score(y_test, predicted_y)

print(f"Loss: {loss}")
print(f"Mean Absolute Error: {mae}")
print(f"R Squared: {r_square}")


dataframe_values = pd.DataFrame({
    "Predicted" : predicted_y,
    "Actual" : y_test
})

print(dataframe_values.head(50))

Loss: 0.12896502017974854
Mean Absolute Error: 0.2421550750732422
R Squared: 0.8758468132250212
        Predicted       Actual
0    29955.500000   35964.0000
1    68258.539062   71847.0144
2   128591.601562  111834.7200
3    98449.679688  101178.7200
4    29191.908203   24455.5200
5    69504.656250   74538.1872
6   100266.007812   95850.7200
7    89072.046875   79866.7200
8    77663.296875   86793.1200
9    17280.847656   18594.7200
10   16292.695312   12201.1200
11   92390.257812  107257.9680
12   52670.226562   67772.1600
13   51656.175781   66546.7200
14   96672.070312   94731.8400
15   31357.068359   36089.2080
16   31219.378906   24935.0400
17   29280.535156   32713.9200
18   86681.656250   95850.7200
19   34772.695312   63882.7200
20   38550.878906   51095.5200
21   65469.460938   55922.6880
22   31583.355469   37570.3920
23   44060.027344   35111.5200
24   44340.601562   53733.9456
25   52801.035156   53226.7200
26  120324.117188   93932.6400
27   46552.417969   61485.1200
28   