In [1]:
# Standard imports
import pandas as pd
import sqlite3 as sql3
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import numpy as np
from sklearn.metrics import r2_score
import keras_tuner as kt

pd.set_option("display.max_colwidth", None) 

In [2]:
# Importing data from sqlite file
database_path = './FlaskSQLBackend/laptop_data_database.sqlite'

laptop_df = ''

with sql3.connect(database_path) as conn:
    laptop_df = pd.read_sql("SELECT * FROM Laptops", conn)

    laptop_df = pd.get_dummies(laptop_df)

In [3]:
print(len(laptop_df.columns))

X = laptop_df.drop('PRICE', axis=1)  # Assuming 'PRICE' is the target variable
y = laptop_df['PRICE']

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # Adjust test size as needed

# Scale numerical features using StandardScaler
numerical_features = ['INCHES', 'MEMORY', 'WEIGHT', 'SCREENWIDTH', 'SCREENHEIGHT', 'CPUSPEED', 'PRIMARYSTORAGEAMOUNT', 'SECONDARYSTORAGEAMOUNT']
X[numerical_features].apply(pd.to_numeric)
X = X.dropna(how='any',axis=0) 

X_scaler = StandardScaler()
X_train[numerical_features] = X_scaler.fit_transform(X_train[numerical_features])
X_test[numerical_features] = X_scaler.transform(X_test[numerical_features]) # Use the same scaler fitted on training data

y_scaler = StandardScaler()
y_train = y_scaler.fit_transform(pd.DataFrame(y_train))
y_test = y_scaler.transform(pd.DataFrame(y_test)) # Use the same scaler fitted on training data

X_train = X_train.to_numpy().astype(np.float32)
X_test = X_test.to_numpy().astype(np.float32)

63


In [None]:
def create_model(hp):
	# Creating the model
	nn_model = tf.keras.models.Sequential()

	# Allowing the kerastuner to decide which activation function to use in the hidden layers
	activation = hp.Choice('activation', ['relu', 'tanh', 'sigmoid'])

	nn_model.add(tf.keras.layers.Dense(
		units = hp.Int(
			'first_units',
			min_value=30,
			max_value=90,
			step=2
		),
		activation = activation,
		input_dim=62
	))

	for i in range(hp.Int('num_layers', 1, 5)):
		nn_model.add(tf.keras.layers.Dense(
			units = hp.Int(
				'units_' + str(i),
				min_value=10,
				max_value=80,
				step=2
			),
			activation = activation
		))

	nn_model.add(tf.keras.layers.Dense(1))

	nn_model.compile(loss='mse', optimizer='adam', metrics=['mae'])

	return nn_model

tuner = kt.Hyperband(
	create_model,
	objective='val_loss', # Validation accuracy
	max_epochs=80,
	hyperband_iterations=2
)

# Tuning for the best hyperparameters
tuner.search(X_train, y_train, epochs=80, validation_data=(X_test, y_test))

best_hyper = tuner.get_best_hyperparameters(1)[0]

nn_model = tuner.get_best_models(1)[0]

nn_model.summary()

nn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

Reloading Tuner from .\untitled_project\tuner0.json
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 62)                3906      
                                                                 
 dense_1 (Dense)             (None, 74)                4662      
                                                                 
 dense_2 (Dense)             (None, 50)                3750      
                                                                 
 dense_3 (Dense)             (None, 1)                 51        
                                                                 
Total params: 12,369
Trainable params: 12,369
Non-trainable params: 0
_________________________________________________________________


In [5]:
loss, mae = nn_model.evaluate(X_test, y_test, verbose=0)

predicted_y = [x[0] for x in y_scaler.inverse_transform(nn_model.predict(X_test))]
y_test = [x[0] for x in y_scaler.inverse_transform(y_test)]
r_square = r2_score(y_test, predicted_y)

print(f"Loss: {loss}")
print(f"Mean Absolute Error: {mae}")
print(f"R Squared: {r_square}")


dataframe_values = pd.DataFrame({
    "Predicted" : predicted_y,
    "Actual" : y_test
})

print(dataframe_values.head(50))

Loss: 0.04130559787154198
Mean Absolute Error: 0.11579518020153046
R Squared: 0.9583918837350499
        Predicted       Actual
0    93363.000000   93985.9200
1    58811.542969   58021.9200
2    23158.441406   17529.1200
3   105909.710938  101178.7200
4    38182.933594   38041.3872
5    81886.632812   84129.1200
6    35854.910156   53226.7200
7    62690.128906   63669.6000
8    63510.132812   63882.7200
9    55029.093750   53759.5200
10   14852.277344   15557.7600
11   47472.074219   53226.7200
12   68236.515625   69210.7200
13   30542.863281   35111.5200
14   18882.730469   18328.3200
15   80083.671875   93240.0000
16   43511.628906   42357.6000
17   61706.699219   76030.5600
18   32990.492188   34578.7200
19   56677.648438   58021.9200
20   28422.984375   30316.3200
21   18455.214844   18328.3200
22   46629.316406   47898.7200
23   19700.066406   17582.4000
24   21338.730469   20459.5200
25   50091.695312   50562.7200
26  139185.781250  146519.4672
27   42552.890625   43156.8000
28  