# Neural Network Model
- Contributed by Ching-Hao Wang, and slightly edited by Hongjie Wang.

# Data Preprocessing

In [24]:
import os
import sys

# Make the project's local modules in ../src importable before importing them.
sys.path.append(os.path.abspath("../src"))

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler

from model import NN_model
from preprocessing import preprocess_data


In [25]:
# Run the project's preprocessing step on the raw CSV and write the result to
# disk as a CSV without the index column.
dataset_path = "../data/used_device_data.csv"
processed_path = "../data/processed_data.csv"

df = preprocess_data(dataset_path)
df.to_csv(processed_path, index=False)
print("Preprocessing Completed. Data saved as processed_data.csv in data folder.")

Preprocessing Completed. Data saved as processed_data.csv in data folder.


In [26]:
# Load the preprocessed dataset from its CSV file into a DataFrame.
processed_df = pd.read_csv("../data/processed_data.csv")

In [27]:
# Print the frame's structure: row count, column names, non-null counts, dtypes.
processed_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3454 entries, 0 to 3453
Data columns (total 49 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   screen_size              3454 non-null   float64
 1   4g                       3454 non-null   int64  
 2   5g                       3454 non-null   int64  
 3   rear_camera_mp           3454 non-null   float64
 4   front_camera_mp          3454 non-null   float64
 5   internal_memory          3454 non-null   float64
 6   ram                      3454 non-null   float64
 7   battery                  3454 non-null   float64
 8   weight                   3454 non-null   float64
 9   release_year             3454 non-null   float64
 10  days_used                3454 non-null   float64
 11  normalized_used_price    3454 non-null   float64
 12  normalized_new_price     3454 non-null   float64
 13  device_brand_Alcatel     3454 non-null   int64  
 14  device_brand_Apple      

In [28]:
# Display per-column summary statistics (count, mean, std, min, quartiles, max).
processed_df.describe()

Unnamed: 0,screen_size,4g,5g,rear_camera_mp,front_camera_mp,internal_memory,ram,battery,weight,release_year,...,device_brand_Samsung,device_brand_Sony,device_brand_Spice,device_brand_Vivo,device_brand_XOLO,device_brand_Xiaomi,device_brand_ZTE,os_Others,os_Windows,os_iOS
count,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,...,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0,3454.0
mean,0.336836,0.676028,0.044007,0.194168,0.204792,0.053259,0.335232,0.285593,0.144664,0.423608,...,0.098726,0.024899,0.008686,0.033874,0.014186,0.038217,0.040533,0.039664,0.019398,0.010423
std,0.14847,0.468057,0.20514,0.098083,0.217764,0.082937,0.113883,0.140842,0.112378,0.328351,...,0.298337,0.155839,0.092804,0.18093,0.118276,0.191746,0.197234,0.195197,0.137939,0.101573
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.297308,0.0,0.0,0.102671,0.0625,0.015615,0.33222,0.173536,0.092875,0.142857,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,0.30238,1.0,0.0,0.165275,0.15625,0.031241,0.33222,0.27115,0.115776,0.357143,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,0.400312,1.0,0.0,0.269616,0.25,0.062491,0.33222,0.37961,0.147583,0.714286,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


# Model training and definition

In [29]:
# Split and training settings, named so they can be tuned in one place
# instead of being buried as literals in the calls below.
TARGET_COLUMN = "normalized_used_price"
TEST_SIZE = 0.2
RANDOM_STATE = 42
EPOCHS = 50
BATCH_SIZE = 32

# Splitting data: every column except the target is used as a feature.
X = processed_df.drop(columns=[TARGET_COLUMN]).values
y = processed_df[TARGET_COLUMN].values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
)

# Sanity check: the split must partition the rows without losing any.
assert len(X_train) + len(X_test) == len(processed_df)

# Initialize and train model; the input width is the number of feature columns.
input_shape = X_train.shape[1]
nn_model = NN_model(input_shape=input_shape)
nn_model.train(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE)

# Evaluate model on the held-out test split.
results = nn_model.evaluate(X_test, y_test)
print(results)

Epoch 1/50


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - loss: 10.8871 - mae: 2.9668 - val_loss: 0.2067 - val_mae: 0.3539
Epoch 2/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3725 - mae: 0.4842 - val_loss: 0.1101 - val_mae: 0.2485
Epoch 3/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2949 - mae: 0.4299 - val_loss: 0.0954 - val_mae: 0.2321
Epoch 4/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2739 - mae: 0.4145 - val_loss: 0.0901 - val_mae: 0.2263
Epoch 5/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2301 - mae: 0.3800 - val_loss: 0.0862 - val_mae: 0.2186
Epoch 6/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2199 - mae: 0.3683 - val_loss: 0.0806 - val_mae: 0.2122
Epoch 7/50
[1m78/78[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1989 - mae: 0.35

In [30]:
# Spot-check the first 20 test rows: predict each row on its own and print the
# prediction next to the true target value.
for sample_index in range(20):
    # Reshape the single feature row into a one-row 2-D array before predicting.
    sample_data = X_test[sample_index].reshape(1, -1)
    predicted_value = nn_model.predict(sample_data)
    actual_value = y_test[sample_index]
    print(f"Predicted Value: {predicted_value[0]}, Actual Value: {actual_value}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Predicted Value: 3.8686485290527344, Actual Value: 3.974246349
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
Predicted Value: 4.868424892425537, Actual Value: 5.001190835
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Predicted Value: 4.307069301605225, Actual Value: 4.562053895
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Predicted Value: 4.130362033843994, Actual Value: 4.176231722
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
Predicted Value: 3.7966015338897705, Actual Value: 3.156148995
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Predicted Value: 4.238928318023682, Actual Value: 3.777119243
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Predicted Value: 4.708792209625244, Actual Value: 