<a href="https://colab.research.google.com/github/AndresS0103/neural-network-data-warehouse-project/blob/main/ML-Warehouse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import required libraries
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the dataset
# The URL is a Google Sheets CSV-export link, so pandas fetches the sheet over the
# network and parses the response as CSV (running this requires internet access).
# NOTE(review): the rest of this script reads the columns Price_euros, Company,
# Product, TypeName, Ram, PrimaryStorageType, CPU_model and GPU_model from it.
file_path = 'https://docs.google.com/spreadsheets/d/1GMnSlL_KB0fCAYSY23CbD7YRu_o1y7nfnx7d27XWuOY/export?format=csv'
laptop_data = pd.read_csv(file_path)

# Step 1: Define price categories
def categorize_price(price):
    """Return the price tier label for a laptop price.

    Tiers (upper bounds are exclusive):
        * below 700          -> 'Barata'
        * 700 up to 1500     -> 'Moderada'
        * 1500 and above     -> 'Cara'

    Any value that is not below one of the bounds (including NaN, which
    compares False against everything) ends up in 'Cara'.
    """
    # Bounds are checked in ascending order; the first one the price is under wins.
    for upper_bound, label in ((700, 'Barata'), (1500, 'Moderada')):
        if price < upper_bound:
            return label
    return 'Cara'

# Label every laptop with its price tier, derived from the price in euros
laptop_data['Price_Category'] = laptop_data['Price_euros'].map(categorize_price)

# Step 2: Generate textual representation for each laptop
# Layout: "<Company> <Product> <TypeName>, <Ram>GB RAM, <storage>, <CPU>, <GPU>"
# Ram is cast to str so it can be concatenated with the text columns.
laptop_data['Description'] = (
    laptop_data['Company']
    + " "
    + laptop_data['Product']
    + " "
    + laptop_data['TypeName']
    + ", "
    + laptop_data['Ram'].astype(str)
    + "GB RAM, "
    + laptop_data['PrimaryStorageType']
    + ", "
    + laptop_data['CPU_model']
    + ", "
    + laptop_data['GPU_model']
)

# Step 3: Preprocess the data for the neural network
# Features are the generated text descriptions; targets are the price categories
X = laptop_data['Description']
y = laptop_data['Price_Category']

# Map price categories to numerical values
category_to_id = {'Barata': 0, 'Moderada': 1, 'Cara': 2}
y = y.map(category_to_id)

# Convert labels to one-hot encoding (one column per category)
y = to_categorical(y, num_classes=len(category_to_id))

# Split into training and testing sets BEFORE fitting the tokenizer, so the
# vocabulary is learned from training descriptions only and nothing about the
# test set leaks into preprocessing. The split depends only on the number of
# rows and random_state, so it selects the same rows as splitting the encoded
# matrix would.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Tokenize the text descriptions (vocabulary fitted on the training split only)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(X_train_text)

# Pad sequences to the same length
maxlen = 50

# Encoded view of the full dataset, kept for any later code that relies on
# X_seq / X_padded. Words missing from the training vocabulary are skipped.
X_seq = tokenizer.texts_to_sequences(X)
X_padded = pad_sequences(X_seq, padding='post', maxlen=maxlen)

# Encode each split with the training vocabulary
X_train = pad_sequences(
    tokenizer.texts_to_sequences(X_train_text), padding='post', maxlen=maxlen
)
X_test = pad_sequences(
    tokenizer.texts_to_sequences(X_test_text), padding='post', maxlen=maxlen
)

# Define the updated neural network model
# The input shape is declared with an explicit Input layer instead of the
# Embedding layer's `input_length` argument, which is deprecated in Keras 3.
model = tf.keras.Sequential([
    # One padded sequence of `maxlen` integer token ids per sample
    tf.keras.Input(shape=(maxlen,), dtype='int32'),
    # +1 because tokenizer word indices start at 1; index 0 is the padding value
    tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=32),
    # Average the token embeddings into a single 32-dimensional vector
    tf.keras.layers.GlobalAveragePooling1D(),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(3, activation='softmax')  # 3 categories: Barata, Moderada, Cara
])

# Compile the model
# categorical_crossentropy matches the one-hot encoded labels
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Train the model
# validation_split=0.2 sets aside 20% of the training data for the per-epoch
# validation metrics; X_test / y_test are not used during training.
history = model.fit(X_train, y_train, epochs=15, batch_size=32, validation_split=0.2)

# Evaluate the model on the test set
# evaluate() returns the loss followed by the compiled metric (accuracy)
test_loss, test_accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

# Prediction function
def predict_laptop(description):
    """Predict the price category for a single laptop description.

    The text is encoded with the module-level ``tokenizer``, padded to
    ``maxlen`` and passed to the module-level ``model``.

    Args:
        description: Free-text laptop description.

    Returns:
        A ``(category, probabilities)`` tuple: the label of the highest-scoring
        class ('Barata', 'Moderada' or 'Cara') and the model's output row for
        this description (one score per class, in that order).
    """
    labels_by_id = {0: 'Barata', 1: 'Moderada', 2: 'Cara'}
    encoded = pad_sequences(
        tokenizer.texts_to_sequences([description]),
        padding='post',
        maxlen=maxlen,
    )
    # The batch holds a single description, so keep only its row of scores.
    scores = model.predict(encoded)[0]
    return labels_by_id[scores.argmax(axis=-1)], scores

# Example predictions
# The example mirrors the layout of the training descriptions:
# "<company> <product> <type>, <ram>GB RAM, <storage>, <cpu>, <gpu>"
example_description = "Dell Inspiron Notebook, 16GB RAM, SSD, Core i7, NVIDIA GTX 1050"
category, probabilities = predict_laptop(example_description)
print(f"Predicted Category: {category}")
# One score per class, ordered by class id: Barata, Moderada, Cara
print(f"Probabilities: {probabilities}")


Epoch 1/15




[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 10ms/step - accuracy: 0.4000 - loss: 1.0876 - val_accuracy: 0.4363 - val_loss: 1.0617
Epoch 2/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4191 - loss: 1.0534 - val_accuracy: 0.4363 - val_loss: 1.0444
Epoch 3/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.4644 - loss: 1.0108 - val_accuracy: 0.5049 - val_loss: 1.0146
Epoch 4/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5421 - loss: 1.0044 - val_accuracy: 0.6176 - val_loss: 0.9617
Epoch 5/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6754 - loss: 0.9212 - val_accuracy: 0.6176 - val_loss: 0.8922
Epoch 6/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6824 - loss: 0.8342 - val_accuracy: 0.6176 - val_loss: 0.8158
Epoch 7/15
[1m26/26[0m [32m━━━━━━━━━━━━━━━━━━━━