In [1]:
#Multiple class

In [41]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Conv1D, GlobalMaxPooling1D, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


maximum_features = 5000  # Maximum number of words to consider as features
maximum_length = 100  # Maximum length of input sequences
word_embedding_dims = 50  # Dimension of word embeddings
no_of_filters = 250  # Number of filters in the convolutional layer
kernel_size = 3
hidden_dims = 250  # Number of neurons in the hidden layer
batch_size = 32
epochs = 5
num_classes = 5


df = pd.read_csv('/content/products_clean.csv')
texts = df['description'].values
labels = df['product_type'].values



In [49]:
label_mapping = {'contour': 0, 'eye_makeup': 1, 'foundation': 2, 'lipstic': 3, 'nail_polish': 4}
y = np.array([label_mapping[label] for label in labels])
X_train, X_test, y_train, y_test = train_test_split(texts, y, test_size=0.2, random_state=42)


In [50]:
X_train

array(['twist shout renowned extreme lasting power creamy liner delivers intense pigmentation precise application easy twist tube never need sharpening',
       'let eye naturally pop b smudged subtle eye color add tint color base lash organic cream eye color b smudged eliminates inevitable uneven line traditional eyeliner require expert blending technique messy powder based shadow simply smudge along lash line color stay place long lasting look',
       'achieve flawless complexion silky smooth e l f studio flawless finish foundation lightweight oil free formula blend naturally beautiful semi matte finish liquid foundation applies easily last day visibly brighter radiant looking skin using e l f flawless finish foundation restores uneven skin texture tone importantly spf protects skin harmful sun damage shade range active ingredient octyl salicylate titanium dioxide inactive ingredient water aqua cyclopentasiloxane isododecane hydrogenated polyisobutene cetyl peg ppg dimethicone polyg

In [52]:
y_train

array([1, 1, 2, 0, 0, 1, 2, 0, 3, 2, 1, 1, 1, 1, 2, 1, 2, 2, 1, 1, 3, 1,
       1, 4, 2, 3, 1, 1, 3, 3, 0, 4, 3, 1, 2, 1, 0, 1, 1, 3, 0, 1, 1, 3,
       0, 2, 2, 2, 2, 3, 4, 1, 1, 1, 4, 0, 2, 3, 3, 0, 0, 0, 3, 0, 3, 2,
       2, 3, 0, 0, 0, 1, 0, 1, 1, 3, 1, 2, 1, 3, 2, 1, 0, 3, 3, 2, 0, 3,
       3, 0, 1, 0, 2, 3, 3, 1, 3, 0, 1, 1, 3, 1, 3, 3, 1, 3, 1, 2, 4, 0,
       2, 3, 1, 0, 2, 0, 2, 3, 2, 2, 3, 2, 2, 1, 1, 2, 0, 0, 3, 1, 1, 3,
       1, 2, 2, 3, 2, 1, 3, 2, 1, 1, 1, 2, 0, 1, 0, 1, 0, 1, 4, 3, 2, 3,
       0, 2, 1, 0, 3, 1, 0, 0, 1, 1, 4, 1, 1, 1, 1, 3, 1, 2, 1, 4, 1, 3,
       1, 0, 3, 3, 1, 1, 3, 3, 0, 4, 4, 1, 0, 3, 0, 2, 0, 4, 1, 0, 1, 3,
       1, 3, 3, 1, 1, 1, 3, 0, 2, 1, 3, 3, 4, 3, 1, 1, 3, 2, 4, 3, 3, 2,
       2, 3, 2, 3, 3, 0, 3, 2, 2, 2, 1, 3, 0, 1, 1, 1, 1, 3, 1, 3, 2, 3,
       2, 0, 0, 1, 1, 1, 4, 1, 3, 3, 1, 1, 0, 2, 1, 1, 0, 0, 1, 1, 3, 1,
       1, 0, 3, 1, 1, 2, 4, 1, 0, 1, 2, 4, 1, 2, 1, 1, 4, 0, 2, 1, 1, 1,
       3, 1, 0, 4, 1, 3, 2, 4, 1, 0, 0, 1, 1, 0, 1,

In [53]:
tokenizer = Tokenizer(num_words=maximum_features)
tokenizer.fit_on_texts(X_train)


X_train = tokenizer.texts_to_sequences(X_train)
X_test = tokenizer.texts_to_sequences(X_test)


In [54]:
X_train

[[394,
  2937,
  1556,
  585,
  102,
  730,
  99,
  46,
  198,
  224,
  1082,
  205,
  82,
  83,
  394,
  625,
  193,
  292,
  586],
 [395,
  8,
  316,
  284,
  687,
  1557,
  218,
  8,
  6,
  304,
  558,
  6,
  151,
  15,
  122,
  110,
  8,
  6,
  687,
  1557,
  2938,
  2939,
  1813,
  29,
  910,
  84,
  2940,
  731,
  528,
  1083,
  975,
  23,
  277,
  59,
  732,
  219,
  410,
  15,
  29,
  6,
  141,
  911,
  40,
  102,
  14],
 [270,
  189,
  226,
  135,
  50,
  160,
  73,
  688,
  123,
  189,
  34,
  76,
  163,
  5,
  16,
  11,
  62,
  316,
  164,
  976,
  26,
  34,
  103,
  76,
  1224,
  172,
  161,
  78,
  689,
  977,
  207,
  120,
  7,
  165,
  160,
  73,
  688,
  189,
  34,
  76,
  2129,
  1813,
  7,
  208,
  79,
  2130,
  350,
  529,
  7,
  2131,
  210,
  587,
  4,
  157,
  659,
  2,
  1366,
  844,
  20,
  24,
  1814,
  2,
  41,
  211,
  411,
  501,
  117,
  467,
  271,
  236,
  690,
  42,
  305,
  502,
  978,
  530,
  293,
  2941,
  330,
  35,
  63,
  42,
  559,
  421,
  266,


In [55]:
X_test

[[1406,
  509,
  12,
  75,
  122,
  2,
  633,
  456,
  1236,
  757,
  1911,
  177,
  48,
  2720,
  15,
  430,
  11,
  22,
  361,
  11,
  2762,
  12,
  868,
  243,
  767,
  4087,
  2114,
  1236,
  3978,
  15,
  485,
  896,
  44,
  16,
  203,
  16,
  3870,
  545,
  16,
  1284,
  613,
  16,
  142,
  806,
  16,
  100,
  16,
  797,
  16,
  142,
  6],
 [37,
  282,
  27,
  1107,
  931,
  9,
  80,
  825,
  872,
  46,
  798,
  1812,
  1515,
  147,
  27,
  394,
  450,
  599,
  46,
  1229,
  54,
  158,
  491,
  602,
  56,
  740,
  399,
  9,
  14,
  53,
  1078,
  9,
  29,
  984,
  4,
  367,
  448,
  1383,
  54,
  51,
  2921,
  450,
  292,
  394,
  931,
  41,
  200,
  171,
  464,
  46,
  1219,
  9,
  6,
  1515,
  798,
  394,
  450,
  1812,
  46,
  93,
  48,
  100,
  32,
  144,
  105,
  128,
  44,
  63,
  2,
  501,
  142,
  36,
  532,
  626,
  627,
  368,
  707,
  427,
  1693,
  252,
  114,
  255,
  2925,
  42,
  368,
  638,
  639,
  435,
  640,
  30,
  20,
  24,
  1,
  10,
  3,
  1,
  10,
  3,
  1,

In [56]:
X_train = pad_sequences(X_train, maxlen=maximum_length)
X_test = pad_sequences(X_test, maxlen=maximum_length)

In [57]:
model = Sequential()
model.add(Embedding(input_dim=maximum_features, output_dim=word_embedding_dims, input_length=maximum_length))
model.add(Conv1D(no_of_filters, kernel_size, padding='valid', activation='relu', strides=1))
model.add(GlobalMaxPooling1D())
model.add(Dense(hidden_dims, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))



In [58]:
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training the model
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test))


Epoch 1/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 85ms/step - accuracy: 0.3630 - loss: 1.5349 - val_accuracy: 0.4725 - val_loss: 1.3472
Epoch 2/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 5ms/step - accuracy: 0.4113 - loss: 1.3657 - val_accuracy: 0.5824 - val_loss: 0.9799
Epoch 3/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5551 - loss: 1.0044 - val_accuracy: 0.7198 - val_loss: 0.7091
Epoch 4/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7651 - loss: 0.6529 - val_accuracy: 0.8681 - val_loss: 0.4519
Epoch 5/5
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8982 - loss: 0.4057 - val_accuracy: 0.9231 - val_loss: 0.2591


<keras.src.callbacks.history.History at 0x7e854c1caf50>

In [59]:
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)


accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')


print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)



[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 53ms/step
Accuracy: 0.9230769230769231
Precision: 0.9319887429643527
Recall: 0.9230769230769231
F1-score: 0.9245641576468645


In [None]:
# test the model using the new unseen sentence
def predict_new_sentence(sentence):
    sentence_seq = tokenizer.texts_to_sequences([sentence])

    sentence_pad = pad_sequences(sentence_seq, maxlen=maximum_length)

    prediction_prob = model.predict(sentence_pad)
    print(prediction_prob)
    predicted_class = np.argmax(prediction_prob, axis=1)[0]
    label_mapping_reverse = {v: k for k, v in label_mapping.items()}
    predicted_label = label_mapping_reverse[predicted_class]
    
    return predicted_label


new_sentence = "This is a beautiful red lipstick with long-lasting color."
predicted_label = predict_new_sentence(new_sentence)
print(f"The predicted product type for the sentence is: {predicted_label}")


In [None]:
# 1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 262ms/step
# [[0.09253944 0.01993478 0.05150839 0.73934764 0.09666977]]
# The predicted product type for the sentence is: lipstic
# label_mapping = {'contour': 0, 'eye_makeup': 1, 'foundation': 2, 'lipstic': 3, 'nail_polish': 4}