# **About Datasets**

ChatGPT has been a major talk in the tech world. The tweets about chatgpt were gathered for a month and then the sentiment analysis was made using Natural Language Processing.

**Features**
*   Tweets	: text of tweet data
*   abels	: class of sentiment (good, bad, neutral)


# **Import Library & Datasets**

---

In [None]:
pip install nltk

In [None]:
pip install lightgbm xgboost scipy

In [None]:
pip install imbalanced-learn scikit-learn

In [None]:
pip install optuna

In [None]:
pip install keras-tuner

In [None]:
pip install tensorflow

In [None]:
pip install tabulate

In [None]:
pip install wordcloud

In [None]:
pip install tensorflow keras

In [2]:
# Commented out IPython magic to ensure Python compatibility.
import pandas as pd
import numpy as np
import seaborn as sns
import re
import tensorflow as tf
import matplotlib.pyplot as plt
import string
import keras.backend as K

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from tensorflow.keras.utils import to_categorical
from keras.layers import Dense,Conv1D,MaxPooling1D
from keras import models
from keras import layers
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords

In [3]:
df=pd.read_csv('data_pangkas.csv')

## **Bagian lain preprocessing**

In [8]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [4]:

tokenized_tweet = df['tweets'].apply(lambda x: word_tokenize(x))
tokenized_tweet.head()

0    [openais, chatgpt, is, the, worlds, best, chat...
1    [i, tried, to, ask, chatgpt, the, last, questi...
2    [you, got, to, love, chatgpt, decoding, urls, ...
3    [chatgpt, is, phenomenal, but, im, not, jumpin...
4    [chatgpt, proves, ai, is, finally, mainstream,...
Name: tweets, dtype: object

In [6]:
"""### Lemmatization"""

lemmatizer = WordNetLemmatizer()
tokenized_tweet = tokenized_tweet.apply(lambda sentence: [lemmatizer.lemmatize(word) for word in sentence])
tokenized_tweet.head()

for i in range(len(tokenized_tweet)):
    tokenized_tweet[i] = " ".join(tokenized_tweet[i])

df['clean_data'] = tokenized_tweet
df.head()

Unnamed: 0,tweets,labels,clean_data
0,openais chatgpt is the worlds best chatbot by,good,openais chatgpt is the world best chatbot by
1,i tried to ask chatgpt the last question glad...,good,i tried to ask chatgpt the last question glad ...
2,you got to love chatgpt decoding urls for mast...,good,you got to love chatgpt decoding url for masto...
3,chatgpt is phenomenal but im not jumping on th...,bad,chatgpt is phenomenal but im not jumping on th...
4,chatgpt proves ai is finally mainstream and th...,bad,chatgpt prof ai is finally mainstream and thin...


In [None]:
# Inisialisasi lemmatizer
lemmatizer = WordNetLemmatizer()

# Tokenisasi kalimat dan lemmatize setiap kata
def lemmatize_sentence(sentence):
    # Tokenisasi kalimat menjadi kata
    tokens = word_tokenize(sentence)
    # Lemmatize setiap kata
    return [lemmatizer.lemmatize(word) for word in tokens]

# Terapkan tokenisasi dan lemmatization
df['clean_data'] = df['tweets'].apply(lemmatize_sentence)

# Gabungkan kata-kata kembali menjadi kalimat
df['clean_data'] = df['clean_data'].apply(lambda x: " ".join(x))

# Tampilkan hasil
df.head()


# **Bagian Yang Lain**

# **Splitting Data**

---

In [10]:
x_train, x_test, y_train, y_test = train_test_split(df.clean_data, df.labels, test_size=0.2, random_state=42)
print(f'# training data : {x_train.shape[0]}')
print(f'# testing data : {x_test.shape[0]}')

# training data : 96000
# testing data : 24000


## **Gnerating Training and Testing Data**

In [11]:
training_data=pd.DataFrame({'clean_data':x_train, 'labels':y_train[:]})
testing_data=pd.DataFrame({'clean_data':x_test, 'labels':y_test[:]})
training_data.to_csv('training_data.csv')
testing_data.to_csv('testing_data.csv')

# **Teks Procesing**

In [12]:
print(type(x_train))
print(type(x_train.iloc[0]))  # Mengecek tipe data dari elemen pertama

<class 'pandas.core.series.Series'>
<class 'str'>


In [13]:
num_of_words = 32000
max_num_of_words = 50 # in sequence
num_of_dimensions = 300  # For GloVe word embeddings

In [14]:
tokenizer = Tokenizer(num_words=num_of_words, filters=string.punctuation)
tokenizer.fit_on_texts(x_train)

# Menyimpan tokenizer ke file
import pickle
with open('tokenizer.pkl', 'wb') as handle:
    pickle.dump(tokenizer, handle)

x_train_sequences = tokenizer.texts_to_sequences(x_train)
x_test_sequences = tokenizer.texts_to_sequences(x_test)

data_description = x_train.apply(lambda x: len(x.split(' ')))
data_description_df = pd.DataFrame(data_description.describe())
data_description_df

Unnamed: 0,clean_data
count,96000.0
mean,18.62199
std,12.959685
min,1.0
25%,8.0
50%,15.0
75%,27.0
max,61.0


In [15]:
max_num_of_words = 49

x_train = pad_sequences(x_train_sequences, maxlen=max_num_of_words)
x_test = pad_sequences(x_test_sequences, maxlen=max_num_of_words)

print(f'Sample : {x_train.shape}')

Sample : (96000, 49)


## **Format Output**

In [16]:
label_encoder = LabelEncoder()
y_train = to_categorical(label_encoder.fit_transform(y_train))
y_test = to_categorical(label_encoder.transform(y_test))
y_test[0]

array([0., 1., 0.])

# **Glove Word Embeddings**

In [14]:
num_of_words = 32000
max_num_of_words = 50 # in sequence
num_of_dimensions = 300  # For GloVe word embeddings

In [17]:
embedding_dict = {}
glove = open(f'glove.6B.300d.txt', encoding="utf8")
for line in glove:
  values = line.split()
  word = values[0]
  vector = np.asarray(values[1:], dtype='float32')
  embedding_dict[word] = vector
glove.close()

In [18]:
embedding_matrix = np.zeros((num_of_words, 300))

for w, i in tokenizer.word_index.items():
  if i < num_of_words:
    vect = embedding_dict.get(w)
    if vect is not None :
      embedding_matrix[i] = vect
  else:
    break

print('Embedding Matrix Shape: ', embedding_matrix.shape)

Embedding Matrix Shape:  (32000, 300)


# **Model Training**

In [19]:
# Function untuk Ploting nilai akurasi dan nilai loss

def plotting_model_measurements(history, model_name, trial_num):
  title_name = f'{model_name} Model (Trial-{trial_num})'
  fig, ax = plt.subplots(1,2, figsize=(10,4))
  fig.subplots_adjust(hspace=0.8, wspace=0.8)
  fig.suptitle(title_name, fontsize=16)
  fig.trial_num=trial_num

  ax[0].plot(history.history['accuracy'])
  ax[0].plot(history.history['val_accuracy'])
  ax[0].set_title('Model Accuracy')
  ax[0].set_xlabel('epoch')
  ax[0].set_ylabel('accuracy')
  ax[0].legend(['train', 'validation'], loc='best')
  ax[1].plot(history.history['loss'])
  ax[1].plot(history.history['val_loss'])
  ax[1].set_title('Model Loss')
  ax[1].set_xlabel('epoch')
  ax[1].set_ylabel('loss')
  ax[1].legend(['train', 'validation'], loc='best')
  #to_file=('/content/drive/MyDrive/Python/{model_name} Model Accuracy and Loss (Trial No.:{trial_num}).png')
  #plt.savefig(f'Images/{model_name} Model Accuracy and Loss (Trial No.:{trial_num}).png')
  plt.draw()
  plt.tight_layout()

## **CNN**

In [20]:
@tf.keras.utils.register_keras_serializable()
def precision_m(y_true, y_pred):
    true_positives = tf.reduce_sum(tf.cast(tf.round(tf.clip_by_value(y_true * y_pred, 0, 1)), tf.float32))
    predicted_positives = tf.reduce_sum(tf.cast(tf.round(tf.clip_by_value(y_pred, 0, 1)), tf.float32))
    precision = true_positives / (predicted_positives + tf.keras.backend.epsilon())
    return precision

@tf.keras.utils.register_keras_serializable()
def recall_m(y_true, y_pred):
    true_positives = tf.reduce_sum(tf.cast(tf.round(tf.clip_by_value(y_true * y_pred, 0, 1)), tf.float32))
    possible_positives = tf.reduce_sum(tf.cast(tf.round(tf.clip_by_value(y_true, 0, 1)), tf.float32))
    recall = true_positives / (possible_positives + tf.keras.backend.epsilon())
    return recall

@tf.keras.utils.register_keras_serializable()
def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2 * ((precision * recall) / (precision + recall + tf.keras.backend.epsilon()))


In [21]:
custom_objects={'f1_m': f1_m, 'precision_m': precision_m, 'recall_m': recall_m}

In [22]:
#from tensorflow.keras.regularizers import l2
from tensorflow.keras.regularizers import l1_l2

def buat_cnn(Conv1D_1, Conv1D_2, Conv1D_3, _learning_rate, embedding_trainable, model_name):
    model = models.Sequential(name=model_name)
    model.add(layers.Embedding(num_of_words, num_of_dimensions, input_length=max_num_of_words, name='embedding_Layer', weights=[embedding_matrix], trainable=embedding_trainable))
    
    if Conv1D_1:
        model.add(Conv1D(128, 16, padding='same', activation='relu'))
        model.add(MaxPooling1D(3))

    if Conv1D_2:
        model.add(Conv1D(128, 16, padding='same', activation='relu'))
        model.add(MaxPooling1D(3))

    if Conv1D_3:
        model.add(Conv1D(128, 16, padding='same', activation='relu'))
        model.add(MaxPooling1D(3))

    model.add(layers.Dropout(0.6))
    model.add(layers.Flatten())

    # L2 Regularization di Dense Layer
    model.add(Dense(64, activation='relu', kernel_regularizer=l1_l2(l1=0.001, l2=0.004)))
    model.add(layers.Dropout(0.3))

    model.add(Dense(32, activation='relu', kernel_regularizer=l1_l2(l1=0.001, l2=0.004)))
    model.add(layers.Dropout(0.2))

    model.add(Dense(3, activation='softmax', name='Output_Layer'))  # Output Layer

    model.layers[0].set_weights([embedding_matrix])
    
    if embedding_trainable:
        model.layers[0].trainable = True
    else:
        model.layers[0].trainable = False

    model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate=_learning_rate), metrics=['accuracy', f1_m, precision_m, recall_m])

    model.summary()
    return model


In [23]:
cnn_model_trials_dict = {
    'Measurement' : ['Trial-1', 'Trial-2' , 'Trial-3', 'Trial-4', 'Trial-5'], # Trial-1 => 0 , Trial-2 => 1 , ...
             'Training Accuaracy' : [0,0,0,0,0],
             'Testing Accuaracy' : [0,0,0,0,0],
             'Loss' : [0,0,0,0,0],
             'Embedding layer Trainable':['No','No','No','No','No']
}

### **Training Model CNN**

####**Trial 1**

In [24]:
# Trial-1 (embedding_trainable=True)
cnn_model_trial1 = buat_cnn(embedding_trainable=True, Conv1D_1=True, Conv1D_2=False, Conv1D_3=False, _learning_rate=0.0001, model_name='CNN_Trial-1')

# Latih Model
cnn_model_trial1_history = cnn_model_trial1.fit(x_train, y_train,epochs=10, verbose=1,batch_size=256,validation_split=0.2)

# Menyimpan riwayat pelatihan ke file .pkl
import pickle
with open('CNN_model_trial1_history.pkl', 'wb') as file:
    pickle.dump(cnn_model_trial1_history.history, file)

print("History saved successfully!")

'''Plotting Model Architecture For Trial-1'''
tf.keras.utils.plot_model(cnn_model_trial1, show_shapes=True)
#plt.savefig('Images/CNN Model (Trial 1).png')

cnn_model_trials_dict['Embedding layer Trainable'][0]='Yes'



Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m183s[0m 596ms/step - accuracy: 0.4077 - f1_m: 0.1379 - loss: 4.8390 - precision_m: 0.4707 - recall_m: 0.0820 - val_accuracy: 0.5070 - val_f1_m: 0.2640 - val_loss: 2.9394 - val_precision_m: 0.6498 - val_recall_m: 0.1661
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m167s[0m 557ms/step - accuracy: 0.5360 - f1_m: 0.3623 - loss: 2.5327 - precision_m: 0.6358 - recall_m: 0.2585 - val_accuracy: 0.6805 - val_f1_m: 0.6451 - val_loss: 1.5523 - val_precision_m: 0.7475 - val_recall_m: 0.5679
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m158s[0m 526ms/step - accuracy: 0.6751 - f1_m: 0.6284 - loss: 1.4596 - precision_m: 0.7349 - recall_m: 0.5496 - val_accuracy: 0.7456 - val_f1_m: 0.7275 - val_loss: 1.1020 - val_precision_m: 0.7859 - val_recall_m: 0.6774
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m160s[0m 534ms/step - accuracy: 0.7431 - f1_m: 0.7233 - loss

In [None]:
#Evaluasi Model
# Trial-1
training_score = cnn_model_trial1.evaluate(x_train, y_train)
testing_score = cnn_model_trial1.evaluate(x_test, y_test)

print(f'\nTrial 1')
print(f'Training Accuaracy (Trial 1): {round(training_score[1]*100,1)}%')
print(f'Testing Accuaracy (Trial 1): {round(testing_score[1]*100,1)}%')

In [35]:
#Menyimpan Hassil Evaluasi
cnn_model_trials_dict['Training Accuaracy'][0]=round(training_score[1]*100,1)
cnn_model_trials_dict['Testing Accuaracy'][0]=round(testing_score[1]*100,1)
cnn_model_trials_dict['Loss'][0] = round(testing_score[0], 2)

In [None]:
# Plotting hasil pelatihan
plotting_model_measurements(cnn_model_trial1_history,'CNN',1)

In [44]:
cnn_model_trial1.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [None]:
# Menyimpan model format SaveModel
cnn_model_trial1.save('CG/CNN_model_trial1.h5')
print("Model saved successfully!")

In [None]:
# Fungsi untuk memproses teks input
def preprocess_input_text(text):
    text = text_preprocessing_process(text)  # Menggunakan fungsi preprocessing sebelumnya
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_num_of_words)
    return padded_sequence

# Fungsi prediksi sentimen
def predict_sentiment(input_text):
    processed_input = preprocess_input_text(input_text)
    prediction = cnn_model_trial1.predict(processed_input)
    sentiment = np.argmax(prediction, axis=1)[0]  # Mendapatkan kelas prediksi (0, 1, atau 2)
    confidence = prediction[0][sentiment]  # Probabilitas kelas tertinggi
    if sentiment == 0:
        return "bad", confidence
    elif sentiment == 1:
        return "good", confidence
    else:
        return "neutral", confidence

# Contoh input
data_input = "chatgpt very helpful"
predicted_sentiment, confidence = predict_sentiment(data_input)

print("Input:", data_input)
print("Predicted Sentiment:", predicted_sentiment)
print("Confidence:", round(confidence * 100, 2), "%")


In [25]:
# Trial-2 (embedding_trainable=False)

cnn_model_trial2 = buat_cnn(embedding_trainable=False, Conv1D_1=True, Conv1D_2=False, Conv1D_3=False, _learning_rate=0.01, model_name='CNN_Trial-2')
cnn_model_trial2_history = cnn_model_trial2.fit(x_train, y_train,epochs=10, verbose=1,batch_size=256,validation_split=0.2)

'''Plotting Model Architecture For Trial-2'''
tf.keras.utils.plot_model(cnn_model_trial2, show_shapes=True)
#to_file=('/content/drive/MyDrive/Python/CNN_Model_Trial_2.png')
#plt.savefig('/content/drive/MyDrive/Python/CNN Model Trial 2.png')

cnn_model_trials_dict['Embedding layer Trainable'][1]='No'

Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 350ms/step - accuracy: 0.4415 - f1_m: 0.1906 - loss: 2.0114 - precision_m: 0.5934 - recall_m: 0.1207 - val_accuracy: 0.5829 - val_f1_m: 0.4090 - val_loss: 1.1298 - val_precision_m: 0.7476 - val_recall_m: 0.2821
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m105s[0m 350ms/step - accuracy: 0.5900 - f1_m: 0.4819 - loss: 1.1740 - precision_m: 0.6879 - recall_m: 0.3774 - val_accuracy: 0.7164 - val_f1_m: 0.6864 - val_loss: 1.0434 - val_precision_m: 0.7769 - val_recall_m: 0.6150
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m109s[0m 364ms/step - accuracy: 0.6726 - f1_m: 0.6376 - loss: 1.1369 - precision_m: 0.7335 - recall_m: 0.5646 - val_accuracy: 0.7479 - val_f1_m: 0.7372 - val_loss: 0.9895 - val_precision_m: 0.7854 - val_recall_m: 0.6947
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 338ms/step - accuracy: 0.6987 - f1_m: 0.6738 - loss

In [26]:
# Trial-3 (embedding_trainable=False)

cnn_model_trial3 = buat_cnn(embedding_trainable=False, Conv1D_1=True, Conv1D_2=True, Conv1D_3=True, _learning_rate=0.001, model_name='CNN_Trial-3')
cnn_model_trial3_history = cnn_model_trial3.fit(x_train, y_train,epochs=10, verbose=1,batch_size=256,validation_split=0.2)

'''Plotting Model Architecture For Trial-3'''
#tf.keras.utils.plot_model(cnn_model_trial3, show_shapes=True)
#plt.savefig('/content/drive/MyDrive/Python/CNN Model (Trial 3).png')

cnn_model_trials_dict['Embedding layer Trainable'][2]='No'

Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m138s[0m 444ms/step - accuracy: 0.4934 - f1_m: 0.2742 - loss: 1.8048 - precision_m: 0.5774 - recall_m: 0.1963 - val_accuracy: 0.7706 - val_f1_m: 0.7608 - val_loss: 0.6819 - val_precision_m: 0.8057 - val_recall_m: 0.7209
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m133s[0m 443ms/step - accuracy: 0.7888 - f1_m: 0.7767 - loss: 0.6517 - precision_m: 0.8272 - recall_m: 0.7323 - val_accuracy: 0.8048 - val_f1_m: 0.8008 - val_loss: 0.5578 - val_precision_m: 0.8413 - val_recall_m: 0.7641
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 423ms/step - accuracy: 0.8590 - f1_m: 0.8544 - loss: 0.4603 - precision_m: 0.8853 - recall_m: 0.8257 - val_accuracy: 0.8165 - val_f1_m: 0.8144 - val_loss: 0.5380 - val_precision_m: 0.8375 - val_recall_m: 0.7927
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 415ms/step - accuracy: 0.8995 - f1_m: 0.8983 - loss

In [27]:
# Trial-4 (embedding_trainable=True)

cnn_model_trial4 = buat_cnn(embedding_trainable=True, Conv1D_1=True, Conv1D_2=False, Conv1D_3=False, _learning_rate=0.001, model_name='CNN_Trial-4')
cnn_model_trial4_history = cnn_model_trial4.fit(x_train, y_train,epochs=10, verbose=1,batch_size=256,validation_split=0.2)

'''Plotting Model Architecture For Trial-4'''
tf.keras.utils.plot_model(cnn_model_trial4, show_shapes=True)
#plt.savefig('/content/drive/MyDrive/Python/CNN Model (Trial 4).png')

cnn_model_trials_dict['Embedding layer Trainable'][3]='Yes'

Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m175s[0m 548ms/step - accuracy: 0.5416 - f1_m: 0.3707 - loss: 2.4348 - precision_m: 0.6182 - recall_m: 0.2961 - val_accuracy: 0.8616 - val_f1_m: 0.8574 - val_loss: 0.6180 - val_precision_m: 0.8853 - val_recall_m: 0.8314
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m206s[0m 562ms/step - accuracy: 0.8772 - f1_m: 0.8734 - loss: 0.5743 - precision_m: 0.8942 - recall_m: 0.8536 - val_accuracy: 0.9041 - val_f1_m: 0.9034 - val_loss: 0.4494 - val_precision_m: 0.9167 - val_recall_m: 0.8905
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m164s[0m 546ms/step - accuracy: 0.9187 - f1_m: 0.9169 - loss: 0.4252 - precision_m: 0.9281 - recall_m: 0.9059 - val_accuracy: 0.9102 - val_f1_m: 0.9100 - val_loss: 0.4159 - val_precision_m: 0.9198 - val_recall_m: 0.9005
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m156s[0m 520ms/step - accuracy: 0.9350 - f1_m: 0.9339 - loss

In [28]:
# Trial-5 (embedding_trainable=False)

cnn_model_trial5 = buat_cnn(embedding_trainable=False, Conv1D_1=True, Conv1D_2=True, Conv1D_3=True, _learning_rate=0.0001, model_name='CNN_Trial-5')
cnn_model_trial5_history = cnn_model_trial5.fit(x_train, y_train,epochs=10, verbose=1,batch_size=256,validation_split=0.2)

'''Plotting Model Architecture For Trial-5'''
tf.keras.utils.plot_model(cnn_model_trial5, show_shapes=True)
#plt.savefig('/content/drive/MyDrive/Python/CNN Model (Trial 5).png')

cnn_model_trials_dict['Embedding layer Trainable'][4]='No'

Epoch 1/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m141s[0m 449ms/step - accuracy: 0.4211 - f1_m: 0.0712 - loss: 2.4551 - precision_m: 0.4407 - recall_m: 0.0415 - val_accuracy: 0.6023 - val_f1_m: 0.5159 - val_loss: 1.9898 - val_precision_m: 0.6802 - val_recall_m: 0.4160
Epoch 2/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m134s[0m 447ms/step - accuracy: 0.6347 - f1_m: 0.5308 - loss: 1.8872 - precision_m: 0.7032 - recall_m: 0.4297 - val_accuracy: 0.7199 - val_f1_m: 0.6950 - val_loss: 1.5180 - val_precision_m: 0.7630 - val_recall_m: 0.6385
Epoch 3/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m127s[0m 424ms/step - accuracy: 0.7276 - f1_m: 0.6929 - loss: 1.4770 - precision_m: 0.7793 - recall_m: 0.6243 - val_accuracy: 0.7506 - val_f1_m: 0.7448 - val_loss: 1.2429 - val_precision_m: 0.7809 - val_recall_m: 0.7120
Epoch 4/10
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m125s[0m 416ms/step - accuracy: 0.7750 - f1_m: 0.7580 - loss

### **Testing Model CNN**

In [29]:
# Trial-1
training_score = cnn_model_trial1.evaluate(x_train, y_train)
testing_score = cnn_model_trial1.evaluate(x_test, y_test)

print(f'\nTrial 1')
print(f'Training Accuaracy (Trial 1): {round(training_score[1]*100,1)}%')
print(f'Testing Accuaracy (Trial 1): {round(testing_score[1]*100,1)}%')

'''   Save the data of trial 1 in cnn model trials dictionary   '''
cnn_model_trials_dict['Training Accuaracy'][0]=round(training_score[1]*100,1)
cnn_model_trials_dict['Testing Accuaracy'][0]=round(testing_score[1]*100,1)

[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 20ms/step - accuracy: 0.9238 - f1_m: 0.9230 - loss: 0.4254 - precision_m: 0.9339 - recall_m: 0.9127
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 20ms/step - accuracy: 0.8732 - f1_m: 0.8726 - loss: 0.5249 - precision_m: 0.8877 - recall_m: 0.8583

Trial 1
Training Accuaracy (Trial 1): 91.5%
Testing Accuaracy (Trial 1): 87.4%


In [30]:
# Trial-2
training_score = cnn_model_trial2.evaluate(x_train, y_train)
testing_score = cnn_model_trial2.evaluate(x_test, y_test)

print(f'\nTrial 2')
print(f'Training Accuaracy (Trial 2): {round(training_score[1]*100,1)}%')
print(f'Testing Accuaracy (Trial 2): {round(testing_score[1]*100,1)}%')

'''   Save the data of trial 2 in cnn model trials dictionary   '''
cnn_model_trials_dict['Training Accuaracy'][1]=round(training_score[1]*100,1)
cnn_model_trials_dict['Testing Accuaracy'][1]=round(testing_score[1]*100,1)
cnn_model_trials_dict['Loss'][1] = round(testing_score[0], 2)

[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m61s[0m 20ms/step - accuracy: 0.8100 - f1_m: 0.8032 - loss: 0.9090 - precision_m: 0.8396 - recall_m: 0.7707
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 20ms/step - accuracy: 0.7775 - f1_m: 0.7679 - loss: 0.9756 - precision_m: 0.8053 - recall_m: 0.7348

Trial 2
Training Accuaracy (Trial 2): 80.3%
Testing Accuaracy (Trial 2): 78.0%


In [31]:
# Trial-3
training_score = cnn_model_trial3.evaluate(x_train, y_train)
testing_score = cnn_model_trial3.evaluate(x_test, y_test)

print(f'\nTrial 3')
print(f'Training Accuaracy (Trial 3): {round(training_score[1]*100,1)}%')
print(f'Testing Accuaracy (Trial 3): {round(testing_score[1]*100,1)}%')

'''   Save the data of trial 3 in cnn model trials dictionary   '''
cnn_model_trials_dict['Training Accuaracy'][2]=round(training_score[1]*100,1)
cnn_model_trials_dict['Testing Accuaracy'][2]=round(testing_score[1]*100,1)
cnn_model_trials_dict['Loss'][2] = round(testing_score[0], 2)

[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m68s[0m 23ms/step - accuracy: 0.9857 - f1_m: 0.9858 - loss: 0.0833 - precision_m: 0.9875 - recall_m: 0.9841
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 23ms/step - accuracy: 0.8151 - f1_m: 0.8151 - loss: 0.8836 - precision_m: 0.8211 - recall_m: 0.8095

Trial 3
Training Accuaracy (Trial 3): 95.5%
Testing Accuaracy (Trial 3): 81.7%


In [32]:
# Trial-4
training_score = cnn_model_trial4.evaluate(x_train, y_train)
testing_score = cnn_model_trial4.evaluate(x_test, y_test)

print(f'\nTrial 4')
print(f'Training Accuaracy (Trial 4): {round(training_score[1]*100,1)}%')
print(f'Testing Accuaracy (Trial 4): {round(testing_score[1]*100,1)}%')

'''   Save the data of trial 4 in cnn model trials dictionary   '''
cnn_model_trials_dict['Training Accuaracy'][3]=round(training_score[1]*100,1)
cnn_model_trials_dict['Testing Accuaracy'][3]=round(testing_score[1]*100,1)
cnn_model_trials_dict['Loss'][3] = round(testing_score[0], 2)

[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m60s[0m 20ms/step - accuracy: 0.9759 - f1_m: 0.9768 - loss: 0.1982 - precision_m: 0.9803 - recall_m: 0.9735
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 21ms/step - accuracy: 0.9141 - f1_m: 0.9130 - loss: 0.4232 - precision_m: 0.9200 - recall_m: 0.9064

Trial 4
Training Accuaracy (Trial 4): 96.4%
Testing Accuaracy (Trial 4): 91.4%


In [33]:
# Trial-5
training_score = cnn_model_trial5.evaluate(x_train, y_train)
testing_score = cnn_model_trial5.evaluate(x_test, y_test)

print(f'\nTrial 5')
print(f'Training Accuaracy (Trial 5): {round(training_score[1]*100,1)}%')
print(f'Testing Accuaracy (Trial 5): {round(testing_score[1]*100,1)}%\n\n')

'''   Save the data of trial 5 in cnn model trials dictionary   '''
cnn_model_trials_dict['Training Accuaracy'][4]=round(training_score[1]*100,1)
cnn_model_trials_dict['Testing Accuaracy'][4]=round(testing_score[1]*100,1)
cnn_model_trials_dict['Loss'][4] = round(testing_score[0], 2)

[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m69s[0m 23ms/step - accuracy: 0.9739 - f1_m: 0.9738 - loss: 0.2489 - precision_m: 0.9764 - recall_m: 0.9713
[1m750/750[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 23ms/step - accuracy: 0.7900 - f1_m: 0.7904 - loss: 0.8224 - precision_m: 0.8000 - recall_m: 0.7814

Trial 5
Training Accuaracy (Trial 5): 94.2%
Testing Accuaracy (Trial 5): 79.5%




In [34]:
# Membuat DataFrame hasil evaluasi
cnn_model_trials_df = pd.DataFrame(cnn_model_trials_dict)

# Tampilkan DataFrame
print(cnn_model_trials_df)

  Measurement  Training Accuaracy  Testing Accuaracy  Loss  \
0     Trial-1                91.5               87.4  0.00   
1     Trial-2                80.3               78.0  0.97   
2     Trial-3                95.5               81.7  0.88   
3     Trial-4                96.4               91.4  0.42   
4     Trial-5                94.2               79.5  0.80   

  Embedding layer Trainable  
0                       Yes  
1                        No  
2                        No  
3                       Yes  
4                        No  


In [None]:
#cnn_model_trial1.export('CG/CNN_model_trial1')
#cnn_model_trial2.save('CNN_model_trial2.h5')
#cnn_model_trial3.save('CNN_model_trial3.h5')
cnn_model_trial4.save('CNN_model_trial4.h5')
#cnn_model_trial5.save('CNN_model_trial5.h5')

In [None]:
#cnn_model_trial4.save('CNN_model_trial4.h5')



#**Prediksi**

In [50]:
# Fungsi untuk memproses teks input
def preprocess_input_text(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_num_of_words)
    return padded_sequence

# Fungsi prediksi sentimen
def predict_sentiment(input_text):
    processed_input = preprocess_input_text(input_text)
    prediction = cnn_model_trial2.predict(processed_input)
    sentiment = np.argmax(prediction, axis=1)[0]  # Mendapatkan kelas prediksi (0, 1, atau 2)
    confidence = prediction[0][sentiment]  # Probabilitas kelas tertinggi
    if sentiment == 0:
        return "bad", confidence
    elif sentiment == 1:
        return "good", confidence
    else:
        return "neutral", confidence

# Contoh input
data_input = "why do more than half of the chatgpt is going to change your industry and youll be unemployed forever if you arent prepared tweets seem celebratory and gleeful"
predicted_sentiment, confidence = predict_sentiment(data_input)

print("Input:", data_input)
print("Predicted Sentiment:", predicted_sentiment)
print("Confidence:", round(confidence * 100, 2), "%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 118ms/step
Input: why do more than half of the chatgpt is going to change your industry and youll be unemployed forever if you arent prepared tweets seem celebratory and gleeful
Predicted Sentiment: bad
Confidence: 72.63 %


In [60]:
# Fungsi untuk memproses teks input
def preprocess_input_text(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_num_of_words)
    return padded_sequence

# Fungsi prediksi sentimen
def predict_sentiment(input_text):
    processed_input = preprocess_input_text(input_text)
    prediction = cnn_model_trial3.predict(processed_input)
    sentiment = np.argmax(prediction, axis=1)[0]  # Mendapatkan kelas prediksi (0, 1, atau 2)
    confidence = prediction[0][sentiment]  # Probabilitas kelas tertinggi
    if sentiment == 0:
        return "bad", confidence
    elif sentiment == 1:
        return "good", confidence
    else:
        return "neutral", confidence

# Contoh input
data_input = "why do more than half of the chatgpt is going to change your industry and youll be unemployed forever if you arent prepared tweets seem celebratory and gleeful"
predicted_sentiment, confidence = predict_sentiment(data_input)

print("Input:", data_input)
print("Predicted Sentiment:", predicted_sentiment)
print("Confidence:", round(confidence * 100, 2), "%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
Input: why do more than half of the chatgpt is going to change your industry and youll be unemployed forever if you arent prepared tweets seem celebratory and gleeful
Predicted Sentiment: neutral
Confidence: 87.57 %


In [67]:
# Fungsi untuk memproses teks input
def preprocess_input_text(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_num_of_words)
    return padded_sequence

# Fungsi prediksi sentimen
def predict_sentiment(input_text):
    processed_input = preprocess_input_text(input_text)
    prediction = cnn_model_trial4.predict(processed_input)
    sentiment = np.argmax(prediction, axis=1)[0]  # Mendapatkan kelas prediksi (0, 1, atau 2)
    confidence = prediction[0][sentiment]  # Probabilitas kelas tertinggi
    if sentiment == 0:
        return "bad", confidence
    elif sentiment == 1:
        return "good", confidence
    else:
        return "neutral", confidence

# Contoh input
data_input = "really hate chatgpt"
predicted_sentiment, confidence = predict_sentiment(data_input)

print("Input:", data_input)
print("Predicted Sentiment:", predicted_sentiment)
print("Confidence:", round(confidence * 100, 2), "%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 75ms/step
Input: really hate chatgpt
Predicted Sentiment: bad
Confidence: 99.3 %


In [69]:
# Fungsi untuk memproses teks input
def preprocess_input_text(text):
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=max_num_of_words)
    return padded_sequence

# Fungsi prediksi sentimen
def predict_sentiment(input_text):
    processed_input = preprocess_input_text(input_text)
    prediction = cnn_model_trial5.predict(processed_input)
    sentiment = np.argmax(prediction, axis=1)[0]  # Mendapatkan kelas prediksi (0, 1, atau 2)
    confidence = prediction[0][sentiment]  # Probabilitas kelas tertinggi
    if sentiment == 0:
        return "bad", confidence
    elif sentiment == 1:
        return "good", confidence
    else:
        return "neutral", confidence

# Contoh input
data_input = "fuck"
predicted_sentiment, confidence = predict_sentiment(data_input)

print("Input:", data_input)
print("Predicted Sentiment:", predicted_sentiment)
print("Confidence:", round(confidence * 100, 2), "%")


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 579ms/step
Input: fuck
Predicted Sentiment: bad
Confidence: 99.05 %
