In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from fuzzywuzzy import process
import re
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import Dense, Normalization
import matplotlib.pyplot as plt

def remove_extra_spaces(text):
    return re.sub(r'\s+', ' ', text.strip())

def normalize_text(text):
    text = text.lower()
    text = re.sub(r'[^a-z0-9\s]', '', text)
    return text

def closest_word(text, dataset_texts):
    highest_similarity_score = 0
    closest_match = None
    for word in text.split():
        closest_match_for_word, similarity_score = process.extractOne(word, dataset_texts)
        if similarity_score > highest_similarity_score:
            highest_similarity_score = similarity_score
            closest_match = closest_match_for_word
    return closest_match, highest_similarity_score

def preprocess(df, dataset_texts):
    df['Deskripsi'] = df['Deskripsi'].apply(remove_extra_spaces)
    df['Deskripsi'] = df['Deskripsi'].apply(normalize_text)
    
    closest_words = []
    closest_words_num = []
    closest_words_score = []
    for sentence in df['Deskripsi']:
        closest_match, similarity_score = closest_word(sentence, dataset_texts)
        if similarity_score < 20:
            closest_word_num = 0
            closest_match = "missing"
        else:
            closest_word_num = dataset_texts.index(closest_match) + 1
        closest_words.append(closest_match)
        closest_words_num.append(closest_word_num)
        closest_words_score.append(similarity_score)
    
    df['closest_words'] = closest_words
    df['closest_words_num'] = closest_words_num
    df['score'] = closest_words_score
    
    return df[['closest_words_num','Nominal']].values, df['Verifikasi'].values

# Load the DataFrame
df = pd.read_csv('dummy2.csv')

# Example dataset_texts
dataset_texts = ["shf", "settlement", "fidusia", "pinalty", "umk"]

# Preprocess the data
X, y = preprocess(df, dataset_texts)

# Encode the target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Convert DataFrame to tensors
X = tf.convert_to_tensor(X, dtype=tf.float32)

# Normalize the features
normalizer = Normalization(axis=-1)
normalizer.adapt(X)

# Build and compile the model
def build_and_compile_model(norm):
    model = tf.keras.Sequential([
        norm,
        Dense(64, activation='relu'),
        Dense(64, activation='relu'),
        Dense(len(label_encoder.classes_), activation='softmax')
    ])

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=tf.keras.optimizers.Adam(0.001),
                  metrics=['accuracy'])
    return model

# Build the model
model = build_and_compile_model(normalizer)

# Split the data into training and testing sets
train_dataset = df.sample(frac=0.8, random_state=0)
test_dataset = df.drop(train_dataset.index)

train_features, train_labels = preprocess(train_dataset, dataset_texts)
test_features, test_labels = preprocess(test_dataset, dataset_texts)

train_features = tf.convert_to_tensor(train_features, dtype=tf.float32)
test_features = tf.convert_to_tensor(test_features, dtype=tf.float32)

# Train the model
model.fit(train_features, train_labels, epochs=500, batch_size=32, validation_split=0.3)

# Predict on test data
test_predictions = np.argmax(model.predict(test_features), axis=-1)

# Decode the true labels
y_test_decoded = label_encoder.inverse_transform(test_labels)

# Decode the predicted labels
y_pred_decoded = label_encoder.inverse_transform(test_predictions)

# Print classification report
print(classification_report(y_test_decoded, y_pred_decoded))

# Visualize the predictions
plt.figure(figsize=(8, 8))
plt.scatter(y_test_decoded, y_pred_decoded)
plt.xlabel('True Values')
plt.ylabel('Predictions')
plt.title('True Values vs Predictions')
plt.show()


Epoch 1/500


2024-04-12 15:08:36.647567: W tensorflow/core/framework/op_kernel.cc:1816] OP_REQUIRES failed at cast_op.cc:122 : UNIMPLEMENTED: Cast string to float is not supported


UnimplementedError: Graph execution error:

Detected at node Cast_1 defined at (most recent call last):
  File "<frozen runpy>", line 198, in _run_module_as_main

  File "<frozen runpy>", line 88, in _run_code

  File "/usr/lib/python3.11/site-packages/ipykernel_launcher.py", line 18, in <module>

  File "/usr/lib/python3.11/site-packages/traitlets/config/application.py", line 1075, in launch_instance

  File "/usr/lib/python3.11/site-packages/ipykernel/kernelapp.py", line 739, in start

  File "/usr/lib/python3.11/site-packages/tornado/platform/asyncio.py", line 195, in start

  File "/usr/lib/python3.11/asyncio/base_events.py", line 607, in run_forever

  File "/usr/lib/python3.11/asyncio/base_events.py", line 1922, in _run_once

  File "/usr/lib/python3.11/asyncio/events.py", line 80, in _run

  File "/usr/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 542, in dispatch_queue

  File "/usr/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 531, in process_one

  File "/usr/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell

  File "/usr/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 359, in execute_request

  File "/usr/lib/python3.11/site-packages/ipykernel/kernelbase.py", line 775, in execute_request

  File "/usr/lib/python3.11/site-packages/ipykernel/ipkernel.py", line 446, in do_execute

  File "/usr/lib/python3.11/site-packages/ipykernel/zmqshell.py", line 549, in run_cell

  File "/usr/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3051, in run_cell

  File "/usr/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3106, in _run_cell

  File "/usr/lib/python3.11/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner

  File "/usr/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3311, in run_cell_async

  File "/usr/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3493, in run_ast_nodes

  File "/usr/lib/python3.11/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code

  File "/tmp/ipykernel_119405/1283609263.py", line 102, in <module>

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1807, in fit

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1401, in train_function

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1384, in step_function

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1373, in run_step

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1155, in train_step

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/engine/training.py", line 1249, in compute_metrics

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/engine/compile_utils.py", line 620, in update_state

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/utils/metrics_utils.py", line 77, in decorated

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/metrics/base_metric.py", line 140, in update_state_fn

  File "/home/gusanwa/.local/lib/python3.11/site-packages/keras/src/metrics/base_metric.py", line 708, in update_state

Cast string to float is not supported
	 [[{{node Cast_1}}]] [Op:__inference_train_function_2783]