In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.utils import shuffle

In [None]:
# Shell escape: fetch the dataset from Google Drive by file id using the gdown CLI.
# Per the recorded cell output, the file is saved as ngirit_dataset.csv.
!gdown -- 15fyGgYoLh0TjKuPwRgA6suaTZhxV67Iw

Downloading...
From: https://drive.google.com/uc?id=15fyGgYoLh0TjKuPwRgA6suaTZhxV67Iw
To: /content/ngirit_dataset.csv
  0% 0.00/1.15M [00:00<?, ?B/s]100% 1.15M/1.15M [00:00<00:00, 148MB/s]


In [None]:
# Read the downloaded CSV into the working DataFrame used by the following cells
data = pd.read_csv(filepath_or_buffer='ngirit_dataset.csv')

In [None]:
# Drop the 'merchant_area' column and turn the literal 'NAN' placeholder strings into
# real missing values. np.nan is used because the np.NaN alias was removed in NumPy 2.0.
data = data.drop(columns='merchant_area').replace(to_replace='NAN', value=np.nan)

# Convert 'rating' to a numeric dtype.
data['rating'] = pd.to_numeric(data['rating'])

# Impute missing ratings with the column mean. The result is assigned back to the column
# instead of calling fillna(inplace=True) on the selected Series: that chained in-place
# form is deprecated in pandas 2.x and leaves `data` unchanged under Copy-on-Write.
data['rating'] = data['rating'].fillna(float(data['rating'].mean()))

In [None]:
# Separate the features (X) and the target variable (y).
# X is taken as an explicit copy so that later column assignments on it (the scaling
# cell writes into X) operate on an independent frame and do not raise pandas'
# SettingWithCopyWarning.
# NOTE(review): at this point the categorical columns of `data` have not been
# label-encoded yet, so X holds the raw 'merchant_name' / 'product' values.
X = data[['merchant_name', 'latitude', 'longitude', 'rating', 'product', 'price']].copy()

# Target column; it is converted to integer class ids in a later cell.
y = data['sub_category']

In [None]:
# Convert categorical variables to numerical using LabelEncoder.
# Every column gets its own encoder, kept in `encoders`, so that a column's mapping is
# not overwritten when the next column is fitted and can still be inverted later.
encoders = {}
for column in ['merchant_name', 'main_category', 'sub_category', 'product']:
    encoders[column] = LabelEncoder()
    data[column] = encoders[column].fit_transform(data[column])

# X was sliced out of `data` before this cell ran, so its categorical columns would
# otherwise still contain raw strings and the network could not consume them.
# assign() aligns on the index and returns a new frame rather than writing into X.
X = X.assign(merchant_name=data['merchant_name'], product=data['product'])

# Dedicated encoder for the target: later cells fit it on `y` and use it to decode
# predictions, so it is kept separate from the feature encoders above.
label_encoder = LabelEncoder()

In [None]:
# Standardise the numeric feature columns in X.
# NOTE(review): the scaler is fitted on every row of X, i.e. before the train/test
# split performed in a later cell.
scaler = StandardScaler()
scaler.fit(X[['latitude', 'longitude', 'rating', 'price']])
X[['latitude', 'longitude', 'rating', 'price']] = scaler.transform(
    X[['latitude', 'longitude', 'rating', 'price']]
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[['latitude', 'longitude', 'rating', 'price']] = scaler.fit_transform(X[['latitude', 'longitude', 'rating', 'price']])


In [None]:
# Fit the shared encoder on the target values, then replace y with its integer class ids.
# The fitted encoder is what a later cell uses to turn predicted ids back into labels.
label_encoder.fit(y)
y = label_encoder.transform(y)

In [None]:
# Permute features and target together (same row order in both);
# the fixed random_state makes the permutation reproducible.
X, y = shuffle(
    X,
    y,
    random_state=42,
)

In [None]:
# Hold out 20% of the rows as the test set; the remaining 80% are used for training.
# random_state pins the split so reruns produce the same partition.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Check if target categories are imbalanced:
# np.bincount returns, for each integer class id, how many training labels have that id.
category_counts = np.bincount(y_train)
print("Category Counts:", category_counts)

Category Counts: [ 291  134   78   63  430  751   46 2072  544  605  163  157 1983  129
  664   15   28  549   64]


In [None]:
# Build the classifier layer by layer.
model = tf.keras.Sequential()

# Three L2-regularised ReLU hidden layers (128 -> 64 -> 32 units); the first one also
# declares the input width, taken from the number of feature columns in X_train.
model.add(tf.keras.layers.Dense(
    128,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
    input_shape=(X_train.shape[1],),
))
model.add(tf.keras.layers.Dense(
    64,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
))
model.add(tf.keras.layers.Dense(
    32,
    activation='relu',
    kernel_regularizer=tf.keras.regularizers.l2(0.001),
))

# Softmax output with one unit per distinct label value in y.
model.add(tf.keras.layers.Dense(len(np.unique(y)), activation='softmax'))

In [None]:
# Configure training: Adam with a 0.001 learning rate, sparse categorical cross-entropy
# (the targets are integer class ids), and accuracy as the reported metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Train the model.
# Early stopping uses its default monitor (validation loss): training stops once it has
# not improved for 10 epochs and the best weights seen so far are restored.
early_stopping = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

# Validate on a 20% hold-out of the *training* rows rather than on the test set.
# With early stopping + restore_best_weights the validation data decides which weights
# are kept, so passing (X_test, y_test) here would bias the test-set evaluation that
# follows. validation_split works on array inputs, hence the explicit float32
# conversion; the conversion also fails immediately with a clear error if a non-numeric
# feature column is still present in X_train.
model.fit(np.asarray(X_train, dtype=np.float32), y_train, epochs=100, batch_size=32, verbose=1,
          validation_split=0.2, callbacks=[early_stopping])

ValueError: ignored

In [None]:
# Score the trained model on the held-out split (silently) and report both values
test_loss, test_accuracy = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=0,
)
print('Test Loss:', test_loss)
print('Test Accuracy:', test_accuracy)

Test Loss: 2.330993413925171
Test Accuracy: 0.23494525253772736


In [None]:
# Build one raw input row and preprocess it the same way as the training features.
# The raw values must exist before they can be used; as a runnable example they are
# taken from the first row of the source CSV -- replace them with the real input.
raw = pd.read_csv('ngirit_dataset.csv').replace(to_replace='NAN', value=np.nan)
raw_rating = pd.to_numeric(raw['rating'])
first_row = raw.iloc[0]

merchant_name = first_row['merchant_name']
product = first_row['product']
latitude = float(first_row['latitude'])
longitude = float(first_row['longitude'])
price = float(first_row['price'])
# Missing ratings were replaced by the column mean during training; mirror that here.
rating = float(raw_rating.fillna(raw_rating.mean()).iloc[0])

# A DataFrame keeps one dtype per column. A single np.array([[...]]) holding both
# strings and numbers becomes an all-string array, so encoded/scaled values written
# back into it are stored as text (presumably the cause of the UnimplementedError
# recorded for model.predict).
X_new = pd.DataFrame({
    'merchant_name': [merchant_name],
    'latitude': [latitude],
    'longitude': [longitude],
    'rating': [rating],
    'product': [product],
    'price': [price],
})

# Encode each categorical feature with an encoder fitted on its own column. The shared
# `label_encoder` cannot be used: it was last fitted on the target `y`. LabelEncoder
# numbers the sorted unique values, so fitting on the same cleaned column reproduces
# the mapping used when the training data was encoded.
for column in ['merchant_name', 'product']:
    X_new[column] = LabelEncoder().fit(raw[column]).transform(X_new[column])

# Normalize latitude, longitude, rating, price with the scaler fitted on the training data
numeric_columns = ['latitude', 'longitude', 'rating', 'price']
X_new[numeric_columns] = scaler.transform(X_new[numeric_columns])

# Hand the model a plain float32 array (X_new was an ndarray before as well).
X_new = X_new.to_numpy(dtype=np.float32)

NameError: ignored

In [None]:
# Run the network on the prepared input; the softmax output gives one score per class
predictions = model.predict(x=X_new)

# Take the highest-scoring class id per row and map it back through the encoder
# that was fitted on the target labels
predicted_labels = label_encoder.inverse_transform(predictions.argmax(axis=1))

print(predicted_labels)

UnimplementedError: ignored

In [None]:
# Export the trained model to the 'saved_model' directory in TensorFlow SavedModel format
tf.saved_model.save(
    obj=model,
    export_dir='saved_model',
)

In [None]:
# Convert the TensorFlow SavedModel to TensorFlow.js format:
# reads the 'saved_model' directory and writes the converted model to 'tfjs_model'.
# NOTE(review): `tfjs` is not imported in any cell shown in this notebook, so this line
# raises NameError on a fresh kernel -- presumably `import tensorflowjs as tfjs` (plus
# installing the tensorflowjs package) is intended; confirm and add it to the import cell.
tfjs.converters.convert_tf_saved_model('saved_model', 'tfjs_model')