In [131]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input

In [132]:
# Load the dataset
df = pd.read_excel('Activity_Dataset.xlsx')
df.head()

Unnamed: 0,Activity Title,Category
0,Jam session with simple percussion instruments,casual
1,Handloom weaving demonstration,ethnic
2,Panel on balancing creativity and productivity,smart casual
3,Appreciation of traditional calligraphy styles,ethnic
4,Folk music learning session,ethnic


In [153]:
# Separate features (Activity Title) and labels (Category)
X = df['Activity Title']
y = df['Category']

# Vectorize the text data
vectorizer = CountVectorizer(stop_words='english')
X_vectorized = vectorizer.fit_transform(X)

# Encode labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)
y_categorical = to_categorical(y_encoded)

In [154]:
# Split the dataset
X_train, X_temp, y_train, y_temp = train_test_split(
    X_vectorized.toarray(), y_categorical, test_size=0.2, random_state=42
)  # 80% training data

X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42
)  # 10% validation and 10% testing data

In [155]:
# Neural Network model 
model = Sequential([
    Input(shape=(X_train.shape[1],)),
    Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    BatchNormalization(),
    Dropout(0.4),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.4),
    Dense(y_categorical.shape[1], activation='softmax')  # Output layer
])

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [156]:
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

In [157]:
# Train the model
history = model.fit(
    X_train, y_train,
    epochs=30,
    batch_size=32,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stopping],
    verbose=1
)

Epoch 1/30
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 18ms/step - accuracy: 0.1545 - loss: 2.6275 - val_accuracy: 0.2474 - val_loss: 1.9167
Epoch 2/30
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.3451 - loss: 1.7555 - val_accuracy: 0.2577 - val_loss: 1.8718
Epoch 3/30
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5307 - loss: 1.3145 - val_accuracy: 0.3093 - val_loss: 1.7903
Epoch 4/30
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6812 - loss: 0.9005 - val_accuracy: 0.3711 - val_loss: 1.6529
Epoch 5/30
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7481 - loss: 0.6775 - val_accuracy: 0.5000 - val_loss: 1.4579
Epoch 6/30
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8244 - loss: 0.5280 - val_accuracy: 0.6082 - val_loss: 1.2499
Epoch 7/30
[1m49/49[0m [32m━━━━━━━━━

In [158]:
# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

print(f'Test Accuracy: {test_accuracy}')

Test Accuracy: 0.8298969268798828


In [159]:
# Function to predict the category from new input
def predict_category(input_text):
    # Transform the input text using the trained vectorizer
    input_vectorized = vectorizer.transform([input_text])
    
    # Predict using the model
    predictions = model.predict(input_vectorized.toarray())
    
    # Get the index of the category with the highest probability
    predicted_index = predictions.argmax()
    
    # Convert the index to the category name using label_encoder
    predicted_category = label_encoder.inverse_transform([predicted_index])[0]
    
    return predicted_category

# prediction
new_input = "capstone project presentation"
predicted_category = predict_category(new_input)
print(f"{new_input} : {predicted_category}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step
capstone project presentation : formal
