In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sample data: You should replace this with your own dataset
data = pd.DataFrame({'user_id': [1, 2, 3, 1, 2, 3],
                     'message': ['I love #travel', 'Exploring new places', 'Foodie #food',
                                 'Planning a #trip', 'Travel adventures', 'Delicious #food']})

# Preprocess the data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data['message'])
sequences = tokenizer.texts_to_sequences(data['message'])

X = pad_sequences(sequences)  # Padded sequences for RNN input

# Create labels (hashtags)
labels = data['message'].str.extractall(r'#(\w+)')[0].tolist()

# Build an RNN model
model = Sequential()
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=32, input_length=X.shape[1]))
model.add(LSTM(64))
model.add(Dense(len(labels), activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model (you should train on a larger dataset)
model.fit(X, np.zeros((len(X), len(labels))), epochs=10, batch_size=2)

# Make hashtag recommendations for a user (user_id=1)
user_messages = data[data['user_id'] == 2]['message']
user_sequences = tokenizer.texts_to_sequences(user_messages)
user_X = pad_sequences(user_sequences, maxlen=X.shape[1])

recommendations = model.predict(user_X)
recommended_hashtags = [labels[i] for i in recommendations.argmax(axis=1)]
print("Recommended hashtags:", recommended_hashtags)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Recommended hashtags: ['travel', 'travel']
