# In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt
import seaborn as sns

# Download the NLTK resources used by preprocess_text.
# 'punkt' serves word_tokenize on older NLTK releases; recent releases
# (3.9+) load 'punkt_tab' instead, so fetch both to work on either.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(nltk_resource)

# Load user data and place data from CSV
# NOTE(review): these are absolute, machine-specific Windows paths; the script
# only runs where these exact files exist.
user_data = pd.read_csv('C:\\Users\\ROG\\OneDrive\\Desktop\\Root\\ML\\user_data.csv')
places_data = pd.read_csv('C:\\Users\\ROG\\OneDrive\\Desktop\\Root\\ML\\places_data.csv')

# NLP preprocessing function
# Translation table that deletes every ASCII punctuation character; built once.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# English stop-word set, filled lazily on first use so that importing this
# code does not require the NLTK corpus to be present yet.
_STOP_WORDS = None


def preprocess_text(text):
    """Normalise a piece of free text for TF-IDF vectorisation.

    The text is lower-cased, stripped of ASCII punctuation, tokenised with
    NLTK's ``word_tokenize`` and filtered against NLTK's English stop-word
    list. The surviving tokens are joined back with single spaces.

    Args:
        text: The raw text. Non-string values (for example the NaN that
            pandas produces for an empty CSV cell, or ``None``) are treated
            as "no text".

    Returns:
        The cleaned text as a single string; ``''`` for non-string input.
    """
    global _STOP_WORDS

    # Empty CSV cells arrive as float NaN, which has no .lower(); map any
    # non-text value to an empty document instead of crashing the .apply().
    if not isinstance(text, str):
        return ''

    if _STOP_WORDS is None:
        _STOP_WORDS = frozenset(stopwords.words('english'))

    # Lowercase first so the stop-word comparison below is case-insensitive.
    cleaned = text.lower().translate(_PUNCTUATION_TABLE)
    tokens = word_tokenize(cleaned)

    return ' '.join(token for token in tokens if token not in _STOP_WORDS)

# Clean the free-text column of each dataset in place before vectorising it
for frame, text_column in ((user_data, 'Preferred Activities'), (places_data, 'latest_reviews')):
    frame[text_column] = frame[text_column].apply(preprocess_text)

# The cleaned reviews are reused, unchanged, as each place's description
places_data['Place Description'] = places_data['latest_reviews']

# Vectorize user and place data separately using TF-IDF with unigrams and
# bigrams; each vectorizer is fitted on its own dataset only.
tfidf_settings = {'ngram_range': (1, 2), 'max_features': 5000}

user_vectorizer = TfidfVectorizer(**tfidf_settings)
user_tfidf = user_vectorizer.fit_transform(user_data['Preferred Activities'])

place_vectorizer = TfidfVectorizer(**tfidf_settings)
place_tfidf = place_vectorizer.fit_transform(places_data['Place Description'])

# Encode place names as integer target labels
label_encoder = LabelEncoder()
place_labels = label_encoder.fit_transform(places_data['name'])

# Simulate user-place interaction data: there are no real interactions in the
# inputs, so each user row is simply paired with a place label by position.
num_users = user_tfidf.shape[0]
num_places = place_labels.shape[0]

# Cycle through the place labels often enough to cover every user, then cut
# the result down to exactly one label per user.
tile_count = num_users // num_places + 1
tiled_place_labels = np.tile(place_labels, tile_count)
repeated_place_labels = tiled_place_labels[:num_users]

# Train-test split (80/20, fixed seed) on the dense user features
X_train, X_test, y_train, y_test = train_test_split(
    user_tfidf.toarray(),
    repeated_place_labels,
    test_size=0.2,
    random_state=42,
)

# Build the Neural Network model
# The softmax layer needs exactly one unit per class known to the label
# encoder. len(places_data) over-counts when place names repeat, which would
# let the network emit class indices that label_encoder cannot decode.
num_classes = len(label_encoder.classes_)

# Three L2-regularised ReLU layers, each followed by dropout, feeding a
# softmax over the encoded place labels.
model = Sequential([
    Dense(256, activation='relu', input_shape=(X_train.shape[1],), kernel_regularizer=l2(0.001)),
    Dropout(0.4),
    Dense(128, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.4),
    Dense(64, activation='relu', kernel_regularizer=l2(0.001)),
    Dropout(0.4),
    Dense(num_classes, activation='softmax')
])

# Compile the model with the Adam optimizer. The targets are integer class
# ids (not one-hot vectors), hence the sparse categorical cross-entropy loss.
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Learning rate schedule
def scheduler(epoch, lr):
    """Return the learning rate to use next: 90% of the current one.

    Args:
        epoch: Index of the epoch being scheduled (not used by this schedule).
        lr: The current learning rate.

    Returns:
        ``lr`` reduced by 10%.
    """
    decay_factor = 0.9
    decayed_lr = lr * decay_factor
    return decayed_lr

# Callback that applies `scheduler` to the learning rate
lr_callback = LearningRateScheduler(scheduler)

# Early stopping: give up after 5 epochs without a better validation loss and
# roll the model back to its best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

# Train the model, holding out 20% of the training data for validation
training_callbacks = [lr_callback, early_stopping]
model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    callbacks=training_callbacks,
)

# Predict on the test set and keep, per sample, the class with the highest
# predicted probability
y_pred_probs = model.predict(X_test)
y_pred = y_pred_probs.argmax(axis=1)

# Generate confusion matrix (true labels vs. predicted labels)
conf_matrix = confusion_matrix(y_test, y_pred)

# Plot confusion matrix as a Seaborn heatmap; cell annotations are switched off
plt.figure(figsize=(10, 8))
heatmap_axes = sns.heatmap(conf_matrix, annot=False, fmt='d', cmap='Blues')
heatmap_axes.set_title('Confusion Matrix')
heatmap_axes.set_ylabel('True Label')
heatmap_axes.set_xlabel('Predicted Label')
plt.show()

# Function to recommend places using the Neural Network
def recommend_places_nn(user_index, model, user_tfidf, place_data, label_encoder, top_n=5):
    """Recommend the places the model scores highest for one user.

    Args:
        user_index: Row position of the user inside ``user_tfidf``.
        model: Trained classifier exposing ``predict``; it must return one
            row of per-class scores for a single input row.
        user_tfidf: User feature matrix, either dense or a sparse matrix
            exposing ``toarray()``.
        place_data: DataFrame with at least the columns ``name``, ``rating``
            and ``latest_reviews``.
        label_encoder: Fitted encoder exposing ``classes_`` and
            ``inverse_transform`` to map class ids back to place names.
        top_n: How many places to recommend (default 5).

    Returns:
        The ``name``, ``rating`` and ``latest_reviews`` columns of the
        recommended places, ordered from highest to lowest predicted score.

    Raises:
        ValueError: If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError("top_n must be non-negative")

    # The network is trained on dense arrays, so densify a sparse TF-IDF row
    # before predicting rather than relying on Keras accepting sparse input.
    user_vector = user_tfidf[user_index]
    if hasattr(user_vector, 'toarray'):
        user_vector = user_vector.toarray()
    user_vector = np.asarray(user_vector).reshape(1, -1)

    # Predict the probabilities of each place
    predictions = model.predict(user_vector)

    # Only scores that correspond to a class known to the encoder can be
    # decoded; ignore any surplus output units.
    scores = np.asarray(predictions[0])[:len(label_encoder.classes_)]

    # Class ids sorted by descending score, truncated to the requested count
    top_indices = np.argsort(scores)[::-1][:top_n]

    # Decode the predicted place indices back to place names
    recommended_names = np.asarray(label_encoder.inverse_transform(top_indices)).tolist()
    rank_by_name = {name: rank for rank, name in enumerate(recommended_names)}

    # Boolean filtering returns rows in DataFrame order, so re-sort them by
    # model rank; the stable sort keeps same-named rows in original order.
    matches = place_data[place_data['name'].isin(recommended_names)]
    ranks = [rank_by_name[name] for name in matches['name']]
    ranked_positions = np.argsort(ranks, kind='stable')

    return matches.iloc[ranked_positions][['name', 'rating', 'latest_reviews']]

# Example: recommend places for the first user (row 0 of the user data)
demo_user_index = 0
recommended_places = recommend_places_nn(
    demo_user_index,
    model,
    user_tfidf,
    places_data,
    label_encoder,
)
print(f"Top 5 recommendations for {user_data.iloc[0]['Name']}:\n", recommended_places)

# Example: evaluate model performance on the held-out test split
evaluation_result = model.evaluate(X_test, y_test)
loss, accuracy = evaluation_result
print(f"Model Evaluation - Loss: {loss}, Accuracy: {accuracy}")
