In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer
import string
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping

# Download the NLTK resources used by the preprocessing step:
#   - 'punkt' / 'punkt_tab': tokenizer models behind word_tokenize. Recent
#     NLTK releases load 'punkt_tab' instead of the pickled 'punkt' models,
#     so both are fetched to keep the script working across NLTK versions.
#   - 'stopwords': the English stopword list.
#   - 'wordnet': the lexical database used by WordNetLemmatizer.
for nltk_resource in ('punkt', 'punkt_tab', 'stopwords', 'wordnet'):
    nltk.download(nltk_resource)

# Load user data and place data from CSV.
# NOTE(review): these are absolute, machine-specific Windows paths; the script
# only runs where the files exist at exactly these locations.
user_data = pd.read_csv('C:\\Users\\ROG\\OneDrive\\Desktop\\Root\\ML\\user_data.csv')
places_data = pd.read_csv('C:\\Users\\ROG\\OneDrive\\Desktop\\Root\\ML\\places_data.csv')

# NLP Preprocessing function

# Translation table that deletes every ASCII punctuation character.
_PUNCTUATION_TABLE = str.maketrans('', '', string.punctuation)

# NLTK resources shared by all preprocess_text calls; filled on first use so
# the stopword corpus is read once instead of once per row.
_PREPROCESS_RESOURCES = {}


def _get_preprocess_resources():
    """Return the cached (stop_words, lemmatizer) pair, building it on first use."""
    if not _PREPROCESS_RESOURCES:
        _PREPROCESS_RESOURCES['stop_words'] = frozenset(stopwords.words('english'))
        _PREPROCESS_RESOURCES['lemmatizer'] = WordNetLemmatizer()
    return _PREPROCESS_RESOURCES['stop_words'], _PREPROCESS_RESOURCES['lemmatizer']


def preprocess_text(text):
    """Normalize free text into a space-separated string of lemmatized tokens.

    The text is lowercased, stripped of ASCII punctuation, tokenized with
    ``word_tokenize``, filtered of English stopwords and all-digit tokens,
    and each remaining token is lemmatized.

    Args:
        text: The raw text. Anything that is not a ``str`` (for example the
            float NaN pandas uses for an empty CSV cell, or ``None``) is
            treated as missing.

    Returns:
        The cleaned tokens joined by single spaces; ``''`` when ``text`` is
        missing or no token survives the filtering.
    """
    # Missing values are not strings and have no .lower(); treat them as
    # empty text instead of raising AttributeError.
    if not isinstance(text, str):
        return ''

    stop_words, lemmatizer = _get_preprocess_resources()

    # Lowercase, drop punctuation, then split into word tokens.
    words = word_tokenize(text.lower().translate(_PUNCTUATION_TABLE))

    # Drop stopwords and purely numeric tokens, lemmatizing what remains.
    lemmatized_words = [
        lemmatizer.lemmatize(word)
        for word in words
        if word not in stop_words and not word.isdigit()
    ]

    return ' '.join(lemmatized_words)

# Apply preprocessing to the activities and reviews in both datasets.
# Empty CSV cells are loaded by pandas as NaN (a float), which has no string
# methods, so missing values are replaced with '' and everything is cast to
# str before the text cleaning runs.
user_data['Preferred Activities'] = (
    user_data['Preferred Activities'].fillna('').astype(str).apply(preprocess_text)
)
places_data['latest_reviews'] = (
    places_data['latest_reviews'].fillna('').astype(str).apply(preprocess_text)
)

# The cleaned 'latest_reviews' text is the only source of the place
# description; it is copied into its own column for the vectorizer below.
places_data['Place Description'] = places_data['latest_reviews']

# Vectorize user and place data separately using TF-IDF. Each vectorizer
# learns its own vocabulary, so the two matrices do not share a feature space.
user_vectorizer = TfidfVectorizer()
user_tfidf = user_vectorizer.fit_transform(user_data['Preferred Activities'])

place_vectorizer = TfidfVectorizer()
place_tfidf = place_vectorizer.fit_transform(places_data['Place Description'])

# Turn each place name into an integer class id to use as the training target.
label_encoder = LabelEncoder()
place_labels = label_encoder.fit_transform(places_data['name'])

# Simulated user-place interactions: there is no real interaction data, so
# every user is assigned exactly one place by cycling through the place labels.
num_users = user_tfidf.shape[0]
num_places = place_labels.shape[0]

# Tile the labels enough times to cover all users, then cut to one per user.
tile_count = num_users // num_places + 1
repeated_place_labels = np.tile(place_labels, tile_count)[:num_users]

# Hold out 20% of the users for testing; features are densified for Keras.
user_features = user_tfidf.toarray()
X_train, X_test, y_train, y_test = train_test_split(
    user_features,
    repeated_place_labels,
    test_size=0.2,
    random_state=42,
)

# Build the Neural Network model.
# The softmax layer gets one unit per encoded class rather than one per row
# of places_data: if a place name occurs more than once there are fewer
# classes than rows, and extra units would yield class indices that
# label_encoder.inverse_transform cannot decode.
num_classes = len(label_encoder.classes_)

model = Sequential([
    Dense(512, activation='relu', input_shape=(X_train.shape[1],)),
    Dropout(0.5),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Compile the model; targets are integer class ids, hence the sparse loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting: stop after 3 epochs without a better
# validation loss and roll back to the best weights seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

# Train the model, holding out 20% of the training rows for validation.
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2, callbacks=[early_stopping])

# Function to recommend places using the Neural Network
def recommend_places_nn(user_index, model, user_tfidf, place_data, label_encoder, top_k=5):
    """Return the places the model scores highest for one user, best first.

    Args:
        user_index: Row position of the user inside ``user_tfidf``.
        model: Trained model exposing ``predict``; called with a dense
            ``(1, n_features)`` array.
        user_tfidf: Per-user feature matrix, either a NumPy array or a SciPy
            sparse matrix (rows are densified before prediction).
        place_data: DataFrame with 'name', 'rating' and 'latest_reviews'
            columns.
        label_encoder: Fitted encoder exposing ``classes_`` and
            ``inverse_transform`` to map class indices back to place names.
        top_k: Number of place names to recommend. Defaults to 5.

    Returns:
        The 'name', 'rating' and 'latest_reviews' columns of the matching
        ``place_data`` rows, ordered from the highest to the lowest predicted
        probability. If several rows share a recommended name, all of them
        are returned.
    """
    # Training used dense arrays, so densify a sparse row before predicting.
    user_row = user_tfidf[user_index]
    if hasattr(user_row, 'toarray'):
        user_row = user_row.toarray()
    user_vector = np.asarray(user_row).reshape(1, -1)

    # Predict the probabilities of each place
    probabilities = np.asarray(model.predict(user_vector))[0]

    # Only scores that correspond to an encoded class can be decoded back to
    # a name; ignore any surplus output units.
    probabilities = probabilities[:len(label_encoder.classes_)]

    # Indices of the top_k highest probabilities, best first
    top_indices = np.argsort(probabilities)[::-1][:top_k]

    # Decode the predicted place indices back to place names
    recommended_names = label_encoder.inverse_transform(top_indices)

    # A plain isin() filter keeps DataFrame order, so re-sort the matching
    # rows by the rank the model gave to each name.
    matches = place_data[place_data['name'].isin(recommended_names)]
    rank_of = {name: rank for rank, name in enumerate(recommended_names)}
    order = np.argsort(matches['name'].map(rank_of).to_numpy(), kind='stable')

    return matches.iloc[order][['name', 'rating', 'latest_reviews']]

# Example: recommendations for the user stored in row 0 of the user data.
recommended_places = recommend_places_nn(
    user_index=0,
    model=model,
    user_tfidf=user_tfidf,
    place_data=places_data,
    label_encoder=label_encoder,
)
print(f"Top 5 recommendations for {user_data.iloc[0]['Name']}:\n", recommended_places)

# Example: score the trained model on the held-out test split.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f"Model Evaluation - Loss: {loss}, Accuracy: {accuracy}")

In [None]:
import pickle

# Persist the trained model. The 'model.pkl' file name is unchanged so any
# existing consumer of that file keeps working.
# NOTE(review): whether a Keras model can be pickled depends on the installed
# TensorFlow/Keras version; Keras's own persistence API is model.save(...).
# Confirm this works in the target environment.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# The network consumes TF-IDF vectors and emits encoded class indices, so
# scoring new text later needs the same fitted vectorizer and label encoder.
# They are stored next to the model in a separate file.
with open('preprocessing.pkl', 'wb') as preprocessing_file:
    pickle.dump(
        {'user_vectorizer': user_vectorizer, 'label_encoder': label_encoder},
        preprocessing_file,
    )