# In [None]:
import nltk
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras.layers import Concatenate, Dense, Dropout, Embedding, Flatten, Input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.regularizers import l2

# Fetch the NLTK resources used by this module (network access on first run).
nltk.download('stopwords')
nltk.download('punkt')
# Recent NLTK releases make word_tokenize load 'punkt_tab' instead of the
# pickled 'punkt' model; without it tokenization raises LookupError. On older
# releases that do not know this package the call just reports the problem
# (default raise_on_error=False), so requesting both is harmless.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger')
nltk.download('omw-1.4')
nltk.download('wordnet')

class RestaurantRecommender:
    """Rating predictor built on user/restaurant embeddings plus side features.

    Typical flow: ``preprocess_data`` -> ``train_model`` -> ``predict``.

    The design matrix passed to ``train_model`` must have the encoded user ID
    in column 0 and the encoded restaurant ID in column 1; every remaining
    column is treated as a numeric side feature.
    """

    # Categorical columns that each get an integer ``<name>_encoded`` column.
    CATEGORICAL_COLUMNS = ('location', 'cuisines', 'rest_type')

    def __init__(self):
        # Kept for backward compatibility. After ``preprocess_data`` it refers
        # to the encoder fitted on the last categorical column.
        self.le = LabelEncoder()
        # One fitted encoder per categorical column, so no mapping is lost by
        # refitting a shared encoder.
        self.label_encoders = {}
        try:
            # scikit-learn >= 1.2 spells the dense-output flag ``sparse_output``.
            self.ohe = OneHotEncoder(sparse_output=False)
        except TypeError:
            # Older scikit-learn only knows the legacy ``sparse`` flag.
            self.ohe = OneHotEncoder(sparse=False)
        self.lemmatizer = WordNetLemmatizer()
        self.stop_words = set(stopwords.words('english'))
        self.model = None
        # Raw ID -> contiguous embedding index; filled by ``preprocess_data``.
        self.user_id_map = {}
        self.item_id_map = {}
        # Number of side-feature columns the trained model expects.
        self.num_features = 0

    def preprocess_data(self, df):
        """Return a copy of *df* with encoded columns appended.

        Adds ``location_encoded``, ``cuisines_encoded``, ``rest_type_encoded``,
        one column per distinct ``price`` value (named after that value),
        ``user_id_encoded`` and ``item_id_encoded``. Also fits the encoders and
        rebuilds ``user_id_map`` / ``item_id_map`` on this instance.

        Args:
            df: DataFrame with ``location``, ``cuisines``, ``rest_type``,
                ``price``, ``user_id`` and ``restaurant_id`` columns.

        Returns:
            A new DataFrame; the caller's frame is left untouched.
        """
        # Work on a copy so the caller's frame is not half-mutated.
        df = df.copy()

        # Encode categorical features, one dedicated encoder per column.
        for column in self.CATEGORICAL_COLUMNS:
            encoder = LabelEncoder()
            df[f'{column}_encoded'] = encoder.fit_transform(df[column])
            self.label_encoders[column] = encoder
        self.le = self.label_encoders[self.CATEGORICAL_COLUMNS[-1]]

        # One-hot encode price range. The new frame must share df's index,
        # otherwise concat aligns on labels and produces NaN rows whenever df
        # does not carry a default 0..n-1 index.
        price_encoded = self.ohe.fit_transform(df['price'].values.reshape(-1, 1))
        price_frame = pd.DataFrame(
            price_encoded, columns=self.ohe.categories_[0], index=df.index
        )
        df = pd.concat([df, price_frame], axis=1)

        # Map raw user and restaurant IDs to contiguous embedding indices.
        self.user_id_map = {raw: idx for idx, raw in enumerate(df['user_id'].unique())}
        df['user_id_encoded'] = df['user_id'].map(self.user_id_map)

        self.item_id_map = {raw: idx for idx, raw in enumerate(df['restaurant_id'].unique())}
        df['item_id_encoded'] = df['restaurant_id'].map(self.item_id_map)

        return df

    def preprocess_user_input(self, user_input):
        """Lower-case, tokenize, drop stop words/non-alphabetic tokens, lemmatize.

        Returns:
            The surviving lemmas joined by single spaces.
        """
        tokens = word_tokenize(user_input.lower())
        filtered_tokens = [
            word for word in tokens
            if word not in self.stop_words and word.isalpha()
        ]
        lemmatized_tokens = [self.lemmatizer.lemmatize(word) for word in filtered_tokens]
        return ' '.join(lemmatized_tokens)

    def build_model(self, num_users, num_items, embedding_dim=50, hidden_units=(128, 64),
                    dropout_rate=0.2, num_features=0):
        """Build and compile the rating-regression network.

        A ``Sequential`` stack cannot hold two parallel embeddings and a
        ``Concatenate``, so the graph is built with the functional API: one
        embedding branch per ID, optionally joined by a dense side-feature
        input, followed by the hidden layers and a single linear output.

        Args:
            num_users: Size of the user embedding vocabulary.
            num_items: Size of the restaurant embedding vocabulary.
            embedding_dim: Width of both embeddings.
            hidden_units: Iterable of hidden layer sizes.
            dropout_rate: Dropout applied after every hidden layer.
            num_features: Number of numeric side features; ``0`` builds a
                model with only the two ID inputs.

        Returns:
            A compiled Keras model whose inputs are
            ``[user_ids, item_ids]`` or ``[user_ids, item_ids, features]``.
        """
        user_input = Input(shape=(1,), name='user_id')
        item_input = Input(shape=(1,), name='item_id')

        user_vector = Flatten()(Embedding(num_users, embedding_dim)(user_input))
        item_vector = Flatten()(Embedding(num_items, embedding_dim)(item_input))

        inputs = [user_input, item_input]
        branches = [user_vector, item_vector]
        if num_features > 0:
            feature_input = Input(shape=(num_features,), name='features')
            inputs.append(feature_input)
            branches.append(feature_input)

        # Concatenate user, item, and side features.
        hidden = Concatenate(axis=1)(branches)

        for units in hidden_units:
            hidden = Dense(units, activation='relu', kernel_regularizer=l2(0.01))(hidden)
            hidden = Dropout(dropout_rate)(hidden)

        output = Dense(1)(hidden)

        model = Model(inputs=inputs, outputs=output)
        model.compile(optimizer='adam', loss='mse', metrics=['mae'])
        return model

    @staticmethod
    def _split_inputs(matrix):
        """Split a 2-D design matrix into the list of arrays the model takes.

        Column 0 -> user indices, column 1 -> item indices (both integer,
        shape ``(n, 1)``); any remaining columns -> float side features.
        """
        matrix = np.asarray(matrix)
        inputs = [
            matrix[:, 0:1].astype('int32'),
            matrix[:, 1:2].astype('int32'),
        ]
        if matrix.shape[1] > 2:
            inputs.append(matrix[:, 2:].astype('float32'))
        return inputs

    def train_model(self, X_train, y_train, X_test, y_test, epochs=10, batch_size=32):
        """Build, fit and save the model.

        Args:
            X_train, X_test: DataFrames laid out as described on the class
                (encoded user ID, encoded item ID, then side features).
            y_train, y_test: Series of target ratings.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.

        Raises:
            RuntimeError: If ``preprocess_data`` has not populated the ID maps.
            ValueError: If ``X_train`` has fewer than two columns.
        """
        if not self.user_id_map or not self.item_id_map:
            raise RuntimeError("Call preprocess_data() before train_model().")
        if X_train.shape[1] < 2:
            raise ValueError("X_train needs at least the user and item ID columns.")

        self.num_features = X_train.shape[1] - 2
        self.model = self.build_model(
            len(self.user_id_map),
            len(self.item_id_map),
            num_features=self.num_features,
        )
        self.model.fit(
            self._split_inputs(X_train.values),
            y_train.values,
            epochs=epochs,
            batch_size=batch_size,
            validation_data=(self._split_inputs(X_test.values), y_test.values),
        )
        self.model.save('restaurant_recommender_model.h5')

    def predict(self, user_id, item_id, features):
        """Predict the rating a user would give a restaurant.

        Args:
            user_id: Raw user ID seen during ``preprocess_data``.
            item_id: Raw restaurant ID seen during ``preprocess_data``.
            features: Numeric side features in the same order as the feature
                columns used for training.

        Returns:
            The scalar prediction taken from the model output.

        Raises:
            RuntimeError: If no model has been trained or assigned.
            KeyError: If the user or restaurant ID is unknown.
        """
        if self.model is None:
            raise RuntimeError("Model is not trained; call train_model() first.")
        try:
            user_encoded = self.user_id_map[user_id]
        except KeyError:
            raise KeyError(f"Unknown user_id: {user_id!r}") from None
        try:
            item_encoded = self.item_id_map[item_id]
        except KeyError:
            raise KeyError(f"Unknown item_id: {item_id!r}") from None

        row = np.asarray([[user_encoded, item_encoded, *features]], dtype='float64')
        prediction = self.model.predict(self._split_inputs(row))
        return prediction[0][0]

# Example usage
def main():
    """Train on ``cleaned_file.csv`` and print one sample prediction."""
    # Load and preprocess data
    df = pd.read_csv("cleaned_file.csv")  # Replace with your data file
    recommender = RestaurantRecommender()
    df = recommender.preprocess_data(df)

    # The one-hot price columns are named after the price values actually
    # present in the data, so read them from the fitted encoder rather than
    # hard-coding labels that may not exist in this file.
    price_columns = list(recommender.ohe.categories_[0])
    feature_columns = ['location_encoded', 'cuisines_encoded', 'rest_type_encoded'] + price_columns

    # Split data into training and testing sets (ID columns must come first)
    X = df[['user_id_encoded', 'item_id_encoded'] + feature_columns]
    y = df['rating']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Train the model
    recommender.train_model(X_train, y_train, X_test, y_test)

    # Make a prediction for a real (user, restaurant) pair from the held-out
    # rows; made-up IDs would raise KeyError unless they occur in the data.
    sample = df.loc[X_test.index[0]]
    user_id = sample['user_id']
    item_id = sample['restaurant_id']
    features = sample[feature_columns].tolist()
    predicted_rating = recommender.predict(user_id, item_id, features)
    print(f"Predicted rating for user {user_id} and item {item_id}: {predicted_rating}")


if __name__ == "__main__":
    main()