<a href="https://colab.research.google.com/github/Rohit-Yadav-47/POI/blob/main/tourist_recommendaton_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Install necessary libraries (if not already installed)
# Uncomment the following lines if you need to install any packages
# !pip install pandas numpy scikit-learn tensorflow joblib

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
import joblib  # For saving the LabelEncoder

# Load the dataset
df = pd.read_csv("/content/data.csv")

# Display the first few rows of the dataset
print("Initial DataFrame:")
print(df.head())

# Display the column names
print("\nColumns in DataFrame:")
print(df.columns)

# Replace '-' with NaN to handle missing values
df.replace('-', np.nan, inplace=True)

# Fill missing values in PRIORITY columns with 'Unknown'
priority_columns = ['PRIORITY_1', 'PRIORITY_2', 'PRIORITY_3', 'PRIORITY_4', 'PRIORITY_5']
for col in priority_columns:
    df[col].fillna('Unknown', inplace=True)

# Display the DataFrame after filling missing values
print("\nDataFrame after filling missing PRIORITY values:")
print(df.head())

# Perform one-hot encoding on the PRIORITY columns
df = pd.get_dummies(df, columns=priority_columns)

# Display the DataFrame after one-hot encoding
print("\nDataFrame after one-hot encoding PRIORITY columns:")
print(df.head())

# Display the updated column names
print("\nUpdated Columns after One-Hot Encoding:")
print(df.columns)

# Create a mask to identify rows where all priority columns are zero or 'Unknown'
priority_dummies = [col for col in df.columns if col.startswith('PRIORITY_')]
mask = (df[priority_dummies] == 0).all(axis=1)

# Create a new column "PRIORITY_Unknown" to indicate the lowest priority
df['PRIORITY_Unknown'] = mask.astype(int)

# Display the DataFrame after adding 'PRIORITY_Unknown' column
print("\nDataFrame after adding 'PRIORITY_Unknown' column:")
print(df.head())

# Define feature matrix X and target vector y
X = df.drop(['PID', 'POIs'], axis=1)  # Features (excluding PID and POIs columns)
y = df['POIs']  # Target variable

# Encode the target variable
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Display encoded target values
print("\nEncoded target values:")
print(y_encoded)

# Save the LabelEncoder for future use
joblib.dump(label_encoder, 'label_encoder.joblib')
print("\nLabelEncoder has been saved as 'label_encoder.joblib'.")

# Split the dataset into training and testing sets
X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42
)

# Convert feature columns to numerical types (int)
X_train = X_train.astype(int)
X_test = X_test.astype(int)

# Verify the data types
print("\nData Types of Training Features After Conversion:")
print(X_train.dtypes)

print("\nData Types of Testing Features After Conversion:")
print(X_test.dtypes)

# Display the number of features and classes
number_of_features = X_train.shape[1]
number_of_classes = len(label_encoder.classes_)
print(f"\nNumber of features: {number_of_features}")
print(f"Number of classes: {number_of_classes}")

# Define the neural network model with additional layers to induce overfitting
model = tf.keras.Sequential([
    tf.keras.layers.Input(shape=(number_of_features,)),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(number_of_classes, activation='softmax')
])

# Compile the model
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

# Display the model architecture
print("\nNeural Network Model Summary:")
model.summary()

# Train the model
print("\nTraining the model...")
history = model.fit(
    X_train, y_train_encoded,
    epochs=500,
    batch_size=100,
    validation_split=0.1,
    verbose=1
)

# Save the trained model
model.save('poi_model.h5')
print("\nModel has been saved as 'poi_model.h5'.")


Initial DataFrame:
    PID                                 POIs           PRIORITY_1  \
0  POI1                         AMBER PALACE  History and Culture   
1  POI2                   CITY PALACE JAIPUR  History and Culture   
2  POI3                 JANTAR MANTAR JAIPUR  History and Culture   
3  POI4                           HAWA MAHAL  History and Culture   
4  POI5  ALBERT HALL MUSEUM (CENTRAL MUSEUM)               Museum   

            PRIORITY_2         PRIORITY_3 PRIORITY_4       PRIORITY_5  
0               Museum  Local Experiences     Scenic        Adventure  
1               Museum  Local Experiences     Scenic  Food and Drinks  
2               Museum                  -          -                -  
3                    -  Local Experiences     Scenic                -  
4  History and Culture             Scenic          -                -  

Columns in DataFrame:
Index(['PID', 'POIs', 'PRIORITY_1', 'PRIORITY_2', 'PRIORITY_3', 'PRIORITY_4',
       'PRIORITY_5'],
      dtype

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna('Unknown', inplace=True)



Training the model...
Epoch 1/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 738ms/step - accuracy: 0.0000e+00 - loss: 5.9085 - val_accuracy: 0.0000e+00 - val_loss: 5.9101
Epoch 2/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 11ms/step - accuracy: 0.0049 - loss: 5.9066 - val_accuracy: 0.0000e+00 - val_loss: 5.9126
Epoch 3/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0019 - loss: 5.9049 - val_accuracy: 0.0000e+00 - val_loss: 5.9152
Epoch 4/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.0019 - loss: 5.9028 - val_accuracy: 0.0000e+00 - val_loss: 5.9180
Epoch 5/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0049 - loss: 5.8995 - val_accuracy: 0.0000e+00 - val_loss: 5.9208
Epoch 6/500
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - accuracy: 0.0019 - loss: 5.8921 - val_accuracy: 0.0000e+00 - val_lo




Model has been saved as 'poi_model.h5'.


In [5]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
import joblib  # For loading the LabelEncoder

# Load the saved model
loaded_model = tf.keras.models.load_model('poi_model.h5')
print("\nModel has been loaded from 'poi_model.h5'.")

# Load the saved LabelEncoder
loaded_label_encoder = joblib.load('label_encoder.joblib')
print("LabelEncoder has been loaded from 'label_encoder.joblib'.")

# Define user priorities
user_priorities = ['Scenic', 'Scenic', 'Scenic', 'Unknown', 'Unknown']

# Assuming you have the same feature columns as during training
# Load the original feature columns (excluding 'PID' and 'POIs')
# It's essential that the user_feature_vector has the same columns as X_train
# If not, you need to ensure the columns match

# For simplicity, reload the dataset to get the feature columns
df = pd.read_csv("/content/data.csv")
df.replace('-', np.nan, inplace=True)
priority_columns = ['PRIORITY_1', 'PRIORITY_2', 'PRIORITY_3', 'PRIORITY_4', 'PRIORITY_5']
for col in priority_columns:
    df[col].fillna('Unknown', inplace=True)
df = pd.get_dummies(df, columns=priority_columns)
priority_dummies = [col for col in df.columns if col.startswith('PRIORITY_')]
mask = (df[priority_dummies] == 0).all(axis=1)
df['PRIORITY_Unknown'] = mask.astype(int)
feature_columns = df.drop(['PID', 'POIs'], axis=1).columns

# Create a DataFrame for the user's feature vector with all zeros
user_feature_vector = pd.DataFrame(0, index=[0], columns=feature_columns)

# Set the appropriate priority columns to 1 based on user input
for i, priority in enumerate(user_priorities, start=1):
    if priority != 'Unknown':
        column_name = f'PRIORITY_{i}_{priority}'
        if column_name in user_feature_vector.columns:
            user_feature_vector.at[0, column_name] = 1

# If all priorities are 'Unknown', set 'PRIORITY_Unknown' to 1
if all(priority == 'Unknown' for priority in user_priorities):
    user_feature_vector.at[0, 'PRIORITY_Unknown'] = 1

# Display the user's feature vector
print("\nUser Feature Vector:")
print(user_feature_vector)

# Save the feature columns
np.save('feature_columns.npy', X_train.columns)
print("\nFeature columns have been saved as 'feature_columns.npy'.")


# Ensure the feature vector has the same columns as the model expects
missing_cols = set(feature_columns) - set(user_feature_vector.columns)
for col in missing_cols:
    user_feature_vector[col] = 0
user_feature_vector = user_feature_vector[feature_columns]

# Convert the user's features to a NumPy array and ensure it's float
user_features = user_feature_vector.astype(float).values

# Predict the probabilities of all POIs for the user
predictions = loaded_model.predict(user_features)

# Define the number of top recommendations
N = 5  # Adjust this value as needed

# Get the indices of the top N recommended POIs
top_indices = np.argsort(predictions[0])[::-1][:N]

# Decode the recommended POIs using the loaded label encoder
recommended_pois = loaded_label_encoder.inverse_transform(top_indices)

# Print the recommended POIs with their probabilities
print("\nRecommended POIs:")
for idx, poi in enumerate(recommended_pois, start=1):
    probability = predictions[0][top_indices[idx-1]]
    print(f"{idx}. {poi} (Probability: {probability:.2f})")


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna('Unknown', inplace=True)



Model has been loaded from 'poi_model.h5'.
LabelEncoder has been loaded from 'label_encoder.joblib'.

User Feature Vector:
   PRIORITY_1_Adventure  PRIORITY_1_Food and Drinks  \
0                     0                           0   

   PRIORITY_1_History and Culture  PRIORITY_1_Local Experiences  \
0                               0                             0   

   PRIORITY_1_Museum  PRIORITY_1_Religious  PRIORITY_1_Religious Site  \
0                  0                     0                          0   

   PRIORITY_1_Scenic  PRIORITY_1_Shopping  PRIORITY_1_Shows and Concerts  ...  \
0                  1                    0                              0  ...   

   PRIORITY_5_Food and Drinks  PRIORITY_5_History and Culture  \
0                           0                               0   

   PRIORITY_5_Local Experiences  PRIORITY_5_Museum  PRIORITY_5_Religious  \
0                             0                  0                     0   

   PRIORITY_5_Scenic  PRIORITY_5_Sho


Feature columns have been saved as 'feature_columns.npy'.
