In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler

# Load the dataset
def load_data(file_path):
    return pd.read_csv(file_path)

# Recommendation function based on location, maximum price, reviews, and ratings
def recommend_accommodations(data, location, max_price):
    # Filter data based on location and price
    data = data.loc[(data['Location'] == location) & (data['Price'] <= max_price)].copy()
    
    # Ensure 'Review' column contains only strings
    data.loc[:, 'Review'] = data['Review'].apply(lambda x: str(x) if pd.notnull(x) else '')
    
    # Apply TF-IDF to 'Review' column
    vectorizer = TfidfVectorizer(stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(data['Review'])
    
    # Normalize 'Rating' column
    scaler = MinMaxScaler()
    data.loc[:, 'Rating'] = scaler.fit_transform(data[['Rating']])
    
    # Concatenate TF-IDF features and 'Rating' column
    features = pd.concat([pd.DataFrame(tfidf_matrix.toarray()), data['Rating'].reset_index(drop=True)], axis=1)
    
    # Compute cosine similarity matrix
    cosine_sim = cosine_similarity(features, features)
    
    # Get the top 5 most similar accommodations
    sim_scores = list(enumerate(cosine_sim[0]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:6]  # Exclude the first one because it's the same accommodation
    
    # Get the accommodation indices
    accommodation_indices = [i[0] for i in sim_scores]
    
    # Return the top 5 most similar accommodations
    return data.iloc[accommodation_indices]

# Main function to run the recommendation system
def main():
    # Path to the dataset
    file_path = '../data/accomadations.csv'  # Adjust the path as needed
    
    # Load the dataset
    accommodations_data = load_data(file_path)
    
    # Example input parameters
    location = "Galle"
    max_price = 500
    
    # Get recommendations
    recommendations = recommend_accommodations(accommodations_data, location, max_price)
    
    # Print the recommendations
    print(recommendations)

if __name__ == "__main__":
    main()

                         Name Location  Price  Rating Review
7              Le Grand Galle    Galle    173     0.0   1041
10  Radisson Blu Resort Galle    Galle    125     0.0   2305
