# In [1]:
import pandas as pd
import io
import numpy as np

# --- Step 1: Load and Preprocess Hotel Data ---
# Builds `df_hotels`: a de-duplicated lookup table with exactly the columns
# 'Item ID', 'Hotel Name' and 'Hotel ID' (hotel IDs are stored as strings).
hotel_data_url = 'Group4_Part1_preprocessed.csv'
hotel_columns = ['Item ID', 'Hotel Name', 'Hotel ID']
try:
    df_hotels = pd.read_csv(hotel_data_url)
    # Keep only the three source columns and give them their display names.
    df_hotels = df_hotels[['itemid', 'hotelname', 'hotelid']].rename(
        columns={
            'itemid': 'Item ID',
            'hotelname': 'Hotel Name',
            'hotelid': 'Hotel ID',
        }
    )
    df_hotels['Hotel ID'] = df_hotels['Hotel ID'].astype(str)
    df_hotels = df_hotels.drop_duplicates(subset=hotel_columns).reset_index(drop=True)
except Exception as e:
    # Best-effort load: report the problem and continue with an empty table.
    # The fallback must still carry the expected columns, otherwise the later
    # merge on 'Item ID' fails with a KeyError instead of yielding no rows.
    print(f"Error in Step 1: {e}")
    df_hotels = pd.DataFrame(columns=hotel_columns)

# --- Step 2: Load and Process the Similarity Data ---
# Builds `df_melted_similarity`: one row per (item, user) pair with the columns
# 'Item ID', 'Hotel ID_from_sim', 'User' and 'Similarity'. Rows whose
# similarity is missing or non-numeric (including 'VISITED') are dropped.
similarity_data_url = 'user_cosine_similarity_matrix.csv'
try:
    df_similarity = pd.read_csv(similarity_data_url)
except Exception as e:
    print(f"Error loading similarity data from URL: {e}")
    df_similarity = pd.DataFrame()

# 'VISITED' marks cells that carry no similarity score; treat them as missing.
df_similarity = df_similarity.replace('VISITED', np.nan)

id_columns = ['itemid', 'hotelid']
user_columns = ['user_1', 'user_2', 'user_3', 'user_4', 'user_5']

# Coerce the user columns that exist to numbers and remember which ones they
# are: melting a column that is absent would raise a KeyError right after the
# warning has been printed.
available_user_columns = []
for col in user_columns:
    if col in df_similarity.columns:
        df_similarity[col] = pd.to_numeric(df_similarity[col], errors='coerce')
        available_user_columns.append(col)
    else:
        print(f"Warning: Column '{col}' not found in similarity data.")

missing_id_columns = [col for col in id_columns if col not in df_similarity.columns]
if missing_id_columns:
    print(f"Warning: Identifier column(s) {missing_id_columns} not found in similarity data.")

if available_user_columns and not missing_id_columns:
    df_melted_similarity = df_similarity.melt(
        id_vars=id_columns,
        value_vars=available_user_columns,
        var_name='User',
        value_name='Similarity'
    )
    df_melted_similarity = df_melted_similarity.rename(
        columns={'itemid': 'Item ID', 'hotelid': 'Hotel ID_from_sim'}
    )
    # 'user_3' -> 'User 3'; the prefix is a literal, not a regular expression.
    df_melted_similarity['User'] = (
        df_melted_similarity['User'].str.replace('user_', 'User ', regex=False).str.strip()
    )
    df_melted_similarity = df_melted_similarity.dropna(subset=['Similarity'])
else:
    # Nothing can be melted (failed load or unusable schema): continue with an
    # empty frame of the expected shape so later steps yield no rows instead
    # of crashing.
    df_melted_similarity = pd.DataFrame(
        columns=['Item ID', 'Hotel ID_from_sim', 'User', 'Similarity']
    )

# --- Step 3: Merging Data, Generating Top 10 Recommendations, and Saving ---
# Joins the per-user similarity rows with the hotel lookup table, keeps the
# highest-similarity rows for every user, and writes the result to a CSV file.
try:
    # Touch both names; a NameError means an earlier step never defined them.
    df_hotels, df_melted_similarity
except NameError:
    print("Error: df_hotels or df_melted_similarity not defined. Ensure Steps 1 and 2 ran correctly.")
    df_hotels = pd.DataFrame(columns=['Item ID', 'Hotel Name', 'Hotel ID'])
    df_melted_similarity = pd.DataFrame(columns=['Item ID', 'User', 'Similarity'])

# Inner join: only items present in both tables survive.
df_merged = df_melted_similarity.merge(
    df_hotels,
    how='inner',
    left_on='Item ID',
    right_on='Item ID',
)

# Order users alphabetically and, within each user, best similarity first.
df_sorted = df_merged.sort_values(
    by=['User', 'Similarity'],
    ascending=[True, False],
)

top_n = 10
# After the sort above, the first `top_n` rows of each group are the best ones.
df_top_recommendations = (
    df_sorted.groupby('User')
    .head(top_n)
    .reset_index(drop=True)
)
df_top_recommendations = df_top_recommendations.rename(columns={'User': 'User ID'})

final_columns = ['User ID', 'Item ID', 'Hotel ID', 'Hotel Name', 'Similarity']
df_final_recommendations = df_top_recommendations[final_columns]

output_filename = 'Group4_Part2_Recommendation23.csv'
df_final_recommendations.to_csv(output_filename, index=False)

print(f"Process complete. Recommendations saved to '{output_filename}'.")

# Output: Process complete. Recommendations saved to 'Group4_Part2_Recommendation23.csv'.
