## Import library

In [61]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

## Read dataset

In [62]:
df = pd.read_csv('/content/data_tempat_wisata.csv')
df.head()

Unnamed: 0,jenis,nama,rating,deskripsi
0,rekreasi,Pulau kemaro,4.5,Pulau yang dipenuhi pohon hijau di dalam delta...
1,rekreasi,Benteng kuto besak,4.5,"Benteng, museum bersejarah, & tempat hangout d..."
2,rekreasi,Keliling sungai musi,4.6,Keliling sungai musi menggunakan perahu ketek ...
3,kuliner,Warung terapung,4.3,Tempat makan terapung yang berada di sungai mu...
4,kuliner,Riverside,4.6,Restaurant yang berlokasi di monpera berada di...


## Data cleaning

In [63]:
def remove_excessive_spaces(value):
    if isinstance(value, str):
        return ' '.join(value.split())
    else:
        return value

In [64]:
# Apply the function to all elements in the DataFrame
df_cleaned = df.applymap(remove_excessive_spaces)

In [65]:
df_cleaned.head()

Unnamed: 0,jenis,nama,rating,deskripsi
0,rekreasi,Pulau kemaro,4.5,Pulau yang dipenuhi pohon hijau di dalam delta...
1,rekreasi,Benteng kuto besak,4.5,"Benteng, museum bersejarah, & tempat hangout d..."
2,rekreasi,Keliling sungai musi,4.6,Keliling sungai musi menggunakan perahu ketek ...
3,kuliner,Warung terapung,4.3,Tempat makan terapung yang berada di sungai mu...
4,kuliner,Riverside,4.6,Restaurant yang berlokasi di monpera berada di...


## Encoding the type

In [66]:
label_encoder = LabelEncoder()
df['jenis_encoded'] = label_encoder.fit_transform(df['jenis'])

In [67]:
df.head()

Unnamed: 0,jenis,nama,rating,deskripsi,jenis_encoded
0,rekreasi,Pulau kemaro,4.5,Pulau yang dipenuhi pohon hijau di dalam delta...,3
1,rekreasi,Benteng kuto besak,4.5,"Benteng, museum bersejarah, & tempat hangout d...",3
2,rekreasi,Keliling sungai musi,4.6,Keliling sungai musi menggunakan perahu ketek ...,3
3,kuliner,Warung terapung,4.3,Tempat makan terapung yang berada di sungai mu...,2
4,kuliner,Riverside,4.6,Restaurant yang berlokasi di monpera berada di...,2


## Make Vectorization

In [68]:
# TF-IDF Vectorization for 'deskripsi'
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix_deskripsi = tfidf_vectorizer.fit_transform(df['deskripsi'])

In [69]:
# Combine TF-IDF features with 'type_encoded'
combined_features = pd.concat([pd.DataFrame(tfidf_matrix_deskripsi.toarray()), df['jenis_encoded']], axis=1)

## Compute the cosine similarity

In [70]:
# Compute the cosine similarity matrix
cosine_sim = cosine_similarity(combined_features, combined_features)

## Make function recommendation

In [82]:
def get_recommendations(user_preferences, cosine_sim_matrix, df):
    # Filter destinations based on user preferences
    filtered_df = df[df['jenis'].isin(user_preferences)]

    if filtered_df.empty:
        return []  # If no destinations match the user preferences, return an empty list

    # Get the indices of the filtered destinations
    indices = filtered_df.index.tolist()

    # Get recommendations based on the first destination in the filtered dataset
    idx = indices[0]
    sim_scores = list(enumerate(cosine_sim_matrix[idx]))
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)
    sim_scores = sim_scores[1:4]  # Get the top 3 similar destinations (excluding itself)

    destination_indices = [i[0] for i in sim_scores]
    return df['nama'].iloc[destination_indices].tolist()

## Testing the reccomendation

In [89]:
preference = ['rekreasi']

In [90]:
# Example: Get recommendations for 'Pulau kemaro'
recommendations = get_recommendations(preference, cosine_sim, df_cleaned)

In [91]:
print(recommendations)

['Benteng kuto besak', 'Keliling sungai musi', 'Taman Kambang Iwak Besak']
