# **Import Library**

Pada cell ini, berbagai library yang dibutuhkan untuk analisis dan pengembangan model diimpor.

In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Input, Dense, Embedding, Flatten, Concatenate, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk
import re
import logging
from datetime import datetime

# **DataLoader Class**

Kelas DataLoader bertugas untuk memuat dan memproses data wisata dan hotel:

**Inisialisasi**: Memuat dataset dari file CSV dan menginisialisasi encoder untuk kategori dan region, serta scaler untuk harga dan rating.

**Preprocessing**:
Data wisata dan hotel dibersihkan dan diubah menjadi tipe numerik.
Kategori wisata dan region hotel dienkode menggunakan LabelEncoder.
Harga dan rating wisata dinormalisasi menggunakan MinMaxScaler (kolom harga dan rating hotel hanya dikonversi ke numerik, tidak dinormalisasi).
Dengan demikian, data siap digunakan untuk analisis atau pemodelan lebih lanjut.

In [3]:
class DataLoader:
    """Load the tourism and hotel CSV files and add model-ready columns.

    After construction ``tourism_data`` carries ``Category_encoded``,
    ``Price_scaled`` and ``Rating_scaled``; ``hotel_data`` carries
    ``region_encoded``. The fitted encoders and scalers stay available as
    attributes so callers can read e.g. ``classes_`` from them.
    """

    def __init__(self):
        """Read both CSV files, create the encoders/scalers, then preprocess."""
        # Raw tables, read from the current working directory.
        self.tourism_data = pd.read_csv('cleaned_dataset_wisata.csv')
        self.hotel_data = pd.read_csv('cleaned_dataset_hotel.csv')

        # One encoder per categorical column, one scaler per tourism numeric column.
        self.tourism_category_encoder = LabelEncoder()
        self.hotel_region_encoder = LabelEncoder()
        self.price_scaler = MinMaxScaler()
        self.rating_scaler = MinMaxScaler()

        self.preprocess_data()

    def preprocess_data(self):
        """Coerce numeric columns and add the encoded/scaled columns in place.

        Values that cannot be parsed as numbers become NaN
        (``errors='coerce'``); no rows are dropped here.
        """
        places = self.tourism_data
        hotels = self.hotel_data

        # Tourism table: numeric coercion, category ids, then scaling.
        for column in ('Price', 'Rating'):
            places[column] = pd.to_numeric(places[column], errors='coerce')
        places['Category_encoded'] = self.tourism_category_encoder.fit_transform(
            places['Category'])
        places['Price_scaled'] = self.price_scaler.fit_transform(places[['Price']])
        places['Rating_scaled'] = self.rating_scaler.fit_transform(places[['Rating']])

        # Hotel table: numeric coercion and region ids only (no scaling).
        for column in ('originalRate_perNight_totalFare', 'starRating', 'userRating'):
            hotels[column] = pd.to_numeric(hotels[column], errors='coerce')
        hotels['region_encoded'] = self.hotel_region_encoder.fit_transform(
            hotels['region'])


# **RecommenderModel Class**

Kelas ini digunakan untuk membangun dan melatih model rekomendasi:

**Inisialisasi**: Membangun model dengan input numerik dan kategori.

**build_model**():
Memasukkan fitur numerik dan kategori (menggunakan embedding dan regularisasi).
Menggabungkan keduanya dan melewatkannya ke jaringan saraf dengan layer Dense dan Dropout.
Model dioptimalkan dengan Adam dan loss binary_crossentropy.

**train**(): Melatih model dengan data numerik dan kategori.

**save_model**() dan **load_model**(): Menyimpan dan memuat model yang dilatih.

In [4]:
class RecommenderModel:
    """Keras binary classifier over numeric features plus one embedded category id."""

    def __init__(self, input_dim, categorical_dim, num_categories):
        """Build and compile the network; it is stored on ``self.model``."""
        self.model = self.build_model(input_dim, categorical_dim, num_categories)

    def build_model(self, input_dim, categorical_dim, num_categories):
        """Return a compiled two-input model.

        Args:
            input_dim: Number of numeric features per sample.
            categorical_dim: Accepted for interface compatibility; this
                method does not read it (the categorical input is always
                one id wide).
            num_categories: Vocabulary size of the embedding layer.
        """
        numeric_in = Input(shape=(input_dim,))
        category_in = Input(shape=(1,))

        # The category id is looked up in an 8-wide, L2-regularised embedding.
        embedded = Embedding(
            num_categories, 8, embeddings_regularizer=l2(0.001))(category_in)
        category_vector = Flatten()(embedded)

        # Numeric features and the embedding vector feed one shared MLP.
        hidden = Concatenate()([numeric_in, category_vector])
        for units, drop_rate in ((64, 0.3), (32, 0.2)):
            hidden = Dense(
                units, activation='relu', kernel_regularizer=l2(0.001))(hidden)
            hidden = Dropout(drop_rate)(hidden)
        score = Dense(1, activation='sigmoid')(hidden)

        network = Model(inputs=[numeric_in, category_in], outputs=score)
        network.compile(
            optimizer=Adam(learning_rate=0.0005),
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        return network

    def train(self, X_num, X_cat, y, epochs=500, batch_size=32, validation_split=0.2):
        """Fit the model on the two inputs and return what ``Model.fit`` returns."""
        fit_options = {
            'epochs': epochs,
            'batch_size': batch_size,
            'validation_split': validation_split,
        }
        return self.model.fit([X_num, X_cat], y, **fit_options)

    def save_model(self, filepath):
        """Write the current model to ``filepath``."""
        self.model.save(filepath)

    def load_model(self, filepath):
        """Replace the current model with the one stored at ``filepath``."""
        # Inside the method body, ``load_model`` resolves to the imported Keras function.
        self.model = load_model(filepath)


# **TourismRecommender Class**

Kelas ini melatih model rekomendasi wisata dan memberikan rekomendasi berdasarkan kriteria tertentu:

**Inisialisasi**: Menerima data_loader untuk memuat data dan melatih model rekomendasi.

**prepare_training_data**(): Menyiapkan data latih dengan fitur numerik dan kategori, serta membuat target berdasarkan rating.

**train_model**(): Melatih model rekomendasi dan menyimpannya.

**get_recommendations**(): Memberikan rekomendasi wisata berdasarkan filter seperti kategori, harga, dan rating, kemudian mengurutkannya berdasarkan skor berbobot.

In [7]:
class TourismRecommender:
    """Train a RecommenderModel on the tourism table and rank places.

    The model is trained as soon as the object is constructed.
    """

    # Columns handed back to callers, in display order.
    _OUTPUT_COLUMNS = ['Place_Name', 'Category', 'City', 'Price', 'Rating', 'Coordinate']

    def __init__(self, data_loader):
        """Keep the loader and immediately train the model."""
        self.data_loader = data_loader
        self.model = None
        self.train_model()

    def prepare_training_data(self):
        """Return the ``train_test_split`` of numeric features, category ids and labels.

        The label is synthetic: 1 when a place's rating is above the mean
        rating of the table, otherwise 0. 20% of the rows go to the test split.
        """
        tourism_data = self.data_loader.tourism_data
        X_numerical = tourism_data[['Price_scaled', 'Rating_scaled']].values
        X_categorical = tourism_data['Category_encoded'].values
        y = (tourism_data['Rating'] > tourism_data['Rating'].mean()).astype(int)
        return train_test_split(X_numerical, X_categorical, y, test_size=0.2)

    def train_model(self):
        """Train on the training split and save to 'tourism_recommender.h5'.

        The test split is produced but not used here.
        """
        X_num_train, X_num_test, X_cat_train, X_cat_test, y_train, y_test = self.prepare_training_data()
        num_categories = len(self.data_loader.tourism_category_encoder.classes_)

        self.model = RecommenderModel(input_dim=2, categorical_dim=1, num_categories=num_categories)
        self.model.train(X_num_train, X_cat_train, y_train)
        self.model.save_model('tourism_recommender.h5')

    def get_recommendations(self, category=None, min_price=None, max_price=None, min_rating=None):
        """Return up to five places matching the filters, best score first.

        Args:
            category: Case-insensitive substring to look for in ``Category``.
            min_price: Inclusive lower price bound; applied on its own when given.
            max_price: Inclusive upper price bound; applied on its own when given.
            min_rating: Inclusive lower bound on ``Rating``.

        Returns:
            DataFrame with the columns Place_Name, Category, City, Price,
            Rating and Coordinate. It is empty (same columns) when no row
            passes the filters.
        """
        recommendations = self.data_loader.tourism_data.copy()

        if category:
            recommendations = recommendations[
                recommendations['Category'].str.contains(category, case=False, na=False)
            ]

        # Each bound is applied independently, so a lone min or max is honoured.
        if min_price is not None:
            recommendations = recommendations[recommendations['Price'] >= min_price]
        if max_price is not None:
            recommendations = recommendations[recommendations['Price'] <= max_price]

        if min_rating is not None:
            recommendations = recommendations[recommendations['Rating'] >= min_rating]

        # Nothing to score: skip the model call, which cannot handle an empty batch.
        if recommendations.empty:
            return recommendations[self._OUTPUT_COLUMNS]

        X_num = recommendations[['Price_scaled', 'Rating_scaled']].values
        X_cat = recommendations['Category_encoded'].values
        predictions = self.model.model.predict([X_num, X_cat])

        # The model returns one column per sample; flatten it to one value per row.
        pred_score = np.asarray(predictions, dtype=float).reshape(-1)
        weighted_score = (
            pred_score * 0.5 +
            recommendations['Rating_scaled'] * 0.3 +
            (1 - recommendations['Price_scaled']) * 0.2
        )
        # assign() builds a new frame instead of writing into a filtered slice.
        recommendations = recommendations.assign(
            pred_score=pred_score,
            weighted_score=weighted_score,
        ).sort_values('weighted_score', ascending=False)

        return recommendations[self._OUTPUT_COLUMNS].head(5)


# HotelRecommender Class

Kelas ini melatih model rekomendasi hotel dan memberikan rekomendasi berdasarkan kriteria tertentu:

**Inisialisasi**: Menerima data_loader untuk memuat data dan melatih model rekomendasi.

**prepare_training_data**(): Menyiapkan data latih dengan fitur numerik (rating bintang, rating pengguna, dan tarif) serta kategori (region hotel), dan membuat target berdasarkan rating pengguna.

**train_model**(): Melatih model rekomendasi dan menyimpannya.

**get_recommendations**(): Memberikan rekomendasi hotel berdasarkan filter seperti rating bintang, harga, rating pengguna, dan region, kemudian mengurutkannya berdasarkan skor berbobot.


In [8]:
class HotelRecommender:
    """Train a RecommenderModel on the hotel table and rank hotels.

    The model is trained as soon as the object is constructed.
    """

    # Columns handed back to callers, in display order.
    _OUTPUT_COLUMNS = ['name', 'region', 'starRating', 'userRating', 'originalRate_perNight_totalFare']

    def __init__(self, data_loader):
        """Keep the loader and immediately train the model."""
        self.data_loader = data_loader
        self.model = None
        self.train_model()

    def prepare_training_data(self):
        """Return the ``train_test_split`` of numeric features, region ids and labels.

        Numeric features are star rating, user rating and nightly fare, used
        as stored in the table. The label is synthetic: 1 when a hotel's
        user rating is above the table mean, otherwise 0. 20% of the rows go
        to the test split.
        """
        hotel_data = self.data_loader.hotel_data
        X_numerical = np.column_stack((
            hotel_data['starRating'],
            hotel_data['userRating'],
            hotel_data['originalRate_perNight_totalFare']
        ))
        X_categorical = hotel_data['region_encoded'].values
        y = (hotel_data['userRating'] > hotel_data['userRating'].mean()).astype(int)
        return train_test_split(X_numerical, X_categorical, y, test_size=0.2)

    def train_model(self):
        """Train on the training split and save to 'hotel_recommender.h5'.

        The test split is produced but not used here.
        """
        X_num_train, X_num_test, X_cat_train, X_cat_test, y_train, y_test = self.prepare_training_data()
        num_regions = len(self.data_loader.hotel_region_encoder.classes_)

        self.model = RecommenderModel(input_dim=3, categorical_dim=1, num_categories=num_regions)
        self.model.train(X_num_train, X_cat_train, y_train)
        self.model.save_model('hotel_recommender.h5')

    def get_recommendations(self, star_rating=None, min_price=None, max_price=None, min_user_rating=None, region=None):
        """Return up to five hotels matching the filters, best score first.

        Args:
            star_rating: Exact star rating to keep; a falsy value disables the filter.
            min_price: Inclusive lower fare bound; applied on its own when given.
            max_price: Inclusive upper fare bound; applied on its own when given.
            min_user_rating: Inclusive lower bound on ``userRating``.
            region: Case-insensitive substring to look for in ``region``.

        Returns:
            DataFrame with the columns name, region, starRating, userRating
            and originalRate_perNight_totalFare. It is empty (same columns)
            when no row passes the filters.
        """
        fare_column = 'originalRate_perNight_totalFare'
        recommendations = self.data_loader.hotel_data.copy()

        if star_rating:
            recommendations = recommendations[recommendations['starRating'] == star_rating]

        # Each bound is applied independently, so a lone min or max is honoured.
        if min_price is not None:
            recommendations = recommendations[recommendations[fare_column] >= min_price]
        if max_price is not None:
            recommendations = recommendations[recommendations[fare_column] <= max_price]

        if min_user_rating is not None:
            recommendations = recommendations[recommendations['userRating'] >= min_user_rating]

        if region:
            recommendations = recommendations[recommendations['region'].str.contains(region, case=False, na=False)]

        # Nothing to score: skip the model call, which cannot handle an empty batch.
        if recommendations.empty:
            return recommendations[self._OUTPUT_COLUMNS]

        X_num = np.column_stack((
            recommendations['starRating'],
            recommendations['userRating'],
            recommendations[fare_column]
        ))
        X_cat = recommendations['region_encoded'].values
        predictions = self.model.model.predict([X_num, X_cat])

        # The model returns one column per sample; flatten it to one value per row.
        pred_score = np.asarray(predictions, dtype=float).reshape(-1)

        # Fare relative to the priciest remaining hotel. A zero or missing
        # maximum would turn every score into NaN, so treat it as "no penalty".
        max_fare = recommendations[fare_column].max()
        relative_fare = recommendations[fare_column] / max_fare if max_fare > 0 else 0.0

        # NOTE(review): userRating enters unscaled, so on a 0-10 scale it
        # outweighs the other two terms - confirm this is intended.
        weighted_score = (
            pred_score * 0.4 +
            recommendations['userRating'] * 0.3 +
            (1 - relative_fare) * 0.3
        )
        # assign() builds a new frame instead of writing into a filtered slice.
        recommendations = recommendations.assign(
            pred_score=pred_score,
            weighted_score=weighted_score,
        ).sort_values('weighted_score', ascending=False)

        return recommendations[self._OUTPUT_COLUMNS].head(5)


# TextPreprocessor Class

Kelas ini memproses teks dan mengekstrak entitas penting:

**preprocess**(): Mengubah teks menjadi lowercase, menghapus tanda baca, dan menghilangkan stopwords.

**extract_entities**(): Mengekstrak entitas seperti kategori, wilayah, rentang harga, rating, dan rating bintang menggunakan pencocokan regex.

In [9]:
class TextPreprocessor:
    """Normalise user text and pull recommendation filters out of it."""

    # Known keywords, checked in this order; the first one found in the text wins.
    _CATEGORIES = (
        'budaya', 'taman hiburan', 'bahari', 'cagar alam', 'pusat perbelanjaan',
        'sejarah', 'religi', 'kuliner', 'belanja', 'pantai', 'gunung', 'air terjun',
    )
    _REGIONS = (
        'menteng', 'senayan', 'ancol', 'kemayoran', 'sudirman',
        'jakarta', 'bogor', 'depok', 'tangerang', 'bekasi',
    )
    # (regex, (min_price, max_price)) pairs, checked in this order.
    _PRICE_BANDS = (
        (r'harga murah|budget|termurah', (0, 50000)),
        (r'harga sedang|menengah', (50000, 200000)),
        (r'harga mahal|mewah', (200000, float('inf'))),
    )
    # Each pattern captures the number in group 1.
    _RATING_PATTERNS = (
        r'rating minimal (\d+(?:\.\d+)?)',
        r'minimal rating (\d+(?:\.\d+)?)',
        r'rating di atas (\d+(?:\.\d+)?)',
    )
    _STAR_PATTERNS = (
        r'(\d+) bintang',
        r'hotel bintang (\d+)',
        r'bintang (\d+)',
    )

    def __init__(self):
        """Download the NLTK resources and load the Indonesian stop words."""
        for resource in ('punkt', 'stopwords'):
            nltk.download(resource)
        self.stop_words = set(stopwords.words('indonesian'))

    def preprocess(self, text):
        """Lower-case ``text``, strip punctuation and drop stop words.

        Returns the remaining tokens joined by single spaces.
        """
        cleaned = re.sub(r'[^\w\s]', '', text.lower())
        kept = (word for word in word_tokenize(cleaned) if word not in self.stop_words)
        return ' '.join(kept)

    @staticmethod
    def _first_match(patterns, text):
        """Return the match of the first pattern (in order) found in ``text``, else None."""
        hits = (re.search(pattern, text) for pattern in patterns)
        return next((hit for hit in hits if hit), None)

    def extract_entities(self, text):
        """Extract category, price range, rating, star rating and region.

        Matching is done on the lower-cased text. Every key is always
        present; a value is None when nothing matched.

        Returns:
            dict with keys 'category' (str), 'price_range' (tuple of two
            numbers), 'rating' (float), 'star_rating' (int) and 'region' (str).
        """
        lowered = text.lower()

        rating_hit = self._first_match(self._RATING_PATTERNS, lowered)
        star_hit = self._first_match(self._STAR_PATTERNS, lowered)

        return {
            'category': next(
                (name for name in self._CATEGORIES if name in lowered), None),
            'price_range': next(
                (band for pattern, band in self._PRICE_BANDS
                 if re.search(pattern, lowered)),
                None),
            'rating': float(rating_hit.group(1)) if rating_hit else None,
            'star_rating': int(star_hit.group(1)) if star_hit else None,
            'region': next(
                (name for name in self._REGIONS if name in lowered), None),
        }


# TourismChatbot Class

Kelas ini mengelola interaksi dengan pengguna dan memberikan rekomendasi:

**handle_user_input**(): Menangani input pengguna, mengekstrak entitas, dan memberikan rekomendasi (hotel atau wisata).

**format_recommendations**(): Memformat hasil rekomendasi.

**welcome_message**(): Menyediakan pesan sambutan dan instruksi pengguna

In [None]:
class TourismChatbot:
    """Route a user question to the hotel or tourism recommender and format the reply."""

    def __init__(self):
        """Load the data, train both recommenders and configure file logging."""
        self.data_loader = DataLoader()
        self.tourism_recommender = TourismRecommender(self.data_loader)
        self.hotel_recommender = HotelRecommender(self.data_loader)
        self.preprocessor = TextPreprocessor()

        logging.basicConfig(
            filename='chatbot.log',
            level=logging.INFO,
            format='%(asctime)s - %(levelname)s - %(message)s'
        )

    def handle_user_input(self, user_input):
        """Return the reply text for one user message.

        Messages containing "hotel" go to the hotel recommender, everything
        else to the tourism recommender. All extracted filters that the
        chosen recommender accepts are forwarded. Any exception is logged
        with its traceback and turned into a generic apology.
        """
        try:
            entities = self.preprocessor.extract_entities(user_input)
            is_hotel = 'hotel' in user_input.lower()

            if is_hotel:
                # NOTE(review): the extracted price bands top out at 200000,
                # which looks sized for ticket prices - confirm before
                # forwarding them as nightly-fare bounds.
                recommendations = self.hotel_recommender.get_recommendations(
                    star_rating=entities['star_rating'],
                    region=entities['region'],
                    min_user_rating=entities['rating'],
                )
            else:
                min_price, max_price = entities['price_range'] or (None, None)
                recommendations = self.tourism_recommender.get_recommendations(
                    category=entities['category'],
                    min_price=min_price,
                    max_price=max_price,
                    min_rating=entities['rating'],
                )

            # An empty table would otherwise be printed as "Empty DataFrame ...".
            if recommendations.empty:
                return "Maaf, tidak ada rekomendasi yang sesuai dengan kriteria Anda."

            return self.format_recommendations(recommendations, is_hotel)
        except Exception:
            # Top-level boundary: keep the chat loop alive, but record the traceback.
            logging.exception("Failed to handle user input")
            return "Maaf, terjadi kesalahan."

    def format_recommendations(self, recommendations, is_hotel):
        """Render the recommendation table under a hotel or tourism heading."""
        if is_hotel:
            return f"Hasil rekomendasi hotel:\n{recommendations.to_string()}"
        return f"Hasil rekomendasi wisata:\n{recommendations.to_string()}"

    def welcome_message(self):
        """Return the greeting with example questions.

        The nature example names "cagar alam" in full because that is the
        keyword the entity extractor looks for.
        """
        return """
Selamat datang di Chatbot Wisata dan Hotel!
Anda dapat bertanya seperti:
- Rekomendasikan wisata cagar alam
- Rekomendasikan wisata budaya
- Rekomendasikan hotel bintang 3
- Rekomendasikan wisata termurah

Ketik 'keluar' untuk berhenti.
"""


# main() Function

Fungsi ini menjalankan chatbot dan mengelola interaksi dengan pengguna:

**Inisialisasi Chatbot**: Membuat objek TourismChatbot dan menampilkan pesan sambutan.

**Loop Interaktif**:
Menunggu input dari pengguna dan memprosesnya menggunakan handle_user_input().
Jika pengguna mengetik "keluar", program berhenti.


In [None]:
def main():
    """Run the interactive chatbot loop on standard input/output.

    The loop ends when the user types "keluar" (any case, surrounding
    whitespace ignored) or when input is closed or interrupted.
    Blank lines are skipped.
    """
    chatbot = TourismChatbot()
    print(chatbot.welcome_message())

    while True:
        try:
            user_input = input("Masukkan pertanyaan Anda: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or Ctrl-C: leave quietly instead of with a traceback.
            print()
            break

        if not user_input:
            continue
        if user_input.lower() == "keluar":
            break
        print(chatbot.handle_user_input(user_input))


if __name__ == "__main__":
    main()