In [1]:
from dotenv import load_dotenv
import os
import pandas as pd
import numpy as np
import json

In [2]:
# Memuat variabel lingkungan dari file .env
load_dotenv()

# Mengakses variabel lingkungan yang diatur dalam file .env
project_path = os.getenv("PROJECT_FOLDER")
dataset_folder = 'data'

In [7]:
places_df = pd.read_json(f"{project_path}/{dataset_folder}/processed_data/json/todo_add.json")

In [28]:
places_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 118 entries, 0 to 117
Data columns (total 15 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   location_id     118 non-null    int64  
 1   place_name      118 non-null    object 
 2   city            118 non-null    object 
 3   rating          118 non-null    float64
 4   latitude        115 non-null    float64
 5   longitude       115 non-null    float64
 6   opening_hours   109 non-null    object 
 7   contact_number  50 non-null     object 
 8   photo           104 non-null    object 
 9   address         118 non-null    object 
 10  min_price       118 non-null    int64  
 11  max_price       118 non-null    int64  
 12  activity        118 non-null    int64  
 13  category        118 non-null    object 
 14  description     118 non-null    object 
dtypes: float64(3), int64(4), object(8)
memory usage: 14.0+ KB


In [71]:
import re

def clean_data(data):
    cleaned_data = []
    
    for index, item in data.iterrows():
        cleaned_item = {}
        
        # Membersihkan place_name
        cleaned_item['place_name'] = item['place_name']
        
        # Membersihkan rating
        rating = item['rating']
        cleaned_item['rating'] = float(rating) if rating is not None else None
        
        # Membersihkan latitude dan longitude
        cleaned_item['latitude'] = float(item['latitude'])
        cleaned_item['longitude'] = float(item['longitude'])
        
        # Membersihkan opening_hours
        cleaned_item['opening_hours'] = item['opening_hours']
        
        # Membersihkan address
        cleaned_item['address'] = item['address']
        
        # Membersihkan min_price dan max_price
        cleaned_item['min_price'] = float(item['min_price'])
        cleaned_item['max_price'] = float(item['max_price'])
        
        # Membersihkan activity dan category
        cleaned_item['activity'] = item['activity']
        cleaned_item['category'] = item['category']
        
        # Membersihkan description
        description = item['description']
        cleaned_item['description'] = re.sub(r'\s+', ' ', description).strip()
        
        cleaned_data.append(cleaned_item)
    
    return cleaned_data

In [72]:
cleaned_data = clean_data(places_df)

In [76]:
print(cleaned_data)

[{'place_name': 'Manggar Segarasari Beach', 'rating': 3.5, 'latitude': -1.212555, 'longitude': 116.98106, 'opening_hours': {'week_ranges': [[{'open_time': 800, 'close_time': 1600}], [{'open_time': 800, 'close_time': 1600}], [{'open_time': 800, 'close_time': 1600}], [{'open_time': 800, 'close_time': 1600}], [{'open_time': 800, 'close_time': 1600}], [{'open_time': 800, 'close_time': 1600}], [{'open_time': 800, 'close_time': 1600}]], 'timezone': 'Asia/Makassar'}, 'address': 'Kelurahan Manggar, Balikpapan Timur, Balikpapan, Kalimantan Timur, Indonesia, 76116', 'min_price': 5000.0, 'max_price': 30000.0, 'activity': 2, 'category': 'Beach', 'description': 'Pantai Manggar Segarasari atau biasa disebut dengan Pantai Manggar merupakan salah satu tujuan wisata favorit bagi masyarakat kota Balikpapan. Pantai ini terletak di Kelurahan Manggar dan Teritip, Balikpapan, Kalimantan Timur. Luas wilayahnya kurang lebih sekitar 13.000 meter persegi. Pantai ini mempunyai air laut yang biru dan hamparan pas

In [77]:
import math

def calculate_distance(lat1, lon1, lat2, lon2):
    # Menghitung jarak antara dua titik koordinat menggunakan Haversine formula
    R = 6371  # Radius bumi dalam kilometer
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2) * math.sin(dlat / 2) + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2) * math.sin(dlon / 2)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    distance = R * c
    return distance

In [78]:
def content_based_recommendation(data, user_preferences):
    recommendations = []
    for place in data:
        # Memeriksa kesesuaian preferensi pengguna dengan atribut-atribut tempat wisata
        if place['activity'] == user_preferences['activity'] and place['category'] == user_preferences['category']:
            distance = calculate_distance(float(place['latitude']), float(place['longitude']), user_preferences['latitude'], user_preferences['longitude'])
            place['distance'] = distance
            recommendations.append(place)
    
    # Mengurutkan rekomendasi berdasarkan jarak terdekat
    recommendations.sort(key=lambda x: x['distance'])
    
    return recommendations

In [92]:
def recommend_activity_category(latitude, longitude):
    recommendations = []
    for index, location in places_df.iterrows():
        if location['latitude'] == latitude and location['longitude'] == longitude:
            recommendations.append(location)
    return recommendations

In [93]:
recommended_locations = recommend_activity_category("-1.212555", "116.98106")

In [94]:
print(recommended_locations)

[]
