In [36]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import haversine_distances, euclidean_distances
from math import radians


In [37]:
file_path = 'Dataset_Model.xlsx'
sheets =  pd.read_excel(file_path, sheet_name=None)
dataframes = []

In [38]:
for sheet_name, sheet_df in sheets.items():
  sheet_df['city'] = sheet_name
  dataframes.append(sheet_df)


In [39]:
#Menggabungkan semua sheet menjadi satu dataframe
df = pd.concat(dataframes, ignore_index=True)

In [40]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Nama Tempat  168 non-null    object 
 1   Longitude    168 non-null    float64
 2   Latitude     168 non-null    float64
 3   Rating       168 non-null    float64
 4   Foto         2 non-null      object 
 5   city         168 non-null    object 
dtypes: float64(3), object(3)
memory usage: 8.0+ KB


In [41]:
scaler = MinMaxScaler()
pd_normalized = scaler.fit_transform(df[['Latitude', 'Longitude']])
df_normalized = pd.DataFrame(pd_normalized)
df_normalized.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       168 non-null    float64
 1   1       168 non-null    float64
dtypes: float64(2)
memory usage: 2.8 KB


In [42]:
latitude = df['Latitude'].values
longitude = df['Longitude'].values
categories = df['city'].values

In [43]:
# Define the autoencoder model
input_dim = df_normalized.shape[1]  # Number of features (2: lat, lon)
encoding_dim = 2  # Latent space dimension (can adjust this)

# Input layer
input_layer = Input(shape=(input_dim,))
# Encoding layers
encoded = Dense(128, activation='relu')(input_layer)
encoded = Dense(encoding_dim, activation='relu')(encoded)
# Decoding layers
decoded = Dense(256, activation='relu')(encoded)
decoded = Dense(input_dim, activation='sigmoid')(decoded)

# Autoencoder model
autoencoder = Model(input_layer, decoded)

# Encoder model (for embeddings)
encoder = Model(input_layer, encoded)

# Compile the autoencoder
autoencoder.compile(optimizer='adam', loss='mse',metrics=['accuracy'])

# Train the autoencoder
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
autoencoder.fit(df_normalized, df_normalized, epochs=100, batch_size=32, verbose=1, callbacks=[early_stopping])


Epoch 1/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - accuracy: 0.6214 - loss: 0.1416
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 12ms/step - accuracy: 0.5884 - loss: 0.1372
Epoch 3/100


  current = self.get_monitor_value(logs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5913 - loss: 0.1254
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5853 - loss: 0.1154
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5468 - loss: 0.1045
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5843 - loss: 0.0932
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5895 - loss: 0.0802
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.5563 - loss: 0.0701
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5468 - loss: 0.0629
Epoch 10/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5840 - loss: 0.0586
Epoch 11/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 

<keras.src.callbacks.history.History at 0x28803de5690>

In [44]:
embeddings = encoder.predict(pd_normalized)
embedding_df = pd.DataFrame(embeddings, columns=['dim1', 'dim2'])
print(embedding_df)

[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step
         dim1      dim2
0    2.508492  2.104635
1    2.484562  2.090181
2    2.587116  2.174954
3    2.566820  2.162894
4    2.615037  2.207608
..        ...       ...
163  1.041334  0.644801
164  1.074602  0.677345
165  1.013855  0.627184
166  1.097704  0.702221
167  1.095774  0.695948

[168 rows x 2 columns]


In [45]:
# Cluster embeddings
kmeans = KMeans(n_clusters=3, random_state=42)
df['cluster'] = kmeans.fit_predict(embeddings)

In [50]:
def find_nearest_locations(new_location, df, kmeans, encoder, scaler, n_neighbors=5):
    # Normalisasi dan encode lokasi baru
    new_location_arr = np.array([new_location])
    new_location_normalized = scaler.transform(new_location_arr)
    new_location_embedding = encoder.predict(new_location_normalized)
    
    # Prediksi cluster
    new_cluster = kmeans.predict(new_location_embedding)[0]
    
    # Konversi koordinat ke radian untuk perhitungan Haversine
    def prepare_coordinates(lat, lon):
        return np.array([[radians(lat), radians(lon)]])
    
    new_loc_radians = prepare_coordinates(new_location[0], new_location[1])
    
    # Filter data berdasarkan cluster dan cluster terdekat
    cluster_radius = 1  # Radius cluster untuk pencarian (bisa disesuaikan)
    nearby_clusters = np.where(
        euclidean_distances(kmeans.cluster_centers_[new_cluster].reshape(1, -1), 
                          kmeans.cluster_centers_) < cluster_radius)[1]
    
    potential_locations = df[df['cluster'].isin(nearby_clusters)].copy()
    
    if len(potential_locations) == 0:
        potential_locations = df[df['cluster'] == new_cluster].copy()
    
    # Hitung jarak Haversine untuk lokasi yang potensial
    locations_radians = np.radians(
        potential_locations[['Latitude', 'Longitude']].values
    )
    
    # Hitung jarak dalam kilometer (radius bumi = 6371 km)
    distances = haversine_distances(new_loc_radians, locations_radians)[0] * 6371
    
    # Tambahkan jarak ke DataFrame dan urutkan
    potential_locations['distance_km'] = distances
    nearest_locations = potential_locations.nsmallest(n_neighbors, 'distance_km')
    
    # Format output
    result = nearest_locations.copy()
    result['distance_km'] = result['distance_km'].round(2)
    
    return result
Lat_inp=float(input('Masukkan Latitude Lokas: '))
Log_inp=float(input('Masukkan Longitude Lokasi: '))
new_location = [Lat_inp,Log_inp]
nearest_locations = find_nearest_locations(
    new_location=new_location,
    df=df,
    kmeans=kmeans,
    encoder=encoder,
    scaler=scaler,
    n_neighbors=5
)

print("Lokasi terdekat yang ditemukan:")
print(nearest_locations[['Nama Tempat', 'Latitude', 'Longitude', 'distance_km']])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 51ms/step
Lokasi terdekat yang ditemukan:
                         Nama Tempat  Latitude   Longitude  distance_km
44                      ECOMSURABAYA -7.324844  112.796866      1054.38
10                    MACSUS Company -7.285509  112.802264      1055.10
41                        m-computer -7.283649  112.801490      1055.24
65              Rytech Comp Surabaya -7.284759  112.799222      1055.44
54  Alpu Service Laptop dan Komputer -7.283019  112.797904      1055.64


