## 1. Import Dataset

In [None]:
import pandas as pd
import geopandas as gpd
import numpy as np
import folium
import matplotlib.pyplot as plt
from scipy.stats import gmean

In [None]:
# Load the dataset from a CSV file addressed relative to the working directory.
df = pd.read_csv('Netflix Dataset.csv')

## 2. Membuat Fungsi K-Means dan Menghitung Cluster Center dari 2 atau 3 cluster

In [None]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between ``x1`` and ``x2``.

    Both arguments must support element-wise subtraction (NumPy arrays or
    plain numbers).
    """
    delta = x1 - x2
    squared_total = np.sum(delta * delta)
    return np.sqrt(squared_total)

class KMeans:
    """Minimal K-Means clusterer with deterministic centroid seeding.

    Initial centroids are chosen by ranking the samples on the geometric mean
    of the absolute values of their first two features, splitting that ranking
    into ``k`` contiguous groups and taking the middle sample of each group.

    Attributes set by :meth:`fit`:
        centroids: float array of shape ``(k, n_features)``.
        labels: float array of length ``n_samples`` with the cluster index of
            each sample (kept as float so existing outputs stay unchanged).
    """

    def __init__(self, k, max_iters=100):
        """Store the number of clusters ``k`` and the iteration cap."""
        self.k = k
        self.centroids = None
        self.max_iters = max_iters

    def _nearest_centroid(self, X):
        """Return, for each row of ``X``, the index of the closest centroid."""
        diffs = X[:, np.newaxis, :] - self.centroids[np.newaxis, :, :]
        distances = np.sqrt(np.sum(diffs ** 2, axis=2))
        return np.argmin(distances, axis=1)

    def fit(self, X):
        """Cluster the rows of ``X`` and store ``centroids`` and ``labels``.

        Args:
            X: 2-D array-like of shape ``(n_samples, n_features)``.

        Raises:
            ValueError: if ``k`` is not between 1 and ``n_samples``.
        """
        # Work in float so centroid means are never truncated for integer input.
        X = np.asarray(X, dtype=float)
        n_samples = X.shape[0]
        if not 1 <= self.k <= n_samples:
            raise ValueError(
                f"k must be between 1 and the number of samples ({n_samples}), got {self.k}"
            )

        geomeans = gmean(np.abs(X[:, 0:2]), axis=1)
        sorted_indices = np.argsort(geomeans)

        split_indices = np.array_split(sorted_indices, self.k)
        centroid_indices = [part[len(part) // 2] for part in split_indices]
        # Fancy indexing copies, so updating centroids never modifies X.
        self.centroids = X[centroid_indices]

        self.labels = np.zeros((n_samples,))
        for _ in range(self.max_iters):
            self.labels[:] = self._nearest_centroid(X)

            centroids_old = self.centroids.copy()

            for i in range(self.k):
                members = X[self.labels == i]
                # An empty cluster keeps its previous centroid.
                if len(members):
                    self.centroids[i] = members.mean(axis=0)

            # Once the centroids stop moving, further passes cannot change
            # anything, so stopping here yields the same final result.
            if np.array_equal(centroids_old, self.centroids):
                break

    def predict(self, X):
        """Return a list with the index of the nearest centroid for each row of ``X``."""
        X = np.asarray(X, dtype=float)
        return list(self._nearest_centroid(X))

In [None]:
# Feature matrix for clustering: the Latitude, Longitude and Device columns as a NumPy array.
X = df[['Latitude', 'Longitude', 'Device']].values

In [None]:
# Show the feature matrix that is passed to KMeans.fit.
print(X)

[[  39.7837304  -100.445882      2.        ]
 [  61.0666922  -107.991707      3.        ]
 [  54.7023545    -3.2765753     1.        ]
 ...
 [  33.68495615 -102.80863133    0.        ]
 [  64.03779427  -72.11359926    3.        ]
 [  38.86423547 -121.86420511    1.        ]]


In [None]:
# Cluster the samples into two groups and store each row's assignment.
# This cell must fit k=2 and write 'Cluster-K2': the centroid table printed
# next and every map in section 4 read that column, which was otherwise never
# created (the cell previously repeated the k=3 fit further below).
kmeans = KMeans(k=2, max_iters=100)
kmeans.fit(X)
df['Cluster-K2'] = kmeans.labels

In [None]:
# Tabulate the centroids of the most recent fit, one row per cluster.
centroids_data = kmeans.centroids
cluster_centers_K2 = pd.DataFrame(centroids_data, columns=['Latitude', 'Longitude', 'Device'])
print("Cluster Centers K2")
print(cluster_centers_K2)

Cluster Centers K2
    Latitude  Longitude    Device
0  35.662511  19.928861  1.466765
1  37.237176 -95.392322  1.540140


In [None]:
# Show the dataset including the cluster assignment column(s) added so far.
print(df)

                       Country  Device   Latitude   Longitude  Cluster-K2
0     United States of America       2  39.783730 -100.445882         1.0
1                       Canada       3  61.066692 -107.991707         1.0
2               United Kingdom       1  54.702354   -3.276575         0.0
3                    Australia       0 -24.776109  134.755000         0.0
4                      Germany       2  51.163818   10.447831         0.0
...                        ...     ...        ...         ...         ...
2495                     Spain       1  37.727507   -5.721174         0.0
2496                     Spain       1  42.386670   -1.339590         0.0
2497  United States of America       0  33.684956 -102.808631         1.0
2498                    Canada       3  64.037794  -72.113599         1.0
2499  United States of America       1  38.864235 -121.864205         1.0

[2500 rows x 5 columns]


In [None]:
# Cluster the samples into three groups and store each row's assignment.
kmeans = KMeans(k=3, max_iters=100)
kmeans.fit(X)
df['Cluster-K3'] = kmeans.labels

In [None]:
# Tabulate the centroids of the k=3 fit, one row per cluster.
centroids_data = kmeans.centroids
cluster_centers_K3 = pd.DataFrame(centroids_data, columns=['Latitude', 'Longitude', 'Device'])
print("Cluster Centers K3")
print(cluster_centers_K3)

Cluster Centers K3
    Latitude   Longitude    Device
0  43.795313    0.803132  1.461730
1 -25.441349  135.016222  1.513661
2  38.542283  -96.869289  1.539910


In [None]:
# Compare the two-cluster and three-cluster assignments side by side.
print(df[['Device', 'Latitude', 'Longitude', 'Cluster-K2', 'Cluster-K3']])

      Device   Latitude   Longitude  Cluster-K3
0          2  39.783730 -100.445882         2.0
1          3  61.066692 -107.991707         2.0
2          1  54.702354   -3.276575         0.0
3          0 -24.776109  134.755000         1.0
4          2  51.163818   10.447831         0.0
...      ...        ...         ...         ...
2495       1  37.727507   -5.721174         0.0
2496       1  42.386670   -1.339590         0.0
2497       0  33.684956 -102.808631         2.0
2498       3  64.037794  -72.113599         2.0
2499       1  38.864235 -121.864205         2.0

[2500 rows x 4 columns]


## 4. Visualisasi 2 Cluster

In [None]:
# NOTE(review): world_geojson is not used anywhere in this cell, and
# gpd.datasets appears to be deprecated in recent GeoPandas releases —
# confirm whether this read is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Centre the map on the average of the K=2 cluster centroids.
map_center = [cluster_centers_K2['Latitude'].mean(), cluster_centers_K2['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

# Marker icon colour per cluster index.
cluster_colors = {0: 'yellow', 1: 'red'}

# Shade countries by their 'Cluster-K2' value. Boundaries are read from the
# local 'world.json' file and joined to df['Country'] via properties.name.
folium.Choropleth(
    geo_data='world.json',
    name='choropleth',
    data=df,
    columns=['Country', 'Cluster-K2'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.5,
    nan_fill_color='white',
    nan_fill_opacity=0,
    legend_name='Cluster'
).add_to(my_map)

# One marker per cluster centroid, labelled with the cluster index.
for idx, center in cluster_centers_K2.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

folium.LayerControl().add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


In [None]:
# NOTE(review): world_geojson is not used anywhere in this cell, and
# gpd.datasets appears to be deprecated in recent GeoPandas releases —
# confirm whether this read is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Centre the map on the average of the K=2 cluster centroids.
map_center = [cluster_centers_K2['Latitude'].mean(), cluster_centers_K2['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

# Marker icon colour per cluster index.
cluster_colors = {0: 'yellow', 1: 'red'}

# Shade countries by their 'Cluster-K2' value. Boundaries are read from the
# local 'world.json' file and joined to df['Country'] via properties.name.
folium.Choropleth(
    geo_data='world.json',
    name='choropleth',
    data=df,
    columns=['Country', 'Cluster-K2'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.5,
    nan_fill_color='white',
    nan_fill_opacity=0,
    legend_name='Cluster'
).add_to(my_map)

# One marker per cluster centroid, labelled with the cluster index.
for idx, center in cluster_centers_K2.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Dashed line between each centroid and the next one in table order.
for i in range(len(cluster_centers_K2) - 1):
    point1 = cluster_centers_K2.iloc[i]
    point2 = cluster_centers_K2.iloc[i + 1]
    folium.PolyLine(
        locations=[[point1['Latitude'], point1['Longitude']], [point2['Latitude'], point2['Longitude']]],
        color='black',
        weight=2,
        dash_array='5, 5'
    ).add_to(my_map)

folium.LayerControl().add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


In [None]:
# NOTE(review): this cell repeats the code of the preceding map cell line for
# line; world_geojson is again unused and gpd.datasets appears deprecated in
# recent GeoPandas releases — confirm whether the cell is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Centre the map on the average of the K=2 cluster centroids.
map_center = [cluster_centers_K2['Latitude'].mean(), cluster_centers_K2['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

# Marker icon colour per cluster index.
cluster_colors = {0: 'yellow', 1: 'red'}

# Shade countries by their 'Cluster-K2' value. Boundaries are read from the
# local 'world.json' file and joined to df['Country'] via properties.name.
folium.Choropleth(
    geo_data='world.json',
    name='choropleth',
    data=df,
    columns=['Country', 'Cluster-K2'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.5,
    nan_fill_color='white',
    nan_fill_opacity=0,
    legend_name='Cluster'
).add_to(my_map)

# One marker per cluster centroid, labelled with the cluster index.
for idx, center in cluster_centers_K2.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Dashed line between each centroid and the next one in table order.
for i in range(len(cluster_centers_K2) - 1):
    point1 = cluster_centers_K2.iloc[i]
    point2 = cluster_centers_K2.iloc[i + 1]
    folium.PolyLine(
        locations=[[point1['Latitude'], point1['Longitude']], [point2['Latitude'], point2['Longitude']]],
        color='black',
        weight=2,
        dash_array='5, 5'
    ).add_to(my_map)

folium.LayerControl().add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


In [None]:
from shapely.geometry import MultiPoint, Point
import geopandas as gpd

cluster_polygons = []

# One outline per cluster: buffer the member points by 0.1 coordinate units
# and take the convex hull of the buffered shape. Points are (lon, lat).
for cluster_id, cluster_data in df.groupby('Cluster-K2'):
    points = MultiPoint(cluster_data[['Longitude', 'Latitude']].values)
    buffer = points.buffer(0.1)
    cluster_polygon = buffer.convex_hull
    cluster_polygons.append({'Cluster-K2': cluster_id, 'geometry': cluster_polygon})

cluster_polygons_gdf = gpd.GeoDataFrame(cluster_polygons)

# Tag the geometries as EPSG:4326 coordinates.
cluster_polygons_gdf.set_crs(epsg=4326, inplace=True)

# Mean latitude/longitude of the rows in each cluster; later cells read this
# table through the global name cluster_centers.
cluster_centers = df.groupby('Cluster-K2').agg({'Latitude': 'mean', 'Longitude': 'mean'})
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

cluster_colors = {0: 'yellow', 1: 'red'}

# Markers use the K-Means centroid table (cluster_centers_K2), not the
# groupby means computed above.
for idx, center in cluster_centers_K2.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)


# Draw each hull; color=cluster_color binds the current colour as a default
# argument so every lambda keeps its own value.
for cluster_id, cluster_data in cluster_polygons_gdf.iterrows():
    cluster_color = cluster_colors[cluster_data['Cluster-K2']]
    folium.GeoJson(
        cluster_data['geometry'],
        style_function=lambda feature, color=cluster_color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.5,
        }
    ).add_to(my_map)

# Dashed line between every pair of centroids.
for i in range(len(cluster_centers_K2)):
    for j in range(i + 1, len(cluster_centers_K2)):
        point1 = cluster_centers_K2.iloc[i]
        point2 = cluster_centers_K2.iloc[j]
        folium.PolyLine(
            locations=[[point1['Latitude'], point1['Longitude']], [point2['Latitude'], point2['Longitude']]],
            color='black',
            weight=2,
            dash_array='5, 5'
        ).add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

In [None]:
from shapely.geometry import Point, Polygon, MultiPoint, LineString
import math

# NOTE(review): world_geojson is not used anywhere in this cell, and
# gpd.datasets appears to be deprecated in recent GeoPandas releases —
# confirm whether this read is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Whole-world rectangle as [min_lon, min_lat, max_lon, max_lat]; polygon
# vertices are (lon, lat).
square_bounds = [-180, -90, 180, 90]
square_polygon = Polygon([
    (square_bounds[0], square_bounds[1]),
    (square_bounds[0], square_bounds[3]),
    (square_bounds[2], square_bounds[3]),
    (square_bounds[2], square_bounds[1]),
    (square_bounds[0], square_bounds[1])
])

# Midpoint of the rectangle, stored as (lat, lon) for folium.
center_lat = (square_bounds[1] + square_bounds[3]) / 2
center_lon = (square_bounds[0] + square_bounds[2]) / 2
center_point = (center_lat, center_lon)

# Vertical line through the midpoint (not referenced again in this cell).
division_line = LineString([(center_lon, square_bounds[1]), (center_lon, square_bounds[3])])

# Western half of the rectangle.
left_polygon = Polygon([
    (square_bounds[0], square_bounds[1]),
    (square_bounds[0], square_bounds[3]),
    (center_lon, square_bounds[3]),
    (center_lon, square_bounds[1]),
    (square_bounds[0], square_bounds[1])
])

# Eastern half of the rectangle.
right_polygon = Polygon([
    (center_lon, square_bounds[1]),
    (center_lon, square_bounds[3]),
    (square_bounds[2], square_bounds[3]),
    (square_bounds[2], square_bounds[1]),
    (center_lon, square_bounds[1])
])

# Centroids come back as (lon, lat); [::-1] flips them to (lat, lon).
left_centroid = left_polygon.centroid.coords[0][::-1]
right_centroid = right_polygon.centroid.coords[0][::-1]

# NOTE(review): this map object is replaced further down before anything is
# added to it.
my_map = folium.Map(location=[0, 0], zoom_start=2)

cluster_polygons = []

# One outline per cluster: buffer the member points by 0.1 coordinate units
# and take the convex hull of the buffered shape.
for cluster_id, cluster_data in df.groupby('Cluster-K2'):
    points = MultiPoint(cluster_data[['Longitude', 'Latitude']].values)
    buffer = points.buffer(0.1)
    cluster_polygon = buffer.convex_hull
    cluster_polygons.append({'Cluster-K2': cluster_id, 'geometry': cluster_polygon})

cluster_polygons_gdf = gpd.GeoDataFrame(cluster_polygons)
cluster_polygons_gdf.set_crs(epsg=4326, inplace=True)

# Mean latitude/longitude of the rows in each cluster.
cluster_centers = df.groupby('Cluster-K2').agg({'Latitude': 'mean', 'Longitude': 'mean'})
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

cluster_colors = {0: 'yellow', 1: 'orange'}

# Markers for the K-Means centroids.
for idx, center in cluster_centers_K2.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Cluster hulls; color=cluster_color binds the colour per iteration.
for cluster_id, cluster_data in cluster_polygons_gdf.iterrows():
    cluster_color = cluster_colors[cluster_data['Cluster-K2']]
    folium.GeoJson(
        cluster_data['geometry'],
        style_function=lambda feature, color=cluster_color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.5,
        }
    ).add_to(my_map)

# Dashed line between every pair of centroids.
for i in range(len(cluster_centers_K2)):
    for j in range(i + 1, len(cluster_centers_K2)):
        point1 = cluster_centers_K2.iloc[i]
        point2 = cluster_centers_K2.iloc[j]
        folium.PolyLine(
            locations=[[point1['Latitude'], point1['Longitude']], [point2['Latitude'], point2['Longitude']]],
            color='black',
            weight=2,
            dash_array='5, 5'
        ).add_to(my_map)

# Outline of the whole-world rectangle.
folium.GeoJson(
    square_polygon,
    style_function=lambda feature: {
        'fillColor': 'transparent',
        'color': 'blue',
        'weight': 2
    }
).add_to(my_map)

folium.Marker(
    location=center_point,
    icon=folium.Icon(color='black', icon='info-sign'),
    popup='Center Point'
).add_to(my_map)

# Dividing line through the midpoint, given as (lat, lon) pairs.
folium.PolyLine(
    locations=[(square_bounds[1], center_lon), (square_bounds[3], center_lon)],
    color="black"
).add_to(my_map)

folium.GeoJson(
    left_polygon,
    style_function=lambda feature: {
        'fillColor': 'transparent',
        'color': 'black',
        'weight': 2
    }
).add_to(my_map)

folium.GeoJson(
    right_polygon,
    style_function=lambda feature: {
        'fillColor': 'transparent',
        'color': 'black',
        'weight': 2
    }
).add_to(my_map)

# Markers at the geometric centroids of the two halves.
folium.Marker(
    location=left_centroid,
    icon = folium.Icon(color='white', icon_color='orange', icon='map-marker', prefix='fa'),
    popup='Centroid 1'
).add_to(my_map)


folium.Marker(
    location=right_centroid,
    icon = folium.Icon(color='white', icon_color='yellow', icon='map-marker', prefix='fa'),
    popup='Right Centroid'
).add_to(my_map)

# Table of the two half-rectangle centroids (row 0 = left, row 1 = right).
centroids_data = {
    'Latitude': [left_centroid[0], right_centroid[0]],
    'Longitude': [left_centroid[1], right_centroid[1]]
}

centers_k2 = pd.DataFrame(centroids_data)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


In [None]:
import geopandas as gpd
import folium
from shapely.geometry import Point, Polygon, MultiPoint

# NOTE(review): world_geojson is not used anywhere in this cell, and
# gpd.datasets appears to be deprecated in recent GeoPandas releases —
# confirm whether this read is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# square_bounds and center_lon are not defined in this cell; they are read
# from notebook state left behind by an earlier cell.
# Western half of the rectangle, vertices as (lon, lat).
left_polygon = Polygon([
    (square_bounds[0], square_bounds[1]),
    (square_bounds[0], square_bounds[3]),
    (center_lon, square_bounds[3]),
    (center_lon, square_bounds[1]),
    (square_bounds[0], square_bounds[1])
])

# Eastern half of the rectangle.
right_polygon = Polygon([
    (center_lon, square_bounds[1]),
    (center_lon, square_bounds[3]),
    (square_bounds[2], square_bounds[3]),
    (square_bounds[2], square_bounds[1]),
    (center_lon, square_bounds[1])
])

# Centroids come back as (lon, lat); [::-1] flips them to (lat, lon).
left_centroid = left_polygon.centroid.coords[0][::-1]
right_centroid = right_polygon.centroid.coords[0][::-1]

# NOTE(review): this map object is replaced further down before anything is
# added to it.
my_map = folium.Map(location=[0, 0], zoom_start=2)
cluster_polygons = []

# One outline per cluster: buffer the member points by 0.1 coordinate units
# and take the convex hull of the buffered shape.
for cluster_id, cluster_data in df.groupby('Cluster-K2'):
    points = MultiPoint(cluster_data[['Longitude', 'Latitude']].values)
    buffer = points.buffer(0.1)
    cluster_polygon = buffer.convex_hull
    cluster_polygons.append({'Cluster-K2': cluster_id, 'geometry': cluster_polygon})

cluster_polygons_gdf = gpd.GeoDataFrame(cluster_polygons)
cluster_polygons_gdf.set_crs(epsg=4326, inplace=True)

# Mean latitude/longitude of the rows in each cluster.
cluster_centers = df.groupby('Cluster-K2').agg({'Latitude': 'mean', 'Longitude': 'mean'})
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

cluster_colors = {0: 'green', 1: 'orange'}

# Markers here use the groupby means (cluster_centers); the dashed links at
# the end of the cell use cluster_centers_K2 instead.
for idx, center in cluster_centers.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Cluster hulls; color=cluster_color binds the colour per iteration.
for cluster_id, cluster_data in cluster_polygons_gdf.iterrows():
    cluster_color = cluster_colors[cluster_data['Cluster-K2']]
    folium.GeoJson(
        cluster_data['geometry'],
        style_function=lambda feature, color=cluster_color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.5,
        }
    ).add_to(my_map)

# Outline of the whole rectangle described by square_bounds.
square_polygon = Polygon([
    (square_bounds[0], square_bounds[1]),
    (square_bounds[0], square_bounds[3]),
    (square_bounds[2], square_bounds[3]),
    (square_bounds[2], square_bounds[1]),
    (square_bounds[0], square_bounds[1])
])
folium.GeoJson(
    square_polygon,
    style_function=lambda feature: {
        'fillColor': 'transparent',
        'color': 'blue',
        'weight': 2
    }
).add_to(my_map)

# Midpoint of two opposite corners of the rectangle.
# NOTE(review): the exterior coordinates are (lon, lat) while folium locations
# are (lat, lon); for bounds symmetric about the origin the result is [0, 0]
# either way — confirm the intended order.
center_point = [sum(x)/2 for x in zip(*[square_polygon.exterior.coords[0], square_polygon.exterior.coords[2]])]
folium.Marker(
    location=center_point,
    icon=folium.Icon(color='black', icon='info-sign'),
    popup='Center Point'
).add_to(my_map)

# Dividing line through the midpoint, given as (lat, lon) pairs.
folium.PolyLine(
    locations=[(square_bounds[1], center_lon), (square_bounds[3], center_lon)],
    color="black"
).add_to(my_map)

for polygon, color in [(left_polygon, 'black'), (right_polygon, 'black')]:
    folium.GeoJson(
        polygon,
        style_function=lambda feature, color=color: {
            'fillColor': 'transparent',
            'color': color,
            'weight': 2
        }
    ).add_to(my_map)

# Markers at the geometric centroids of the two halves.
for centroid, color, label in [(left_centroid, 'orange', 'Centroid 1'), (right_centroid, 'green', 'Centroid 2')]:
    folium.Marker(
        location=centroid,
        icon=folium.Icon(color=color, icon='info-sign'),
        popup=label
    ).add_to(my_map)

# Dashed links: left centroid to the second K-Means centroid and right
# centroid to the first one.
point1 = cluster_centers_K2.iloc[0]
point2 = cluster_centers_K2.iloc[1]
folium.PolyLine(
    locations=[left_centroid, [point2['Latitude'], point2['Longitude']]],
    color='blue',
    weight=2,
    dash_array='5, 5'
).add_to(my_map)

folium.PolyLine(
    locations=[right_centroid, [point1['Latitude'], point1['Longitude']]],
    color='blue',
    weight=2,
    dash_array='5, 5'
).add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


## 5. Perhitungan Haversine Formula 2 Cluster

In [None]:
# Table of the two half-rectangle centroids, each stored as (lat, lon):
# row 0 is the left half, row 1 the right half.
centroids_data = {
    'Latitude': [left_centroid[0], right_centroid[0]],
    'Longitude': [left_centroid[1], right_centroid[1]]
}

centers_k2 = pd.DataFrame(centroids_data)
print(centers_k2)

   Latitude  Longitude
0      -0.0      -90.0
1      -0.0       90.0


In [None]:
# Reference centroids of the two half-rectangles.
print(centers_k2)

   Latitude  Longitude
0      -0.0      -90.0
1      -0.0       90.0


In [None]:
# Per-cluster mean latitude/longitude from the most recent groupby.
print(cluster_centers)

             Latitude  Longitude
Cluster-K2                      
0.0         35.662511  19.928861
1.0         37.237176 -95.392322


In [None]:
import numpy as np
import pandas as pd

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
            Scalars and NumPy arrays are both accepted.

    Returns:
        The haversine distance on a sphere of radius 6371 km.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])
    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp it first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # sphere radius in km
    return c * r

# Great-circle distance (km) from every cluster mean (rows) to every
# half-rectangle centroid (columns).
distances = np.zeros((len(cluster_centers), len(centers_k2)))

for i, found in enumerate(cluster_centers[['Latitude', 'Longitude']].to_numpy()):
    for j, reference in enumerate(centers_k2[['Latitude', 'Longitude']].to_numpy()):
        distances[i, j] = haversine(found[0], found[1], reference[0], reference[1])

distances_df = pd.DataFrame(distances, index=cluster_centers.index, columns=centers_k2.index)
print(distances_df)

                       0             1
Cluster-K2                            
0.0         11795.239834   8219.846962
1.0          4177.539082  15837.547714


In [None]:
# Planar Euclidean distance, in degrees, between each cluster mean and the
# half-rectangle centroid at the same row position in centers_k2.
distances = []
for i in range(len(centers_k2)):
    kmeans_lat, kmeans_lon = cluster_centers['Latitude'].iloc[i], cluster_centers['Longitude'].iloc[i]
    # The reference point was never assigned in this cell before (NameError on
    # a clean run, stale values otherwise); take it from centers_k2.
    orig_lat, orig_lon = centers_k2['Latitude'].iloc[i], centers_k2['Longitude'].iloc[i]
    distance = np.sqrt((kmeans_lat - orig_lat)**2 + (kmeans_lon - orig_lon)**2)
    distances.append(distance)

distances_df = pd.DataFrame({'Distance': distances})
distances_df.index.name = 'Centroid'

print(distances_df)

           Distance
Centroid           
0         59.659279
1         52.044224


## 6. Visualisasi 3 Cluster

In [None]:
# NOTE(review): world_geojson is not used anywhere in this cell, and
# gpd.datasets appears to be deprecated in recent GeoPandas releases —
# confirm whether this read is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Centre the map on the average of the K=3 cluster centroids.
map_center = [cluster_centers_K3['Latitude'].mean(), cluster_centers_K3['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

# Marker icon colour per cluster index.
cluster_colors = {0: 'yellow', 1: 'orange', 2: 'red',}

# Shade countries by their 'Cluster-K3' value. Boundaries are read from the
# local 'world.json' file and joined to df['Country'] via properties.name.
folium.Choropleth(
    geo_data='world.json',
    name='choropleth',
    data=df,
    columns=['Country', 'Cluster-K3'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.5,
    nan_fill_color='white',
    nan_fill_opacity=0,
    legend_name='Cluster'
).add_to(my_map)

# One marker per cluster centroid, labelled with the cluster index.
for idx, center in cluster_centers_K3.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

folium.LayerControl().add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


In [None]:
# NOTE(review): world_geojson is not used anywhere in this cell, and
# gpd.datasets appears to be deprecated in recent GeoPandas releases —
# confirm whether this read is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Centre the map on the K=3 centroids drawn below (this cell previously
# centred on the K=2 table, a leftover from the two-cluster section).
map_center = [cluster_centers_K3['Latitude'].mean(), cluster_centers_K3['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

# Marker icon colour per cluster index.
cluster_colors = {0: 'yellow', 1: 'orange', 2: 'red',}

# Shade countries by their 'Cluster-K3' value. Boundaries are read from the
# local 'world.json' file and joined to df['Country'] via properties.name.
folium.Choropleth(
    geo_data='world.json',
    name='choropleth',
    data=df,
    columns=['Country', 'Cluster-K3'],
    key_on='feature.properties.name',
    fill_color='YlOrRd',
    fill_opacity=0.8,
    line_opacity=0.5,
    nan_fill_color='white',
    nan_fill_opacity=0,
    legend_name='Cluster'
).add_to(my_map)

# One marker per cluster centroid, labelled with the cluster index.
for idx, center in cluster_centers_K3.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Dashed line between every pair of centroids.
for i in range(len(cluster_centers_K3)):
    for j in range(i + 1, len(cluster_centers_K3)):
        point1 = cluster_centers_K3.iloc[i]
        point2 = cluster_centers_K3.iloc[j]
        folium.PolyLine(
            locations=[[point1['Latitude'], point1['Longitude']], [point2['Latitude'], point2['Longitude']]],
            color='black',
            weight=2,
            dash_array='5, 5'
        ).add_to(my_map)

folium.LayerControl().add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


In [None]:


# Add a dashed line between every pair of K=3 centroids to whichever map
# object my_map currently refers to (the map is not re-rendered here).
for i in range(len(cluster_centers_K3)):
    for j in range(i + 1, len(cluster_centers_K3)):
        point1 = cluster_centers_K3.iloc[i]
        point2 = cluster_centers_K3.iloc[j]
        folium.PolyLine(
            locations=[[point1['Latitude'], point1['Longitude']], [point2['Latitude'], point2['Longitude']]],
            color='black',
            weight=2,
            dash_array='5, 5'
        ).add_to(my_map)

In [None]:
from shapely.geometry import MultiPoint, Point
import geopandas as gpd

cluster_polygons = []

# One outline per cluster: buffer the member points by 0.1 coordinate units
# and take the convex hull of the buffered shape. Points are (lon, lat).
for cluster_id, cluster_data in df.groupby('Cluster-K3'):
    points = MultiPoint(cluster_data[['Longitude', 'Latitude']].values)
    buffer = points.buffer(0.1)
    cluster_polygon = buffer.convex_hull
    cluster_polygons.append({'Cluster-K3': cluster_id, 'geometry': cluster_polygon})
cluster_polygons_gdf = gpd.GeoDataFrame(cluster_polygons)

# Tag the geometries as EPSG:4326 coordinates.
cluster_polygons_gdf.set_crs(epsg=4326, inplace=True)

# Mean latitude/longitude of the rows in each cluster; from here on the
# global cluster_centers holds the K=3 means.
cluster_centers = df.groupby('Cluster-K3').agg({'Latitude': 'mean', 'Longitude': 'mean'})
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)
cluster_colors = {0: 'yellow', 1: 'orange', 2: 'red',}

# Markers for the K-Means centroids.
for idx, center in cluster_centers_K3.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Cluster hulls; color=cluster_color binds the colour per iteration.
for cluster_id, cluster_data in cluster_polygons_gdf.iterrows():
    cluster_color = cluster_colors[cluster_data['Cluster-K3']]
    folium.GeoJson(
        cluster_data['geometry'],
        style_function=lambda feature, color=cluster_color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.5,
        }
    ).add_to(my_map)

# Dashed line between every pair of centroids.
for i in range(len(cluster_centers_K3)):
    for j in range(i + 1, len(cluster_centers_K3)):
        point1 = cluster_centers_K3.iloc[i]
        point2 = cluster_centers_K3.iloc[j]
        folium.PolyLine(
            locations=[[point1['Latitude'], point1['Longitude']], [point2['Latitude'], point2['Longitude']]],
            color='black',
            weight=2,
            dash_array='5, 5'
        ).add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

In [None]:
from shapely.geometry import MultiPoint, Point
import geopandas as gpd

cluster_polygons = []
# One outline per cluster: buffer the member points by 0.1 coordinate units
# and take the convex hull of the buffered shape. Points are (lon, lat).
for cluster_id, cluster_data in df.groupby('Cluster-K3'):
    points = MultiPoint(cluster_data[['Longitude', 'Latitude']].values)
    buffer = points.buffer(0.1)
    cluster_polygon = buffer.convex_hull
    cluster_polygons.append({'Cluster-K3': cluster_id, 'geometry': cluster_polygon})

cluster_polygons_gdf = gpd.GeoDataFrame(cluster_polygons)

# Tag the geometries as EPSG:4326 coordinates.
cluster_polygons_gdf.set_crs(epsg=4326, inplace=True)

# Mean latitude/longitude of the rows in each cluster.
cluster_centers = df.groupby('Cluster-K3').agg({'Latitude': 'mean', 'Longitude': 'mean'})
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]
my_map = folium.Map(location=map_center, zoom_start=3)

# Hex colours per cluster, used for points, markers and hull fills.
cluster_colors = {0: '#1f77b4', 1: '#ff7f0e', 2: '#2ca02c'}

# One circle per data row, filled with the colour of its cluster.
for idx, row in df.iterrows():
    cluster_color = cluster_colors[row['Cluster-K3']]
    folium.CircleMarker(
        location=[row['Latitude'], row['Longitude']],
        radius=5,
        color='black',
        fill=True,
        fill_color=cluster_color,
        fill_opacity=0.8
    ).add_to(my_map)

# Markers for the K-Means centroids.
for idx, center in cluster_centers_K3.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Cluster hulls; color=cluster_color binds the colour per iteration.
for cluster_id, cluster_data in cluster_polygons_gdf.iterrows():
    cluster_color = cluster_colors[cluster_data['Cluster-K3']]
    folium.GeoJson(
        cluster_data['geometry'],
        style_function=lambda feature, color=cluster_color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.5,
        }
    ).add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

In [None]:
from shapely.geometry import Point, Polygon
import math

# NOTE(review): world_geojson is not used anywhere in this cell, and
# gpd.datasets appears to be deprecated in recent GeoPandas releases —
# confirm whether this read is still needed.
world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# Whole-world rectangle as [min_lon, min_lat, max_lon, max_lat]; polygon
# vertices are (lon, lat).
square_bounds = [-180, -90, 180, 90]
square_polygon = Polygon([
    (square_bounds[0], square_bounds[1]),
    (square_bounds[0], square_bounds[3]),
    (square_bounds[2], square_bounds[3]),
    (square_bounds[2], square_bounds[1]),
    (square_bounds[0], square_bounds[1])
])

# Midpoint of the rectangle, stored as (lat, lon).
center_lat = (square_bounds[1] + square_bounds[3]) / 2
center_lon = (square_bounds[0] + square_bounds[2]) / 2
center_point = (center_lat, center_lon)

def calculate_point(center, distance, angle, km_per_degree=111):
    """Offset ``center`` by ``distance`` along ``angle`` (flat approximation).

    Args:
        center: ``(lat, lon)`` pair in degrees.
        distance: Offset length, in the same unit as ``km_per_degree``.
        angle: Direction in degrees; 0 moves along +latitude, 90 along
            +longitude.
        km_per_degree: Length of one degree used to convert ``distance`` to
            degrees. Defaults to 111.

    Returns:
        ``(new_lat, new_lon)`` in degrees. The result is neither wrapped nor
        clamped, so large distances can leave the valid lat/lon range.
    """
    lat, lon = center
    lat_rad = math.radians(lat)
    angle_rad = math.radians(angle)
    distance_deg = distance / km_per_degree

    new_lat = lat + (distance_deg * math.cos(angle_rad))
    # The longitude step is divided by cos(latitude), so the same distance
    # spans more degrees of longitude away from the equator.
    new_lon = lon + (distance_deg * math.sin(angle_rad)) / math.cos(lat_rad)

    return new_lat, new_lon

# MultiPoint is used below but was not imported in this cell; import it here
# so the cell no longer depends on an earlier cell having done so.
from shapely.geometry import MultiPoint

# Three triangular sectors around center_point, each spanning 120 degrees
# from its start angle.
sectors = []
angles = [330, 90, 210]
distance = 20000

end_points = []

for angle in angles:
    point1 = calculate_point(center_point, distance, angle)
    point2 = calculate_point(center_point, distance, angle + 120)
    sectors.append(Polygon([center_point, point1, point2]))
    end_points.append(point1)
    end_points.append(point2)

# Sector vertices are built from (lat, lon) pairs, so the centroid coordinates
# are already in the order folium expects.
sector_centroids = [sector.centroid.coords[0] for sector in sectors]

my_map = folium.Map(location=[0, 0], zoom_start=2)

cluster_polygons = []

# One outline per cluster: buffer the member points by 0.1 coordinate units
# and take the convex hull of the buffered shape. Points are (lon, lat).
for cluster_id, cluster_data in df.groupby('Cluster-K3'):
    points = MultiPoint(cluster_data[['Longitude', 'Latitude']].values)
    buffer = points.buffer(0.1)
    cluster_polygon = buffer.convex_hull
    cluster_polygons.append({'Cluster-K3': cluster_id, 'geometry': cluster_polygon})

cluster_polygons_gdf = gpd.GeoDataFrame(cluster_polygons)

cluster_polygons_gdf.set_crs(epsg=4326, inplace=True)

# Mean latitude/longitude of the rows in each cluster.
cluster_centers = df.groupby('Cluster-K3').agg({'Latitude': 'mean', 'Longitude': 'mean'})
map_center = [df['Latitude'].mean(), df['Longitude'].mean()]

cluster_colors = {0: '#1f77b4', 1: '#2ca02c', 2: '#ff7f0e'}

# Markers for the K-Means centroids.
for idx, center in cluster_centers_K3.iterrows():
    cluster_color = cluster_colors[idx % len(cluster_colors)]
    icon = folium.Icon(color='white', icon_color=cluster_color, icon='map-marker', prefix='fa')
    folium.Marker(
        location=[center['Latitude'], center['Longitude']],
        icon=icon,
        popup=f'Cluster {idx}',
    ).add_to(my_map)

# Cluster hulls; color=cluster_color binds the colour per iteration.
for cluster_id, cluster_data in cluster_polygons_gdf.iterrows():
    cluster_color = cluster_colors[cluster_data['Cluster-K3']]
    folium.GeoJson(
        cluster_data['geometry'],
        style_function=lambda feature, color=cluster_color: {
            'fillColor': color,
            'color': 'black',
            'weight': 2,
            'fillOpacity': 0.5,
        }
    ).add_to(my_map)

# Outline of the whole-world rectangle.
folium.GeoJson(
    square_polygon,
    style_function=lambda feature: {
        'fillColor': 'transparent',
        'color': 'black',
        'weight': 2
    }
).add_to(my_map)

folium.Marker(
    location=center_point,
    icon=folium.Icon(color='black', icon='info-sign'),
    popup='Center Point'
).add_to(my_map)

# Sector boundary rays from the centre point.
for angle in angles:
    end_point = calculate_point(center_point, distance, angle)
    folium.PolyLine(
        locations=[center_point, end_point],
        color="black"
    ).add_to(my_map)
    end_points.append(end_point)

sector_colors = ['blue', 'green', 'orange']

# One marker per sector centroid plus a dashed link to the cluster mean at the
# same position. Markers used to be added by two identical loops, which put
# every marker on the map twice; a single pass is enough.
for i, centroid in enumerate(sector_centroids):
    color = sector_colors[i % len(sector_colors)]
    label = f'Sector {i} Centroid'
    folium.Marker(
        location=centroid,
        icon=folium.Icon(color=color, icon='info-sign'),
        popup=label
    ).add_to(my_map)

    point1 = cluster_centers.iloc[i]
    folium.PolyLine(
        locations=[centroid, [point1['Latitude'], point1['Longitude']]],
        color='black',
        weight=2,
        dash_array='5, 5'
    ).add_to(my_map)

# Last expression of the cell: renders the map in the notebook.
my_map

  world_geojson = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))


## 7. Perhitungan Haversine Formula 3 Cluster

In [None]:
# Table of the first three sector centroids, rounded to 5 decimals. The first
# coordinate of each centroid goes to 'Latitude', the second to 'Longitude'.
rounded_centroids = [
    (round(first, 5), round(second, 5)) for first, second in sector_centroids[:3]
]
centers_data = {
    'Latitude': [pair[0] for pair in rounded_centroids],
    'Longitude': [pair[1] for pair in rounded_centroids],
}

centers = pd.DataFrame(centers_data)

In [None]:
# Per-cluster mean latitude/longitude from the most recent groupby.
print(cluster_centers)

             Latitude   Longitude
Cluster-K3                       
0.0         43.795313    0.803132
1.0        -25.441349  135.016222
2.0         38.542283  -96.869289


In [None]:
# Reference centroids of the three sectors.
print(centers)

   Latitude  Longitude
0  52.01354   30.03003
1 -52.01354   30.03003
2  -0.00000  -60.06006


In [None]:
import numpy as np
import pandas as pd

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.
            Scalars and NumPy arrays are both accepted.

    Returns:
        The haversine distance on a sphere of radius 6371 km.
    """
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1
    a = np.sin(dlat/2)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2)**2
    # Rounding can push `a` marginally outside [0, 1] for (near-)antipodal
    # points, which would make arcsin return NaN; clamp it first.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arcsin(np.sqrt(a))
    r = 6371  # sphere radius in km
    return c * r

# Great-circle distance (km) from every cluster mean (rows) to every sector
# centroid (columns).
distances = np.zeros((len(cluster_centers), len(centers)))

for i, found in enumerate(cluster_centers[['Latitude', 'Longitude']].to_numpy()):
    for j, reference in enumerate(centers[['Latitude', 'Longitude']].to_numpy()):
        distances[i, j] = haversine(found[0], found[1], reference[0], reference[1])

distances_df = pd.DataFrame(distances, index=cluster_centers.index, columns=centers.index)
print(distances_df)

                       0             1             2
Cluster-K3                                          
0.0          2342.320120  11016.897286   7719.592133
1.0         13213.938900   8758.083507  16755.441223
2.0          8711.292408  15708.756520   5696.354810


In [None]:
# Planar Euclidean distance, in degrees, between each cluster mean and the
# sector centroid with the same index.
distances = []
for i in range(3):
    # [i] is a label lookup on each Series index.
    kmeans_lat, kmeans_lon = cluster_centers['Latitude'][i], cluster_centers['Longitude'][i]
    orig_lat, orig_lon = centers['Latitude'][i], centers['Longitude'][i]
    distance = np.sqrt((kmeans_lat - orig_lat)**2 + (kmeans_lon - orig_lon)**2)
    distances.append(distance)

distances_df = pd.DataFrame({'Distance': distances})
distances_df.index.name = 'Centroid'

print(distances_df)

            Distance
Centroid            
0          35.575820
1         107.912633
2          61.307521
