In [1]:
# Installing the 'balance-point-clustering' package from PyPI

!pip install balance-point-clustering

Collecting balance-point-clustering
  Downloading balance_point_clustering-0.2.0-py3-none-any.whl.metadata (1.8 kB)
Downloading balance_point_clustering-0.2.0-py3-none-any.whl (4.8 kB)
Installing collected packages: balance-point-clustering
Successfully installed balance-point-clustering-0.2.0


In [2]:
# Import necessary libraries

import pandas as pd
from balance_point_clustering import clustering

In [11]:
# -----------------------------------------------
# Use Case 1: Manually created Pandas DataFrame Data
# -----------------------------------------------
# Testing the algorithm with a manually created Pandas DataFrame (5D space)
# In this case, we create 6 features in a 5-dimensional space

df = {
    'feature1': [1, 2, 3, 4, 5, 6],
    'feature2': [7, 8, 9, 10, 11, 12],
    'feature3': [13, 14, 15, 16, 17, 18],
    'feature4': [19, 20, 21, 22, 23, 24],
    'feature5': [25, 26, 27, 28, 29, 30]
}
df = pd.DataFrame(df)

In [4]:
# Viewing the created DataFrame

df.head()

Unnamed: 0,feature1,feature2,feature3,feature4,feature5
0,1,7,13,19,25
1,2,8,14,20,26
2,3,9,15,21,27
3,4,10,16,22,28
4,5,11,17,23,29


In [38]:
# Running the Balance Point Clustering algorithm and letting it decide the number of clusters by using it's in-built Dynamic Clustering Feature

clusters = clustering.balance_point_clustering(df)
print("Test Case 1: Manually created Pandas DataFrame Data with Dynamic clustering:")
print(clusters)
print("\n")

Test Case 1: Manually created Pandas DataFrame Data with Dynamic clustering:
[[(3, 9, 15, 21, 27), (4, 10, 16, 22, 28)], [(2, 8, 14, 20, 26), (5, 11, 17, 23, 29)], [(1, 7, 13, 19, 25), (6, 12, 18, 24, 30)]]




In [39]:
# Viewing the Optimal number of clusters for the above Dataset based on the Balance Point clustering Algorithm

print("The Optimal number of clusters for the manually created Data is:", len(clusters))
print('\n')

The Optimal number of clusters for the manually created Data is: 3




In [40]:
# Running the Balance Point Clustering algorithm with a user specified custom number of clusters

no_clusters = 2
clusters = clustering.balance_point_clustering(df, no_clusters)
print("Test Case 2: Manually created Pandas DataFrame Data with a user specified custom number of clusters:")
print(clusters)
print("\n")

Test Case 2: Manually created Pandas DataFrame Data with a user specified custom number of clusters:
[[(3, 9, 15, 21, 27), (4, 10, 16, 22, 28)], [(2, 8, 14, 20, 26), (5, 11, 17, 23, 29), (1, 7, 13, 19, 25), (6, 12, 18, 24, 30)]]




In [41]:
# Viewing the number of custom clusters created by balance point clustering algorithm

print("The Custom number of clusters created as specified by the user is:", len(clusters))
print('\n')

The Custom number of clusters created as specified by the user is: 2




In [15]:
# -----------------------------------------------
# Use Case 2: Pre existing built-in Toy Dataset
# -----------------------------------------------
# Testing the algorithm on Pre existing built-in Toy Dataset
# In this case, we use the Wine dataset from sklearn, which contains 178 samples with 13 features

from sklearn.datasets import load_wine

wine_data = load_wine()

In [16]:
# Creating a DataFrame from the Wine dataset

df_wine = pd.DataFrame(data=wine_data.data, columns=wine_data.feature_names)

In [18]:
# Viewing the Wine Dataset

df_wine.head()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline
0,14.23,1.71,2.43,15.6,127.0,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065.0
1,13.2,1.78,2.14,11.2,100.0,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050.0
2,13.16,2.36,2.67,18.6,101.0,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185.0
3,14.37,1.95,2.5,16.8,113.0,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480.0
4,13.24,2.59,2.87,21.0,118.0,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735.0


In [19]:
# Viewing the shape of the Wine Dataset

df_wine.shape

(178, 13)

In [33]:
# Running the Balance Point Clustering algorithm and allowing it to decide the best number of clusters on the Wine Dataset

clusters_wine_auto = clustering.balance_point_clustering(df_wine)

print("\nTest Case 3: Clustering Wine Dataset (Automatic number of clusters):")
print(clusters_wine_auto)
print('\n')


Test Case 3: Clustering Wine Dataset (Automatic number of clusters):
[[(13.4, 3.91, 2.48, 23.0, 102.0, 1.8, 0.75, 0.43, 1.41, 7.3, 0.7, 1.56, 750.0)], [(13.58, 2.58, 2.69, 24.5, 105.0, 1.55, 0.84, 0.39, 1.54, 8.66, 0.74, 1.8, 750.0), (13.71, 5.65, 2.45, 20.5, 95.0, 1.68, 0.61, 0.52, 1.06, 7.7, 0.64, 1.74, 740.0), (13.34, 0.94, 2.36, 17.0, 110.0, 2.53, 1.3, 0.55, 0.42, 3.17, 1.02, 1.93, 750.0)], [(13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 1.04, 2.93, 735.0)], [(12.93, 3.8, 2.65, 18.6, 102.0, 2.41, 2.41, 0.25, 1.98, 4.5, 1.03, 3.52, 770.0), (13.17, 5.19, 2.32, 22.0, 93.0, 1.74, 0.63, 0.61, 1.55, 7.9, 0.6, 1.48, 725.0)], [(12.25, 4.72, 2.54, 21.0, 89.0, 1.38, 0.47, 0.53, 0.8, 3.85, 0.75, 1.27, 720.0), (14.22, 3.99, 2.51, 13.2, 128.0, 3.0, 3.04, 0.2, 2.08, 5.1, 0.89, 3.53, 760.0)], [(13.36, 2.56, 2.35, 20.0, 89.0, 1.4, 0.5, 0.37, 0.64, 5.6, 0.7, 2.47, 780.0), (12.72, 1.81, 2.2, 18.8, 86.0, 2.2, 2.53, 0.26, 1.77, 3.9, 1.16, 3.14, 714.0)], [(12.33, 0.99, 1.95, 14.8, 136.0

In [34]:
# Viewing the Optimal number of clusters for the Wine Dataset based on the Balance Point clustering Algorithm

print("The Optimal number of clusters for the Wine Dataset is:", len(clusters_wine_auto))
print('\n')

The Optimal number of clusters for the Wine Dataset is: 85




In [35]:
# Running the Balance Point Clustering algorithm with user specified custom number of clusters on the Wine Dataset

no_clusters = 10
clusters_wine_custom = clustering.balance_point_clustering(df_wine, no_clusters)

print("\nTest Case 4: Clustering Wine Dataset with a user specified custom number of clusters:")
print(clusters_wine_custom)
print('\n')


Test Case 4: Clustering Wine Dataset with a user specified custom number of clusters:
[[(13.4, 3.91, 2.48, 23.0, 102.0, 1.8, 0.75, 0.43, 1.41, 7.3, 0.7, 1.56, 750.0)], [(13.58, 2.58, 2.69, 24.5, 105.0, 1.55, 0.84, 0.39, 1.54, 8.66, 0.74, 1.8, 750.0), (13.71, 5.65, 2.45, 20.5, 95.0, 1.68, 0.61, 0.52, 1.06, 7.7, 0.64, 1.74, 740.0), (13.34, 0.94, 2.36, 17.0, 110.0, 2.53, 1.3, 0.55, 0.42, 3.17, 1.02, 1.93, 750.0)], [(13.24, 2.59, 2.87, 21.0, 118.0, 2.8, 2.69, 0.39, 1.82, 4.32, 1.04, 2.93, 735.0)], [(12.93, 3.8, 2.65, 18.6, 102.0, 2.41, 2.41, 0.25, 1.98, 4.5, 1.03, 3.52, 770.0), (13.17, 5.19, 2.32, 22.0, 93.0, 1.74, 0.63, 0.61, 1.55, 7.9, 0.6, 1.48, 725.0)], [(12.25, 4.72, 2.54, 21.0, 89.0, 1.38, 0.47, 0.53, 0.8, 3.85, 0.75, 1.27, 720.0), (14.22, 3.99, 2.51, 13.2, 128.0, 3.0, 3.04, 0.2, 2.08, 5.1, 0.89, 3.53, 760.0)], [(13.36, 2.56, 2.35, 20.0, 89.0, 1.4, 0.5, 0.37, 0.64, 5.6, 0.7, 2.47, 780.0), (12.72, 1.81, 2.2, 18.8, 86.0, 2.2, 2.53, 0.26, 1.77, 3.9, 1.16, 3.14, 714.0)], [(12.33, 0.99, 

In [36]:
# Viewing the number of custom clusters created by balance point clustering algorithm

print("The Custom number of clusters created as specified by the user is:", len(clusters_wine_custom))
print('\n')

The Custom number of clusters created as specified by the user is: 10


