In [None]:
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
import numpy as np


In [None]:
# Synthetic 2-feature dataset: 500 samples spread around 3 blob centres
# (std 2.0 per blob), with a fixed seed so the same points come back each run.
# `x` receives the sample matrix, `y` the second value returned by make_blobs.
x, y = make_blobs(n_samples=500, n_features=2, centers=3, cluster_std=2.0, random_state=42)

In [None]:
# Raw view of the samples: first feature on the horizontal axis, second on the vertical.
plt.scatter(x=x[:, 0], y=x[:, 1])

In [None]:
# Fit a 3-component Gaussian mixture to the blob samples (seeded for repeatability).
# fit() hands back the estimator itself, so it can be chained onto the constructor.
gmm = GaussianMixture(n_components=3, random_state=42).fit(x)
gmm

In [None]:
# Assign every sample to its most likely mixture component. predict() needs the
# whole (n_samples, n_features) matrix: a single row such as x[0] is 1-D, which
# scikit-learn rejects, and it could never yield one label per plotted point.
labels = gmm.predict(x)
means = gmm.means_

# Colour each sample by its predicted component.
plt.scatter(x[:, 0], x[:, 1], c=labels)

# Tag each fitted component mean with its index. The loop variables are
# deliberately not named x / y, so the dataset variables are not overwritten
# with scalars (which would break re-running this cell or any cell using x).
for idx, (mean_x, mean_y) in enumerate(means):
  plt.text(mean_x, mean_y, str(idx), color="black", bbox=dict(facecolor="white", edgecolor="black"))

In [None]:
# Query the fitted mixture for one unseen point. It is wrapped in an outer
# list so the model receives a single-row, two-column input.
new = [[0, 3]]

# Hard assignment and per-component membership probabilities for that point.
cluster = gmm.predict(new)
probs = gmm.predict_proba(new)

# Index 0 picks out the result for the only row that was passed in.
print(f"Point: {new} | Cluster: {cluster[0]}")
print(f"Probabilities: {probs[0]}")

In [None]:
def generate_cluster_data(loc, scale, size):
  """Draw normally distributed samples for one synthetic cluster.

  Args:
    loc: Mean of the normal distribution.
    scale: Standard deviation of the normal distribution.
    size: Output shape handed to ``np.random.normal`` (an int gives a 1-D array).

  Returns:
    The array of samples produced by ``np.random.normal``. Draws come from
    NumPy's global random state, so seed it beforehand for repeatable output.
  """
  samples = np.random.normal(loc, scale, size)
  return samples

In [None]:
# Seed NumPy's global generator (the one generate_cluster_data draws from) so
# the synthetic prices are identical on every run, matching the fixed
# random_state=42 used for the models in this notebook.
np.random.seed(42)

# First price segment: 300 draws centred on 5000 (std 2500).
# Non-positive draws are discarded, so fewer than 300 values may remain.
cluster_1 = generate_cluster_data(5000, 2500, 300)
cluster_1 = cluster_1[cluster_1 > 0]

# Second price segment: 100 draws centred on 20000 (std 5000), same filter.
cluster_2 = generate_cluster_data(20000, 5000, 100)
cluster_2 = cluster_2[cluster_2 > 0]

# Single 1-D array holding both segments back to back.
prices = np.concatenate([cluster_1, cluster_2])


In [None]:
import seaborn as sns

# Histogram of all prices with a kernel density estimate drawn on top.
# flatten() gives a 1-D copy of the price values for plotting.
flat = prices.flatten()
sns.histplot(data=flat, kde=True)

In [None]:
# Two-component mixture for the price data (seeded for repeatability).
# Note: this rebinds `gmm`, replacing the model fitted on the blob samples.
gmm = GaussianMixture(n_components=2, random_state=42)

# GaussianMixture requires a 2-D (n_samples, n_features) input. The prices are
# a single feature, so present them as one column; reshape(-1, 1) leaves an
# array that is already a single column unchanged.
gmm.fit(prices.reshape(-1, 1))

In [None]:
import pandas as pd

# predict() needs the same 2-D (n_samples, n_features) layout that fit()
# requires; the prices are one feature, so pass them as a single column.
# reshape(-1, 1) leaves an array that is already a single column unchanged.
labels = gmm.predict(prices.reshape(-1, 1))

# One row per price with the mixture component it was assigned to.
# flatten() guarantees a 1-D column of values for the DataFrame.
data = pd.DataFrame({"price": prices.flatten(), "cluster": labels})
print(data)