# DBSCAN Clustering on RS, Then Assigning WS Points to the Nearest RS Cluster Centroid

In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Pipeline: cluster the RS dataset with DBSCAN, summarise each cluster by its
# centroid (in standardized feature space), then label every WS row with the
# index of its nearest RS centroid and plot both datasets side by side.

# Load the RS dataset
rs_data = pd.read_csv('rs_data.csv')

# Standardize the features; the scaler is fitted on RS only so that WS can be
# projected into the same feature space later.
scaler = StandardScaler()
rs_data_scaled = scaler.fit_transform(rs_data)

# Apply DBSCAN clustering (label -1 marks noise points)
dbscan = DBSCAN(eps=0.5, min_samples=5)
rs_labels = dbscan.fit_predict(rs_data_scaled)

# Calculate the centroid of each cluster, ignoring noise points.
# np.unique returns the labels sorted, so row i of `centroids` belongs to the
# i-th non-noise label.
cluster_ids = [label for label in np.unique(rs_labels) if label != -1]
if not cluster_ids:
    # Without this guard the nearest-centroid step below fails with an opaque
    # broadcasting/axis error on the empty centroid array.
    raise ValueError(
        "DBSCAN labelled every RS point as noise, so there are no cluster "
        "centroids to assign WS points to; try adjusting eps/min_samples."
    )
centroids = np.array(
    [rs_data_scaled[rs_labels == label].mean(axis=0) for label in cluster_ids]
)

# Load the WS dataset and scale it with the statistics learned from RS
ws_data = pd.read_csv('ws_data.csv')
ws_data_scaled = scaler.transform(ws_data)

# Assign WS data points to the nearest centroid.
# Broadcasting builds an (n_ws, n_clusters, n_features) difference array, so
# the Euclidean distance to every centroid is computed without a Python loop.
distances = np.linalg.norm(
    ws_data_scaled[:, np.newaxis, :] - centroids[np.newaxis, :, :], axis=2
)
ws_labels = np.argmin(distances, axis=1)

# Plot the results (first two standardized features only).
# Both panels share one colour range taken from the RS labels; otherwise each
# scatter normalises independently and the same cluster id is drawn in
# different colours. NOTE(review): this lines colours up as long as DBSCAN's
# cluster labels are the consecutive integers 0..k-1 — confirm against the
# scikit-learn docs if that assumption matters.
color_range = {'vmin': rs_labels.min(), 'vmax': rs_labels.max()}
plt.figure(figsize=(10, 5))
plt.subplot(1, 2, 1)
plt.scatter(rs_data_scaled[:, 0], rs_data_scaled[:, 1], c=rs_labels,
            cmap='viridis', **color_range)
plt.title('RS Data Clustering')
plt.subplot(1, 2, 2)
plt.scatter(ws_data_scaled[:, 0], ws_data_scaled[:, 1], c=ws_labels,
            cmap='viridis', **color_range)
plt.title('WS Data Assigned to Centroids')
plt.show()
