In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import DBSCAN

# --- Configuration -----------------------------------------------------------
# Everything a reader is likely to tune lives here rather than being buried in
# the pipeline below.
INPUT_CSV = "social_network_data.csv"  # Replace with actual file
OUTPUT_CSV = "social_network_clusters.csv"
DBSCAN_EPS = 0.9         # neighbourhood radius, measured on the standardized features
DBSCAN_MIN_SAMPLES = 3   # points (including the point itself) required to form a core point

# Load the dataset
# NOTE(review): the recorded preview shows an 'Unnamed: 0' column. If that is a
# saved index inside the CSV (and not a display artifact), pass index_col=0 here
# so it is not written back out as a data column -- confirm against the file.
df = pd.read_csv(INPUT_CSV)

# Select relevant features
selected_features = ['Num_Friends', 'Mutual_Connections', 'Interaction_Frequency', 'Network_Centrality']

# Ensure selected features exist in the dataset. Collect every missing column so
# the user can fix them all in one pass instead of one failed run per column.
missing_features = [feature for feature in selected_features if feature not in df.columns]
if missing_features:
    raise KeyError(f"Column(s) {missing_features} not found in dataset!")

X = df[selected_features]

# StandardScaler lets NaN pass through, but DBSCAN rejects it with a generic
# ValueError. Fail here instead, with a message that names the offending columns.
nan_counts = X.isna().sum()
if nan_counts.any():
    raise ValueError(
        f"Selected features contain missing values: {nan_counts[nan_counts > 0].to_dict()}"
    )

# Standardize the data so every feature contributes on a comparable scale to
# the distances DBSCAN computes (the raw columns have very different ranges).
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Apply DBSCAN clustering. Adjust DBSCAN_EPS / DBSCAN_MIN_SAMPLES based on the
# network structure.
dbscan = DBSCAN(eps=DBSCAN_EPS, min_samples=DBSCAN_MIN_SAMPLES)
# The label is stored on `df` itself because later cells read df['Cluster'].
# A label of -1 is DBSCAN's marker for noise, not a real cluster.
df['Cluster'] = dbscan.fit_predict(X_scaled)

# Save results
df.to_csv(OUTPUT_CSV, index=False)

# Print cluster distribution (the -1 row counts the points classified as noise)
print("Clusters found:\n", df['Cluster'].value_counts())
print(f"Results saved as '{OUTPUT_CSV}'.")

Clusters found:
 Cluster
-1    17
 0     3
Name: count, dtype: int64
Results saved as 'social_network_clusters.csv'.


In [3]:
# Preview the first eight rows of the frame, including the 'Cluster' label column.
df.iloc[:8]

Unnamed: 0,Num_Friends,Mutual_Connections,Interaction_Frequency,Network_Centrality,Cluster
0,571,166,36,0.16,0
1,879,179,50,1.0,-1
2,678,393,34,0.81,-1
3,198,224,50,0.41,-1
4,869,204,39,0.73,-1
5,111,41,2,0.57,-1
6,403,111,49,0.27,0
7,997,327,12,0.82,-1
