#246

In [3]:
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

# Load the dataset and keep only rows where both coordinates are present.
# DBSCAN refuses NaN input, so incomplete records are removed before clustering.
data = pd.read_csv('traffic_accidents.csv')
locations = data[['latitude', 'longitude']].dropna().values

# Normalize the data so both coordinates are on a comparable scale
locations_scaled = StandardScaler().fit_transform(locations)

# Apply DBSCAN clustering (eps is expressed in standardized units, not degrees)
db = DBSCAN(eps=0.1, min_samples=5)
labels = db.fit_predict(locations_scaled)

# Visualize the results in the original (unscaled) coordinates
plt.scatter(locations[:, 0], locations[:, 1], c=labels, cmap='viridis')
plt.xlabel('Latitude')
plt.ylabel('Longitude')
plt.title('DBSCAN Clustering of Traffic Accidents')
plt.show()

# Print hotspots (clusters, excluding noise points).
# np.unique yields the cluster ids in ascending order, so the report is
# printed in a stable order from run to run.
for cluster_id in np.unique(labels):
    if cluster_id == -1:  # DBSCAN labels noise points as -1
        continue
    cluster_points = locations[labels == cluster_id]
    print(f"Hotspot {cluster_id}:")
    print(cluster_points)

FileNotFoundError: [Errno 2] No such file or directory: 'traffic_accidents.csv'

#247

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

def visualize_dbscan(data, eps, min_samples, xlabel='Latitude', ylabel='Longitude'):
    """Cluster 2D points with DBSCAN and show a scatter plot coloured by cluster.

    The points are standardized before clustering, so ``eps`` is measured in
    standardized units. The plot uses the original, unscaled coordinates.

    Parameters
    ----------
    data : array-like with at least two columns
        Points to cluster. NumPy arrays, nested lists and DataFrames are
        accepted; the first two columns are plotted.
    eps : float
        Neighbourhood radius passed to ``DBSCAN``.
    min_samples : int
        Minimum neighbourhood size passed to ``DBSCAN``.
    xlabel, ylabel : str, optional
        Axis labels. The defaults keep the labels used for the
        traffic-accident example.

    Returns
    -------
    None
        The figure is displayed via ``plt.show()``.
    """
    # Convert once so that positional slicing below also works for DataFrames
    points = np.asarray(data)

    # Normalize the data
    data_scaled = StandardScaler().fit_transform(points)

    # Apply DBSCAN
    db = DBSCAN(eps=eps, min_samples=min_samples)
    labels = db.fit_predict(data_scaled)

    # Plot the clusters
    plt.scatter(points[:, 0], points[:, 1], c=labels, cmap='viridis')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.title(f'DBSCAN Clustering (eps={eps}, min_samples={min_samples})')
    plt.colorbar(label='Cluster ID')
    plt.show()

# Example usage
# -------------
# Read the accident records (swap in the path of your own dataset) and pull
# out the two coordinate columns as a plain array.
data = pd.read_csv('traffic_accidents.csv')
locations = data.loc[:, ['latitude', 'longitude']].to_numpy()

# Cluster and plot with the chosen DBSCAN settings
visualize_dbscan(locations, min_samples=5, eps=0.1)

#248

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.cluster import DBSCAN

# Two interleaving half-moons: a small synthetic 2D dataset
X, _ = make_moons(300, noise=0.1, random_state=42)

# Fit DBSCAN and read the cluster assignment off the fitted estimator
db = DBSCAN(min_samples=5, eps=0.2)
db.fit(X)
labels = db.labels_

# Scatter plot of the two features, coloured by cluster label
plt.scatter(*X.T, c=labels, cmap='viridis')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.title('DBSCAN Clustering on make_moons dataset')
plt.show()

#249

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN

# Load the dataset (replace 'your_dataset.csv' with the actual filename)
data = pd.read_csv('your_dataset.csv')

# Select the 3rd and 4th columns (index 2 and 3)
X = data.iloc[:, [2, 3]]

# Apply DBSCAN with eps=5 and min_samples=5
db = DBSCAN(eps=5, min_samples=5)
labels = db.fit_predict(X)

# Plot the clustering result; the axes are labelled with the names of the
# selected columns so the figure can be read on its own
plt.scatter(X.iloc[:, 0], X.iloc[:, 1], c=labels, cmap='viridis')
plt.xlabel(str(X.columns[0]))
plt.ylabel(str(X.columns[1]))
plt.title('DBSCAN Clustering (3rd and 4th Columns)')
plt.show()

#250

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler

# Toy customer table: purchase frequency and average transaction amount
data = pd.DataFrame(
    {
        'frequency': [10, 15, 9, 23, 4, 20, 18, 25, 5, 7],
        'avg_transaction': [200, 220, 190, 250, 160, 210, 230, 240, 180, 170],
    }
)

# Standardize both features before clustering (important for DBSCAN)
scaler = StandardScaler().fit(data)
X_scaled = scaler.transform(data)

# Fit DBSCAN on the standardized features and take its labels
db = DBSCAN(min_samples=2, eps=0.5).fit(X_scaled)
labels = db.labels_

# Attach each customer's cluster label to the table
data['Cluster'] = labels

# Report the per-cluster feature means
segment_means = data.groupby('Cluster').mean()
print("Customer Segments:")
print(segment_means)

# Scatter plot of the raw features, coloured by cluster label
plt.scatter(
    data.loc[:, 'frequency'],
    data.loc[:, 'avg_transaction'],
    c=labels,
    cmap='viridis',
)
plt.xlabel('Frequency of Purchases')
plt.ylabel('Average Transaction Amount')
plt.title('Customer Segments Based on Purchasing Behavior')
plt.show()

#251

In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN

# Define the data points
data = np.array([
    [3, 7], [4, 6], [5, 5], [6, 4], [7, 3], [6, 2], [7, 2], [8, 4],
    [3, 3], [2, 6], [3, 5], [2, 4]
])

# Apply DBSCAN with eps=1.9 and min_samples=4
db = DBSCAN(eps=1.9, min_samples=4)
labels = db.fit_predict(data)

# Create a DataFrame for points and their corresponding cluster labels
df = pd.DataFrame(data, columns=['X', 'Y'])
df['Cluster'] = labels

# Classify each point using what the fitted estimator reports:
#   - Noise  : label -1
#   - Core   : index listed in db.core_sample_indices_
#   - Border : assigned to a cluster but not a core sample
# Cluster size must not be used here: a cluster with four or more members
# still contains border points, so counting members marks them all as core.
is_core = np.zeros(len(data), dtype=bool)
is_core[db.core_sample_indices_] = True
df['Point_Type'] = np.where(
    labels == -1,
    'Noise',
    np.where(is_core, 'Core', 'Border'),
)

# Print the results
print(df)

#266

import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage

# Twelve sample observations in the plane
X = np.array([
    [3, 7], [4, 6], [5, 5], [6, 4], [7, 3], [6, 2],
    [7, 2], [8, 4], [3, 3], [2, 6], [3, 5], [2, 4],
])

# Build the agglomerative merge tree with Ward's method
Z = linkage(X, 'ward')

# Draw the merge tree as a dendrogram
plt.figure(figsize=(8, 6))
dendrogram(Z)
plt.xlabel("Data Points")
plt.ylabel("Distance")
plt.title("Dendrogram")
plt.show()

#267

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage

# Sample 2D data points
X = np.array([[3, 7], [4, 6], [5, 5], [6, 4], [7, 3], [6, 2], [7, 2], [8, 4], 
              [3, 3], [2, 6], [3, 5], [2, 4]])

# Linkage using different methods
single_linkage = linkage(X, method='single')  # Single linkage
complete_linkage = linkage(X, method='complete')  # Complete linkage
average_linkage = linkage(X, method='average')  # Average linkage

# Helper that draws one dendrogram per linkage matrix
def plot_dendrogram(linkage_matrix, method):
    """Show the dendrogram of ``linkage_matrix`` in a new 8x6 figure.

    ``method`` is only used as text inside the figure title.
    """
    plt.figure(figsize=(8, 6))
    dendrogram(linkage_matrix)
    axes = plt.gca()
    axes.set_title(f"Dendrogram with {method} Linkage")
    axes.set_xlabel("Data Points")
    axes.set_ylabel("Distance")
    plt.show()

# Draw the three dendrograms in turn: single, complete, then average
for matrix, title_word in (
    (single_linkage, 'Single'),
    (complete_linkage, 'Complete'),
    (average_linkage, 'Average'),
):
    plot_dendrogram(matrix, title_word)

#268

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from scipy.cluster.hierarchy import dendrogram, linkage

# Ten synthetic 2D points drawn around three centres with make_blobs
blob_settings = dict(n_samples=10, centers=3, cluster_std=1.0, random_state=42)
X, _ = make_blobs(**blob_settings)

# Agglomerative hierarchical clustering with the 'ward' linkage method
Z = linkage(X, 'ward')

# Visualize the resulting merge tree as a dendrogram
plt.figure(figsize=(8, 6))
dendrogram(Z)
plt.xlabel("Data Points")
plt.ylabel("Euclidean Distance")
plt.title("Dendrogram for Agglomerative Hierarchical Clustering")
plt.show()

#269

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage

# Six scalar observations, stored as a single-column 2D array for linkage
data = np.array([18, 22, 25, 42, 27, 43])[:, np.newaxis]

# Agglomerative hierarchical clustering with Ward's method
Z = linkage(data, 'ward')

# Draw the dendrogram
plt.figure(figsize=(8, 6))
dendrogram(Z)
plt.xlabel("Data Points")
plt.ylabel("Distance")
plt.title("Dendrogram for Agglomerative Clustering (1D Data)")
plt.show()