In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# --- Synthetic salary data ---------------------------------------------------
# Seed NumPy's global RNG so the draw below is repeatable.
np.random.seed(42)
n_samples = 1000
# One column of normally distributed salaries (mean 50000, std 15000).
salary = np.random.normal(loc=50000, scale=15000, size=n_samples).reshape(-1, 1)
data = pd.DataFrame(salary, columns=['Salary'])

# --- Standardise the single feature before clustering ------------------------
scaler = StandardScaler()
scaled_data = scaler.fit_transform(data)

# --- Cluster the scaled salaries with both algorithms ------------------------
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans_labels = kmeans.fit_predict(scaled_data)

dbscan = DBSCAN(eps=0.5, min_samples=5)
dbscan_labels = dbscan.fit_predict(scaled_data)

# Keep the assignments next to the unscaled salaries for the cells that follow.
data['KMeans_Labels'] = kmeans_labels
data['DBSCAN_Labels'] = dbscan_labels

# --- Side-by-side strip plots, coloured by cluster label ---------------------
# The data is one-dimensional, so every point is drawn at y = 0 and the y ticks
# are hidden.
plt.figure(figsize=(12, 6))
panels = (
    ('K-means Clustering', kmeans_labels),
    ('DBSCAN Clustering', dbscan_labels),
)
for position, (panel_title, panel_labels) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.scatter(data['Salary'], np.zeros_like(data['Salary']), c=panel_labels, cmap='viridis')
    plt.title(panel_title)
    plt.xlabel('Salary')
    plt.yticks([])
plt.show()


In [None]:
# Build one synthetic identifier per row of `data`: 'Cust0', 'Cust1', ...
# The count comes from the frame itself rather than the global `n_samples`,
# because later cells reassign `n_samples`; sizing from `data` keeps this cell
# correct even when cells are executed out of order.
customer_ids = ['Cust' + str(i) for i in range(len(data))]

# Attach the identifiers to the clustered salary frame.
data['Customer_ID'] = customer_ids

# For each clustering, gather the customer IDs that share a label into a list
# (one row per label, one list-valued 'Customer_ID' column).
kmeans_pivot_table = pd.pivot_table(data, values='Customer_ID', index='KMeans_Labels', aggfunc=list)
dbscan_pivot_table = pd.pivot_table(data, values='Customer_ID', index='DBSCAN_Labels', aggfunc=list)

print("K-means Pivot Table:")
print(kmeans_pivot_table)
print("\nDBSCAN Pivot Table:")
print(dbscan_pivot_table)


In [None]:
import seaborn as sns

In [None]:
# Salary spread within each K-means cluster: one box per cluster label.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(x='KMeans_Labels', y='Salary', data=data, ax=ax)
ax.set_title('Box plot of Salary by K-means Clusters')
ax.set_xlabel('Cluster Label')
ax.set_ylabel('Salary')
plt.show()

In [None]:
# Overlay one salary histogram (with a KDE curve) per K-means cluster.
plt.figure(figsize=(10, 6))
# groupby yields the labels in ascending order, which matches iterating over
# the sorted unique label values.
for cluster_id, cluster_salaries in data.groupby('KMeans_Labels')['Salary']:
    sns.histplot(cluster_salaries, kde=True, label=f'Cluster {cluster_id}')
plt.title('Histogram of Salary by K-means Clusters')
plt.xlabel('Salary')
plt.ylabel('Frequency')
plt.legend()
plt.show()


In [None]:
# NOTE(review): this cell produces the same figure as the histogram cell
# directly before it; consider removing one of the two.
# Overlay one salary histogram (with a KDE curve) per K-means cluster.
fig, ax = plt.subplots(figsize=(10, 6))
cluster_ids = sorted(data['KMeans_Labels'].unique())
for cluster_id in cluster_ids:
    in_cluster = data['KMeans_Labels'] == cluster_id
    sns.histplot(data.loc[in_cluster, 'Salary'], kde=True, label=f'Cluster {cluster_id}', ax=ax)
ax.set_title('Histogram of Salary by K-means Clusters')
ax.set_xlabel('Salary')
ax.set_ylabel('Frequency')
ax.legend()
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# --- Synthetic transactions: two features (amount, frequency) ----------------
np.random.seed(42)
n_normal_transactions = 1000
n_fraud_transactions = 50

# Normal activity is centred on (100, 10); the fraud sample on (400, 100).
normal_transactions = np.random.normal(loc=[100, 10], scale=[20, 5], size=(n_normal_transactions, 2))
fraud_transactions = np.random.normal(loc=[400, 100], scale=[50, 20], size=(n_fraud_transactions, 2))

# Stack both groups: normal rows first, fraud rows after.
transactions = np.concatenate((normal_transactions, fraud_transactions), axis=0)
# Ground truth in the same row order (0 = normal, 1 = fraud).
# NOTE(review): `labels` is not consulted anywhere below in this cell.
labels = np.concatenate((
    np.zeros(n_normal_transactions, dtype=int),
    np.ones(n_fraud_transactions, dtype=int),
))

# --- Standardise both features ------------------------------------------------
scaler = StandardScaler()
scaled_transactions = scaler.fit_transform(transactions)

# --- Density-based clustering -------------------------------------------------
dbscan = DBSCAN(eps=0.5, min_samples=5)
predicted_labels = dbscan.fit(scaled_transactions).labels_

# --- Scatter of the scaled features, coloured by DBSCAN label -----------------
fig, ax = plt.subplots(figsize=(10, 6))
points = ax.scatter(
    scaled_transactions[:, 0],
    scaled_transactions[:, 1],
    c=predicted_labels,
    cmap='viridis',
)
ax.set_title('DBSCAN Clustering for Fraud Detection')
ax.set_xlabel('Scaled Amount of Transaction')
ax.set_ylabel('Scaled Frequency of Transaction')
fig.colorbar(points, ax=ax, label='Cluster Label')
plt.show()

# --- Rows DBSCAN left unclustered (label -1) are reported as suspicious ------
# Only noise points are selected here; rows that DBSCAN assigns to any cluster
# are not flagged by this step.
potential_fraud_indices = np.flatnonzero(predicted_labels == -1)
potential_fraud_transactions = transactions[potential_fraud_indices]

print("Potential Fraud Transactions:")
print(potential_fraud_transactions)


In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# --- Simulated traffic observations -------------------------------------------
np.random.seed(42)

n_samples = 1000
# Columns are drawn in this order so the seeded random stream is consumed
# deterministically: hour of day, route length (miles), average speed (mph).
traffic_data = pd.DataFrame({
    'hour_of_day': np.random.randint(low=0, high=24, size=n_samples),
    'route_length': np.random.uniform(low=5, high=20, size=n_samples),
    'average_speed': np.random.randint(low=20, high=70, size=n_samples),
})

# --- Standardise all three features -------------------------------------------
scaler = StandardScaler()
scaled_traffic_data = scaler.fit_transform(traffic_data)

# --- K-means with five clusters -----------------------------------------------
kmeans = KMeans(n_clusters=5, random_state=42)
labels = kmeans.fit(scaled_traffic_data).labels_

# --- Two views of the clusters, both against average speed --------------------
plt.figure(figsize=(12, 6))
panels = (
    ('hour_of_day', 'Hour of Day vs Average Speed', 'Hour of Day'),
    ('route_length', 'Route Length vs Average Speed', 'Route Length (miles)'),
)
for position, (x_column, panel_title, x_label) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.scatter(traffic_data[x_column], traffic_data['average_speed'], c=labels, cmap='viridis', s=10)
    plt.title(panel_title)
    plt.xlabel(x_label)
    plt.ylabel('Average Speed')
    plt.colorbar(label='Cluster Label')

plt.tight_layout()
plt.show()

# --- Cluster centres mapped back to the original feature units ----------------
cluster_centers = scaler.inverse_transform(kmeans.cluster_centers_)
cluster_centers_df = pd.DataFrame(
    cluster_centers,
    columns=['hour_of_day', 'route_length', 'average_speed'],
)
print("Cluster Centers:")
print(cluster_centers_df)


In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# --- Simulated customers: age, income, spending score -------------------------
np.random.seed(42)

n_samples = 1000
customer_data = pd.DataFrame({
    'age': np.random.randint(low=18, high=70, size=n_samples),
    'income': np.random.normal(loc=50000, scale=20000, size=n_samples),
    'spending_score': np.random.randint(low=1, high=101, size=n_samples),
})

# --- Standardise, then split into three K-means segments ----------------------
scaler = StandardScaler()
scaled_customer_data = scaler.fit_transform(customer_data)

kmeans = KMeans(n_clusters=3, random_state=42)
labels = kmeans.fit(scaled_customer_data).labels_

# Segment centres expressed in the original (unscaled) units.
cluster_centers = scaler.inverse_transform(kmeans.cluster_centers_)
cluster_centers_df = pd.DataFrame(cluster_centers, columns=['age', 'income', 'spending_score'])

# --- Tag every customer with its segment --------------------------------------
segmented_customers = customer_data.assign(segment=labels)

# Map each segment id to the rows that belong to it. Every member of the
# segment is kept; no further filter (e.g. on spending score) is applied.
potential_customers = {
    segment: segmented_customers[segmented_customers['segment'] == segment]
    for segment in range(kmeans.n_clusters)
}

# --- Show the first few rows of each segment ----------------------------------
for segment in potential_customers:
    print(f"Potential customers in segment {segment}:")
    print(potential_customers[segment].head())  # head() shows the first five rows
    print()


In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# --- Simulated customers: age, income, spending score -------------------------
np.random.seed(42)

n_samples = 1000
customer_data = pd.DataFrame({
    'age': np.random.randint(low=18, high=70, size=n_samples),
    'income': np.random.normal(loc=50000, scale=20000, size=n_samples),
    'spending_score': np.random.randint(low=1, high=101, size=n_samples),
})

# --- Standardise, then split into three K-means segments ----------------------
scaler = StandardScaler()
scaled_customer_data = scaler.fit_transform(customer_data)

kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(scaled_customer_data)
labels = kmeans.labels_

# Segment centres expressed in the original (unscaled) units.
cluster_centers = scaler.inverse_transform(kmeans.cluster_centers_)
cluster_centers_df = pd.DataFrame(cluster_centers, columns=['age', 'income', 'spending_score'])

# --- Tag every customer with its segment --------------------------------------
segmented_customers = customer_data.copy()
segmented_customers['segment'] = labels

# Map each segment id to all of its rows (no spending-score filter is applied).
potential_customers = {}
for segment in range(kmeans.n_clusters):
    belongs_to_segment = segmented_customers['segment'] == segment
    potential_customers[segment] = segmented_customers[belongs_to_segment]

# --- Age vs income, one colour per segment, centres marked with a black x -----
fig, ax = plt.subplots(figsize=(12, 6))
for segment, segment_data in potential_customers.items():
    ax.scatter(segment_data['age'], segment_data['income'], label=f'Segment {segment}', alpha=0.7)

ax.scatter(
    cluster_centers_df['age'],
    cluster_centers_df['income'],
    color='black',
    marker='x',
    label='Cluster Centers',
)
ax.set_title('Customer Segmentation with Potential Customers')
ax.set_xlabel('Age')
ax.set_ylabel('Income')
ax.legend()
ax.grid(True)
plt.show()


In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Generate random earthquake data
np.random.seed(42)
n_samples = 1000

# Generate random latitude and longitude (degrees) within their valid ranges
latitude = np.random.uniform(low=-90, high=90, size=n_samples)
longitude = np.random.uniform(low=-180, high=180, size=n_samples)

# Generate random magnitudes
magnitude = np.random.uniform(low=2.0, high=7.0, size=n_samples)

# Create a DataFrame to store the earthquake data
earthquake_data = pd.DataFrame({'Latitude': latitude, 'Longitude': longitude, 'Magnitude': magnitude})

# K-means measures straight-line (Euclidean) distance. Applied directly to
# latitude/longitude in degrees, that treats longitudes -180 and 180 as far
# apart although they are the same meridian, and over-weights east-west
# separation near the poles. Project each epicentre onto the unit sphere so
# the straight-line (chord) distance between two points grows with their
# great-circle separation.
lat_rad = np.radians(earthquake_data['Latitude'].to_numpy())
lon_rad = np.radians(earthquake_data['Longitude'].to_numpy())
unit_sphere_xyz = np.column_stack((
    np.cos(lat_rad) * np.cos(lon_rad),
    np.cos(lat_rad) * np.sin(lon_rad),
    np.sin(lat_rad),
))

# Perform K-means clustering on location only ('Magnitude' is not a feature).
# Note: kmeans.cluster_centers_ are 3-D Cartesian points, not lat/lon pairs.
kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(unit_sphere_xyz)
labels = kmeans.labels_

# Visualize the clusters on a longitude/latitude plane. A cluster may appear
# on both the left and right edges because it wraps around the antimeridian.
plt.figure(figsize=(10, 6))
plt.scatter(earthquake_data['Longitude'], earthquake_data['Latitude'], c=labels, cmap='viridis', s=10)
plt.title('Earthquake Clusters')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.colorbar(label='Cluster Label')
plt.grid(True)
plt.show()


In [None]:
import networkx as nx
import matplotlib.pyplot as plt
from networkx.algorithms import community

# Load social network data (example: the karate club graph bundled with networkx)
# You can replace this with your own social network data
G = nx.karate_club_graph()

# Detect communities by greedy modularity maximization
# (greedy_modularity_communities; this is not the Louvain method).
communities = community.greedy_modularity_communities(G)

# Assign each node the colour of its community so the graph is drawn in a
# single pass. 'lightblue' is kept as the fallback for any node that is not
# listed in a community.
node_to_color = {
    node: f'C{idx}'
    for idx, com in enumerate(communities)
    for node in com
}
node_colors = [node_to_color.get(node, 'lightblue') for node in G.nodes()]

# Plot the social network graph with communities color-coded
plt.figure(figsize=(10, 6))
# Fruchterman-Reingold force-directed layout; seeded so the figure is the
# same on every run.
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos, node_color=node_colors, with_labels=True, node_size=300)
plt.title('Social Network with Community Detection')
plt.show()

# Print communities
for idx, com in enumerate(communities):
    print(f"Community {idx}: {list(com)}")
