In [None]:
# Import necessary libraries
import pandas as pd

# Load the dataset
# NOTE(review): the CSV path is blank in this notebook, so the load cannot succeed
# until DATA_PATH points at the real file. Later cells read the 'Latitude' and
# 'Longitude' columns from this frame.
DATA_PATH = ""

# Fail fast with an actionable message instead of pandas' bare "[Errno 2] ... ''".
# The exception type matches what pd.read_csv("") raises.
if not DATA_PATH:
    raise FileNotFoundError(
        "DATA_PATH is empty: set it to the CSV file to load before running this notebook."
    )

df = pd.read_csv(DATA_PATH)

# Display the first few rows of the dataset
df.head()

In [None]:
# Import necessary libraries for analysis
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Columns fed into scaling, PCA and KMeans: the two coordinate columns of `df`.
features = ['Latitude', 'Longitude']
# Take every row, restricted to the chosen feature columns.
X = df.loc[:, features]

In [None]:
# Standardize the features: learn the scaling statistics from X, then apply
# them to the same data.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Project the standardized features onto their principal components.
# Two components are kept, which is also the number of selected input features,
# so the projection keeps the data two-dimensional.
pca = PCA(
    n_components=2,
)
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Elbow method: fit KMeans for k = 1..10 on the PCA output and record each
# model's inertia (within-cluster sum of squares, WCSS).
kmeans_options = dict(init='k-means++', random_state=42, n_init=10)

wcss = []
for i in range(1, 11):
    # fit() returns the fitted estimator, so construction and fitting chain.
    kmeans = KMeans(n_clusters=i, **kmeans_options).fit(X_pca)
    wcss += [kmeans.inertia_]

In [None]:
# Elbow curve: WCSS against the number of clusters tried (1 through 10).
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(range(1, 11), wcss)
ax.set_title('Elbow Method', fontsize=15)
ax.set_xlabel('Number of clusters')
ax.set_ylabel('WCSS')
ax.grid(True)
plt.show()

In [None]:
# Final clustering. The cluster count is fixed at 3 here (hard-coded, not
# computed from the elbow curve).
kmeans = KMeans(n_clusters=3, init='k-means++', random_state=42, n_init=10)
# Fit on the PCA output and take the per-row cluster labels of the fitted model.
y_kmeans = kmeans.fit(X_pca).labels_


In [None]:
# Attach each row's cluster label to the dataframe as a 'Cluster' column.
df = df.assign(Cluster=y_kmeans)

In [None]:
# Clusters in principal-component space, with the fitted centroids overlaid.
fig, ax = plt.subplots(figsize=(8, 6))

pc1, pc2 = X_pca[:, 0], X_pca[:, 1]
sns.scatterplot(x=pc1, y=pc2, hue=y_kmeans, palette='viridis', s=100, alpha=0.8, ax=ax)

# Centroids are drawn as large red X markers on top of the points.
centers = kmeans.cluster_centers_
ax.scatter(centers[:, 0], centers[:, 1], s=300, c='red', marker='X')

ax.set_title('Clusters Visualization', fontsize=15)
ax.set_xlabel('Principal Component 1')
ax.set_ylabel('Principal Component 2')
ax.legend(title='Cluster')
ax.grid(True)
plt.show()

In [None]:
# Histogram of the Latitude column (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(df['Latitude'], bins=30, kde=True, color='blue', ax=ax)
ax.set_title('Distribution of Latitude', fontsize=15)
ax.set_xlabel('Latitude')
ax.set_ylabel('Frequency')
ax.grid(True)
plt.show()

In [None]:
# Longitude spread within each cluster: one box per cluster label.
fig, ax = plt.subplots(figsize=(10, 6))

# Passing `palette` without `hue` is deprecated in seaborn 0.13 (FutureWarning,
# removal slated for 0.14), so the x variable is also assigned to `hue`.
# dodge=False keeps a single full-width box per cluster on seaborn versions
# where a hue variable would otherwise shrink and offset the boxes.
sns.boxplot(
    x='Cluster',
    y='Longitude',
    hue='Cluster',
    data=df,
    palette='viridis',
    dodge=False,
    ax=ax,
)

# Some seaborn versions draw a legend for `hue`; here it would only repeat the
# x tick labels, so remove it when present to match the original figure.
legend = ax.get_legend()
if legend is not None:
    legend.remove()

ax.set_title('Boxplot of Longitude by Clusters', fontsize=15)
ax.set_xlabel('Cluster')
ax.set_ylabel('Longitude')
ax.grid(True)
plt.show()

In [None]:
# Raw coordinates coloured by cluster label (Latitude on x, Longitude on y).
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=df,
    x='Latitude',
    y='Longitude',
    hue='Cluster',
    palette='viridis',
    s=100,
    alpha=0.8,
    ax=ax,
)
ax.set_title('Latitude vs Longitude by Clusters', fontsize=15)
ax.set_xlabel('Latitude')
ax.set_ylabel('Longitude')
ax.legend(title='Cluster')
ax.grid(True)
plt.show()