# SDG 4: Quality Education
## Project: Identifying Student Learning Profiles using K-Means Clustering

In [None]:
# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Step 2: Load the dataset (simulated for this example)
# A seeded generator makes the simulated scores reproducible, so re-running
# the notebook yields the same data on every run.
rng = np.random.default_rng(42)
n_students = 100
data = {
    # Uniform integer scores in [40, 100); the upper bound is exclusive.
    'math score': rng.integers(40, 100, size=n_students),
    'reading score': rng.integers(40, 100, size=n_students),
    'writing score': rng.integers(40, 100, size=n_students),
}
# One row per simulated student, one column per subject.
df = pd.DataFrame(data)

In [None]:
# Step 3: Feature selection
# Name the score columns that will be used as clustering inputs.
features = [
    'math score',
    'reading score',
    'writing score',
]
# Keep every row, restricted to the selected columns.
X = df.loc[:, features]

In [None]:
# Step 4: Normalize the data
# Learn each column's mean and standard deviation, then rescale the columns
# so that no single subject dominates the distance computations.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Step 5: Apply K-Means clustering
# Partition the scaled scores into three groups. random_state fixes the
# centroid initialization. n_init is pinned explicitly because scikit-learn
# changed its default from 10 to 'auto' in version 1.4; leaving it implicit
# makes the result version-dependent and raises a FutureWarning on 1.2-1.3.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=42)
# fit_predict fits the model and returns one cluster label per input row.
clusters = kmeans.fit_predict(X_scaled)

In [None]:
# Step 6: Add cluster information to the DataFrame
# Store each row's K-Means label next to its scores; assigning to the
# 'Cluster' key creates the column, or overwrites it if the cell is re-run.
df['Cluster'] = clusters
# Preview the first rows (head() defaults to five) to check the new column.
df.head()

In [None]:
# Step 7: Visualize the clusters using PCA
# Reduce the scaled features to two principal components so the samples can
# be drawn on a 2-D scatter plot. The projection is used for display only;
# the cluster labels were computed on X_scaled, not on X_pca.
pca = PCA(n_components=2)
# Fit the projection and apply it to the same data in a single call.
X_pca = pca.fit_transform(X_scaled)

In [None]:
# Step 8: Create a scatter plot of the clusters
# Draw on an explicit figure/axes pair instead of pyplot's implicit state.
fig, ax = plt.subplots(figsize=(10, 6))

# One point per row in the 2-D PCA projection, colored by cluster label.
sns.scatterplot(
    x=X_pca[:, 0],
    y=X_pca[:, 1],
    hue=df['Cluster'],
    palette='Set2',
    s=100,
    ax=ax,
)

# Titles, axis labels, legend, and grid.
ax.set_title("Student Clusters Based on Performance", fontsize=14)
ax.set_xlabel("PCA Component 1")
ax.set_ylabel("PCA Component 2")
ax.legend(title='Cluster')
ax.grid(True)

fig.tight_layout()
plt.show()