In [None]:
import pandas as pd

# Read the student records from the CSV file into a DataFrame
df = pd.read_csv(filepath_or_buffer='students.csv')

# Columns that serve as the clustering inputs
features = [
    'GPA',
    'study_hours',
    'attendance_rate',
]
# Keep every row, restricted to the feature columns above
X = df.loc[:, features]

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the feature columns: learn the scaling parameters from X first,
# then apply them to X as a separate, explicit step.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Elbow method: fit KMeans once per candidate K and record the resulting
# inertia (sum of squared distances of samples to their closest centre),
# so the bend in the curve can be read off the plot below.
inertia = []
K_range = range(2, 7)  # candidate cluster counts 2..6

for k in K_range:
    # n_init is pinned explicitly: the library default changed between
    # scikit-learn releases (10 -> 'auto'), so leaving it implicit makes the
    # curve depend on the installed version and raises a FutureWarning on
    # the releases in between.
    kmeans = KMeans(n_clusters=k, n_init=10, random_state=42)
    kmeans.fit(X_scaled)
    inertia.append(kmeans.inertia_)

# Plot elbow
plt.figure(figsize=(6, 4))
plt.plot(list(K_range), inertia, marker='o')
# K is a discrete count; force one tick per candidate so the axis never shows
# fractional values such as 2.5.
plt.xticks(list(K_range))
plt.title('Elbow Method for Optimal K')
plt.xlabel('Number of clusters (K)')
plt.ylabel('Inertia')
plt.grid(True)
plt.show()

In [None]:
# Fit the final KMeans model on the scaled features and label every row.
# NOTE(review): K=3 is hard-coded — presumably read off the elbow plot;
# re-check this value whenever the input data changes.
optimal_k = 3
# n_init is pinned explicitly: the library default changed between
# scikit-learn releases (10 -> 'auto'), so an implicit value would make the
# cluster assignment depend on the installed version.
kmeans = KMeans(n_clusters=optimal_k, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X_scaled)

# Add cluster labels to original DataFrame.
# `clusters` holds one label per row of X_scaled, in the same row order, so it
# is attached by position rather than by index.
df['cluster'] = clusters

In [None]:
import seaborn as sns

# Scatter of GPA against study hours, one point per row of df, coloured by
# the value in the 'cluster' column. Drawn on an explicit Axes object.
fig, ax = plt.subplots(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='study_hours',
    y='GPA',
    hue='cluster',
    palette='viridis',
    s=100,
    ax=ax,
)
ax.set_title('Student Clusters Based on GPA and Study Hours')
ax.set_xlabel('Average Weekly Study Hours')
ax.set_ylabel('GPA')
ax.legend(title='Cluster')
ax.grid(True)
plt.show()

In [None]:
# Pair each student_id with the value stored in the 'cluster' column and
# print the first five rows as a quick preview.
id_and_cluster = ['student_id', 'cluster']
result_df = df.loc[:, id_and_cluster]
print(result_df.head(n=5))