# 📊 Student Performance Clustering Project

In this project, we use K-Means clustering to analyze how study hours relate to student results.
We aim to identify clusters or patterns in student performance.

In [None]:
# Step 1: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

In [None]:
# Step 2: Load Dataset
# Read the CSV and discard the "Unnamed: 0" column in one chained expression.
# NOTE(review): "Unnamed: 0" is presumably a row index that was written out
# when the file was saved -- confirm against the data source.
df = pd.read_csv("Successratebol.csv").drop(columns=["Unnamed: 0"])

# Preview the first rows (rendered by the notebook as the cell's last expression).
df.head()

In [None]:
# Step 3: Feature Scaling
# Standardize every feature column before clustering.
scaler = StandardScaler()
# Exclude the 'Cluster' label column if it already exists: Step 4 adds it to
# df, so re-running this cell afterwards would otherwise feed the previous
# labels back in as a feature. errors="ignore" makes this a no-op on the
# first run, when the column is not there yet.
scaled_data = scaler.fit_transform(df.drop(columns=["Cluster"], errors="ignore"))

In [None]:
# Step 4: Apply K-Means Clustering
# n_init is set explicitly because its default differs between scikit-learn
# releases (10 restarts historically, 'auto' in newer versions); pinning it
# keeps results reproducible across versions and avoids the FutureWarning.
# random_state fixes the centroid initialization seed.
kmeans = KMeans(n_clusters=2, n_init=10, random_state=42)

# Fit on the standardized data and get one cluster label per row.
clusters = kmeans.fit_predict(scaled_data)

# Attach the labels to the original (unscaled) frame for inspection/plotting.
df['Cluster'] = clusters
df.head()

In [None]:
# Step 5: Visualize Clusters
# Scatter of study hours against result, coloured by assigned cluster.
fig, ax = plt.subplots(figsize=(8, 5))
sns.scatterplot(
    data=df,
    x='No of hours studying',
    y='Result',
    hue='Cluster',
    palette='Set2',
    s=100,
    ax=ax,
)
ax.set_title('K-Means Clustering of Students')
ax.set_xlabel('Hours Studied')
ax.set_ylabel('Result (0 = Fail, 1 = Pass)')
ax.grid(True)
plt.show()

In [None]:
# Step 6: Show Cluster Centers
# The centroids live in standardized space; map them back to the original
# feature units so they can be read directly.
centroids = scaler.inverse_transform(kmeans.cluster_centers_)

# Label the columns by dropping 'Cluster' by name rather than slicing off the
# last column, so the table stays correctly labelled even if other columns
# are appended to df after the cluster labels.
pd.DataFrame(centroids, columns=df.columns.drop('Cluster'))