In [None]:
# Description
# Capstone Project: Student Success & Career Path Prediction

# Scenario

# The university wants to analyze student performance data to:

# - Predict exam scores (Regression).
# - Classify students into “At Risk” vs. “On Track” categories (Classification).
# - Cluster students into groups with similar study habits (Clustering).
# - Recommend interventions (extra tutoring, workshops, counseling).
# Dataset: /content/Student Success & Career Path  - Sheet1.csv


In [6]:
#1. Regression: Predict Final Exam Score
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, r2_score

# Read the student records; `df` is the frame the later cells keep working on.
df = pd.read_csv("Student Success & Career Path  - Sheet1.csv")

# Predictor columns for the regression and the column being predicted.
regression_columns = [
    "Hours_Studied",
    "Attendance (%)",
    "Assignments_Submitted",
    "Previous_Sem_GPA",
    "Participation_Score",
    "Age",
]
X = df.loc[:, regression_columns]
y = df.loc[:, "Final_Exam_Score"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit a linear model on the training rows (fit() returns the estimator itself).
model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows and report error metrics against the true exam scores.
predictions = model.predict(X_test)
mae = mean_absolute_error(y_test, predictions)
r2 = r2_score(y_test, predictions)

print("MAE:", mae)
print("R2 Score:", r2)

MAE: 1.460846916498225
R2 Score: 0.977785665607859


In [7]:
#2 Classification of At Risk vs On Track
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Encode the target as 1 = Pass, 0 = Fail.
# The encoded column is written back to df because the recommendation logic
# further down compares df["Pass_Fail"] against 0.
# The dtype guard keeps this cell safe to re-run: applying .map() a second time
# to the already-encoded column would turn every label into NaN.
labels = df["Pass_Fail"]
if not pd.api.types.is_numeric_dtype(labels):
    labels = labels.map({"Pass": 1, "Fail": 0})
if labels.isna().any():
    # Fail loudly before df is touched instead of training on a broken target.
    raise ValueError("Pass_Fail must contain only 'Pass'/'Fail' (or already-encoded 1/0) values")
df["Pass_Fail"] = labels.astype("int64")

# Same predictors as the regression cell, minus Age.
X = df[["Hours_Studied", "Attendance (%)", "Assignments_Submitted","Previous_Sem_GPA", "Participation_Score"]]
y = df["Pass_Fail"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

clf = LogisticRegression()
clf.fit(X_train, y_train)
pred = clf.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))
# zero_division=0 reports 0.0 for a class that is never predicted, which is the
# same number the default prints, but without the undefined-metric warnings.
# NOTE(review): the report is computed on a very small held-out set, so these
# metrics are noisy; consider a stratified split or cross-validation.
print(classification_report(y_test, pred, zero_division=0))

Accuracy: 0.75
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.75      1.00      0.86         3

    accuracy                           0.75         4
   macro avg       0.38      0.50      0.43         4
weighted avg       0.56      0.75      0.64         4



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [8]:
#3 Clustering of Study Habit Groups
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Study-habit columns that feed the clustering.
habit_columns = [
    "Hours_Studied",
    "Attendance (%)",
    "Assignments_Submitted",
    "Previous_Sem_GPA",
    "Participation_Score",
]
features = df.loc[:, habit_columns]

# Standardise the columns so they are on a comparable scale before clustering.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Split the students into three groups; the fixed seed keeps the labels repeatable.
kmeans = KMeans(n_clusters=3, random_state=42)
df["Cluster"] = kmeans.fit_predict(scaled_features)

# Show which cluster each student landed in.
cluster_table = df[["Student_ID", "Cluster"]]
print(cluster_table)

   Student_ID  Cluster
0        S001        0
1        S002        2
2        S003        0
3        S004        1
4        S005        2
5        S006        2
6        S007        1
7        S008        0
8        S009        1
9        S010        0
10       S011        2
11       S012        1
12       S013        0
13       S014        2
14       S015        1
15       S016        0
16       S017        1
17       S018        0
18       S019        1
19       S020        0


In [13]:
#4 Intervention Recommendation Logic
def recommend(row, min_hours=5, min_attendance=60):
    """Pick one intervention for a single student record.

    The rules are checked in priority order and the first match wins, so a
    failing student always gets tutoring + counseling regardless of the
    other columns.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row or a dict)
        Must provide "Pass_Fail", "Hours_Studied" and "Attendance (%)".
        "Pass_Fail" may be the encoded value (0 = fail) or the raw label
        "Fail", so the function gives the same answer whether or not the
        column has been encoded yet.
    min_hours : number, default 5
        Students below this many study hours are sent to the skills workshop.
    min_attendance : number, default 60
        Students below this attendance percentage are sent to attendance
        monitoring.

    Returns
    -------
    str
        One of "MandatoryTutoring + Counseling", "StudentSkillsWorkshop",
        "Attendance Monitoring Program" or "On Track".
    """
    status = row["Pass_Fail"]
    # Check the type first so a numeric status is never compared to a string.
    if isinstance(status, str):
        failed = status == "Fail"
    else:
        failed = status == 0

    if failed:
        return "MandatoryTutoring + Counseling"
    if row["Hours_Studied"] < min_hours:
        return "StudentSkillsWorkshop"
    if row["Attendance (%)"] < min_attendance:
        return "Attendance Monitoring Program"
    return "On Track"

# Run recommend() once per student row (axis=1) and store the labels on df.
recommendations = df.apply(recommend, axis=1)
df["Recommendation"] = recommendations

# Show each student next to the intervention chosen for them.
print(df[["Student_ID", "Recommendation"]])

   Student_ID                  Recommendation
0        S001                        On Track
1        S002  MandatoryTutoring + Counseling
2        S003                        On Track
3        S004  MandatoryTutoring + Counseling
4        S005                        On Track
5        S006                        On Track
6        S007  MandatoryTutoring + Counseling
7        S008                        On Track
8        S009  MandatoryTutoring + Counseling
9        S010                        On Track
10       S011                        On Track
11       S012  MandatoryTutoring + Counseling
12       S013                        On Track
13       S014                        On Track
14       S015  MandatoryTutoring + Counseling
15       S016                        On Track
16       S017  MandatoryTutoring + Counseling
17       S018                        On Track
18       S019  MandatoryTutoring + Counseling
19       S020                        On Track
