In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1. Load data
df = pd.read_csv(r"C:\Data\student_scores_selected_features.csv")

# 2. Create target: 1 = (High or Excellent), 0 = others
y = df["performance_category"].isin(["Excellent", "High"]).astype(int)

# 3. Select the top 5 most important features
top5_features = [
    "average_score",
    "science_avg",
    "humanities_avg",
    "performance_consistency",
    "engagement_score"
]

X = df[top5_features]

# 4. Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 5. Logistic Regression model
model = LogisticRegression(max_iter=1000, solver="liblinear")
model.fit(X_train, y_train)

# 6. Predictions
y_pred = model.predict(X_test)

# 7. Evaluation
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

# 8. Model coefficients
coef_df = pd.DataFrame({
    "feature": top5_features,
    "coefficient": model.coef_[0]
}).sort_values(by="coefficient", ascending=False)

print("\nFeature Importance (Logistic Regression Coefficients):")
print(coef_df)


Accuracy: 0.93

Classification Report:
               precision    recall  f1-score   support

           0       0.92      0.58      0.71        59
           1       0.93      0.99      0.96       341

    accuracy                           0.93       400
   macro avg       0.93      0.78      0.83       400
weighted avg       0.93      0.93      0.92       400


Confusion Matrix:
 [[ 34  25]
 [  3 338]]

Feature Importance (Logistic Regression Coefficients):
                   feature  coefficient
1              science_avg     0.233475
0            average_score     0.203156
2           humanities_avg     0.162729
4         engagement_score     0.073219
3  performance_consistency    -0.466406


In [12]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# 1. Load dataset
df = pd.read_csv(r"C:\Data\student_scores_selected_features.csv")

# 2. Binary target: 1 = High/Excellent, 0 = (Average, Low, Poor)
y = df["performance_category"].isin(["Excellent", "High"]).astype(int)

# 3. Top 5 features
top5_features = [
    "average_score",
    "science_avg",
    "humanities_avg",
    "performance_consistency",
    "engagement_score"
]

X = df[top5_features]

# 4. Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# 5. XGBoost model
xgb = XGBClassifier(
    n_estimators=300,
    max_depth=4,
    learning_rate=0.05,
    subsample=0.9,
    colsample_bytree=0.9,
    eval_metric="logloss",
    random_state=42
)

# 6. Train the model
xgb.fit(X_train, y_train)

# 7. Predictions
y_pred = xgb.predict(X_test)

# 8. Evaluation results
print("XGBoost Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# 9. Feature importance
importance = pd.DataFrame({
    "feature": top5_features,
    "importance": xgb.feature_importances_
}).sort_values(by="importance", ascending=False)

print("\nXGBoost Feature Importance:")
print(importance)


XGBoost Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        59
           1       1.00      1.00      1.00       341

    accuracy                           1.00       400
   macro avg       1.00      1.00      1.00       400
weighted avg       1.00      1.00      1.00       400

Confusion Matrix:
 [[ 59   0]
 [  0 341]]

XGBoost Feature Importance:
                   feature  importance
0            average_score    0.764206
1              science_avg    0.151636
2           humanities_avg    0.074070
4         engagement_score    0.007681
3  performance_consistency    0.002407
