# 🎓 Prediksi Performa Mahasiswa Berbasis Probabilistic Reasoning
Notebook ini menggunakan berbagai model probabilistik untuk memprediksi GPA dan GradeClass mahasiswa.

In [None]:
%pip install pandas numpy matplotlib seaborn scikit-learn pgmpy pomegranate hmmlearn networkx

Collecting pomegranate
  Downloading pomegranate-1.1.2-py3-none-any.whl.metadata (566 bytes)
Collecting hmmlearn
  Downloading hmmlearn-0.3.3-cp312-cp312-win_amd64.whl.metadata (3.1 kB)
Collecting apricot-select>=0.6.1 (from pomegranate)
  Downloading apricot-select-0.6.1.tar.gz (28 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting numba>=0.43.0 (from apricot-select>=0.6.1->pomegranate)
  Downloading numba-0.61.2-cp312-cp312-win_amd64.whl.metadata (2.9 kB)
Collecting nose (from apricot-select>=0.6.1->pomegranate)
  Downloading nose-1.3.7-py3-none-any.whl.metadata (1.7 kB)
Collecting llvmlite<0.45,>=0.44.0dev0 (from numba>=0.43.0->apricot-select>=0.6.1->pomegranate)
  Downloading llvmlite-0.44.0-cp312-cp312-win_amd64.whl.metadata (5.0 kB)
Downloading pomegranate-1.1.2-py3-none-any.whl (98 kB)
Downloading hmmlearn-0.3.3-cp312-cp312-win_amd64.whl (127 kB)
Downloading numba-0.61.2-cp312-cp312-win_amd64.whl (2.8 MB)
   -----

In [2]:
# 📦 Instalasi dan Import Library
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.mixture import GaussianMixture
from sklearn.metrics import classification_report, mean_squared_error
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from pgmpy.models import BayesianNetwork
from pgmpy.estimators import HillClimbSearch, K2Score, BayesianEstimator
# from pgmpy.estimators import HillClimbSearch, 
from pgmpy.inference import VariableElimination
from hmmlearn import hmm
import networkx as nx

ImportError: cannot import name 'K2Score' from 'pgmpy.estimators' (f:\laragon\bin\python\python-3.12.2\Lib\site-packages\pgmpy\estimators\__init__.py)

## 📊 Load Dataset & Preprocessing

In [None]:
df = pd.read_csv("Student_performance_data_.csv")
# Encode categorical
label_encoders = {}
for col in ['Gender', 'Ethnicity', 'ParentalEducation', 'Tutoring', 'ParentalSupport', 'Extracurricular', 'Sports', 'Music', 'Volunteering']:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le
df.fillna(df.mean(numeric_only=True), inplace=True)
df.describe()

## 📈 Visualisasi Korelasi dan Distribusi

In [None]:
plt.figure(figsize=(12, 8))
sns.heatmap(df.corr(), annot=True, cmap="coolwarm")
plt.title("Korelasi Antar Variabel")
plt.show()
sns.histplot(df['GPA'], kde=True)
plt.title("Distribusi GPA")
plt.show()
sns.countplot(data=df, x='GradeClass')
plt.title("Distribusi GradeClass")
plt.show()

## 🔁 Split Data

In [None]:
X = df.drop(['GPA', 'GradeClass', 'StudentID'], axis=1)
y_gpa = df['GPA']
y_grade = df['GradeClass']
X_train, X_test, y_train_gpa, y_test_gpa = train_test_split(X, y_gpa, test_size=0.2, random_state=42)
_, _, y_train_grade, y_test_grade = train_test_split(X, y_grade, test_size=0.2, random_state=42)

## 🧠 Naive Bayes Classifier

In [None]:
nb = GaussianNB()
nb.fit(X_train, y_train_grade)
pred_nb = nb.predict(X_test)
print(classification_report(y_test_grade, pred_nb))

## 📉 Gaussian Mixture Model

In [None]:
gmm = GaussianMixture(n_components=5, random_state=0)
gmm.fit(X_train)
pred_gmm = gmm.predict(X_test)

## 🧠 Probabilistic Neural Network (MLP Approximation)

In [None]:
mlp = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=1)
mlp.fit(X_train, y_train_grade)
pred_mlp = mlp.predict(X_test)
print(classification_report(y_test_grade, pred_mlp))

## 🔄 Hidden Markov Model

In [None]:
model_hmm = hmm.GaussianHMM(n_components=5, covariance_type="diag", n_iter=1000)
model_hmm.fit(X_train)
hidden_states = model_hmm.predict(X_test)

## 📡 Bayesian Network

In [None]:
discretized_df = df.copy()
discretized_df['GPA'] = pd.qcut(discretized_df['GPA'], q=5, labels=False)
discretized_df['GradeClass'] = discretized_df['GradeClass'].astype(int)
hc = HillClimbSearch(discretized_df)
model = hc.estimate(scoring_method=BicScore(discretized_df))
bn = BayesianNetwork(model.edges())
bn.fit(discretized_df, estimator=BayesianEstimator)
inference = VariableElimination(bn)
q = inference.query(variables=['GradeClass'], evidence={'StudyTimeWeekly': 2, 'Absences': 0})
print(q)

## 🧠 Random Forest sebagai simulasi MRF

In [None]:
rf = RandomForestClassifier()
rf.fit(X_train, y_train_grade)
pred_rf = rf.predict(X_test)
print(classification_report(y_test_grade, pred_rf))

## 📌 Regresi GPA (Linear Regression)

In [None]:
lr = LinearRegression()
lr.fit(X_train, y_train_gpa)
gpa_pred = lr.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test_gpa, gpa_pred))
print("RMSE GPA:", rmse)