In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier
from imblearn.over_sampling import SMOTE

# تحميل البيانات
df_mat = pd.read_csv("student-mat.csv", sep=';')
df_por = pd.read_csv("student-por.csv", sep=';')
df = pd.concat([df_mat, df_por], ignore_index=True)

# ترميز النصوص
le = LabelEncoder()
for col in df.columns:
    if df[col].dtype == 'object':
        df[col] = le.fit_transform(df[col])


df['target'] = (df['G3'] >= 10).astype(int)
df.drop(columns=['G3'], inplace=True)

# تحضير البيانات
X = df.drop(columns=['target'])
y = df['target']
X_scaled = StandardScaler().fit_transform(X)


selector = SelectKBest(score_func=f_classif, k=20)
X_selected = selector.fit_transform(X_scaled, y)

smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_selected, y)


X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)


models_params = {
    "SVM": {
        "model": SVC(probability=True),  
        "params": {
            "kernel": ["rbf"],
            "C": [50, 100, 200],
            "gamma": [0.0005, 0.001, 0.005]
        }
    },
    "Decision Tree": {
        "model": DecisionTreeClassifier(),
        "params": {
            "max_depth": [3, 5, 7],
            "min_samples_split": [2, 5, 10]
        }
    }
}


best_models = {}
for name, mp in models_params.items():
    grid = GridSearchCV(mp["model"], mp["params"], cv=5, scoring="accuracy", n_jobs=-1)
    grid.fit(X_train, y_train)
    best_models[name] = grid.best_estimator_

voting = VotingClassifier(estimators=[
    ("svm", best_models["SVM"]),
    ("dt", best_models["Decision Tree"])
], voting='soft')

voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)

report = classification_report(y_test, y_pred, output_dict=True)

print("\nModel: VotingClassifier (SVM + Decision Tree)")
print("Accuracy:", round(report["accuracy"], 4))
print("Precision:", round(report["weighted avg"]["precision"], 4))
print("Recall:", round(report["weighted avg"]["recall"], 4))
print("F1 Score:", round(report["weighted avg"]["f1-score"], 4))



Model: VotingClassifier (SVM + Decision Tree)
Accuracy: 0.9632
Precision: 0.9632
Recall: 0.9632
F1 Score: 0.9632
