# Load Clean Data

In [2]:
import pandas as pd
import numpy as np

# Fatma'nın temizleyip kaydettiği veriyi direkt okuyoruz
df = pd.read_csv("processed_clean_data.csv")

# TARGET ve PASS_FAIL 
target_col = "TARGET" if "TARGET" in df.columns else "NP_TARGET"

X = df.drop(columns=["PASS_FAIL", target_col], errors="ignore")
y = df["PASS_FAIL"]

df.shape


(25260, 69)

# Feature Selection — Chi-Square + Mutual Information

###  1.1 Imputation + Scaling for Chi2

In [3]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import SelectKBest, chi2, mutual_info_classif

# eksik değerleri median ile dolduruyoruz
imputer = SimpleImputer(strategy="median")
X_imputed = imputer.fit_transform(X)

# Chi2 negatif kabul etmez -> MinMaxScaler
scaler_mm = MinMaxScaler()
X_scaled_chi = scaler_mm.fit_transform(X_imputed)


### 1.2 Chi-Square SelectKBest

In [4]:
selector_chi = SelectKBest(chi2, k=20)
X_chi = selector_chi.fit_transform(X_scaled_chi, y)

selected_chi_features = X.columns[selector_chi.get_support()]
selected_chi_features.tolist()


['ACCOMPLISH_MANDATORY',
 'ACCOMPLISH_MANDATORY_GRADE',
 'ACCOMPLISH_MANDATORY_PCT_GRADED',
 'ACCOMPLISH_MANDATORY_PERCENTILE_GRADE',
 'NP_ACCOMPLISH_MANDATORY_GRADE',
 'NP_TARGET',
 'COURSE_VIEW_TIME_1',
 'RESOURCE_VIEW_UNIQUE_PCT',
 'URL_VIEW_UNIQUE_PCT',
 'ASSIGN_VIEW_UNIQUE_PCT',
 'QUIZ_VIEW_UNIQUE_PCT',
 'ASSIGN_SUBMIT_TIME_3',
 'ASSIGN_SUBMIT_TIME_PCT',
 'ASSIGN_SUBMIT_UNIQUE_PCT',
 'QUIZ_ATTEMPT_TIME_PCT',
 'QUIZ_ATTEMPT_UNIQUE_PCT',
 'QUIZ_CLOSE_ATTEMPT_TIME_2',
 'QUIZ_CLOSE_ATTEMPT_TIME_3',
 'QUIZ_CLOSE_ATTEMPT_TIME_PCT',
 'QUIZ_CLOSE_ATTEMPT_UNIQUE_PCT']

### 1.3 Mutual Information

In [5]:
mi_scores = mutual_info_classif(X_imputed, y, random_state=42)

mi_df = pd.DataFrame({
    "Feature": X.columns,
    "MI": mi_scores
}).sort_values("MI", ascending=False)

top_mi_features = mi_df.head(20)
top_mi_features


Unnamed: 0,Feature,MI
11,NP_TARGET,0.490355
2,ACCOMPLISH_MANDATORY_GRADE,0.364394
9,NP_ACCOMPLISH_MANDATORY_GRADE,0.286913
0,Unnamed: 0,0.222901
4,ACCOMPLISH_MANDATORY_PERCENTILE_GRADE,0.215397
35,ASSIGN_VIEW_PCT,0.105587
47,ASSIGN_SUBMIT_PCT,0.101665
12,COURSE_VIEW_PCT,0.100942
19,RESOURCE_VIEW_PCT,0.088671
3,ACCOMPLISH_MANDATORY_PCT_GRADED,0.08051


## 2- PCA (Boyut İndirgeme)

In [6]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

# PCA için standardizasyon lazım
scaler_std = StandardScaler()
X_scaled_pca = scaler_std.fit_transform(X_imputed)

pca = PCA(n_components=0.95, random_state=42)
X_pca = pca.fit_transform(X_scaled_pca)

print("Orijinal boyut:", X.shape)
print("PCA boyutu:", X_pca.shape)
print("Toplam varyans:", pca.explained_variance_ratio_.sum())


Orijinal boyut: (25260, 67)
PCA boyutu: (25260, 38)
Toplam varyans: 0.953449555252736


### Base Models — SVM + LDA

In [7]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.metrics import accuracy_score, f1_score

results_imad = []

def eval_model(model, Xd, yd, label):
    X_train, X_test, y_train, y_test = train_test_split(
        Xd, yd, test_size=0.2, random_state=42, stratify=yd
    )

    model.fit(X_train, y_train)
    pred = model.predict(X_test)

    acc = accuracy_score(y_test, pred)
    f1  = f1_score(y_test, pred)

    results_imad.append([label, acc, f1])


### 3.1 Modeller

In [8]:
svm = SVC(kernel="rbf", C=1.0, gamma="scale", random_state=42)
lda = LinearDiscriminantAnalysis()


### 3.2 Original Data Results

In [9]:
eval_model(svm, X_scaled_pca, y, "SVM (Original)")
eval_model(lda, X_scaled_pca, y, "LDA (Original)")

### 3.3 Chi-Square Results

In [10]:
eval_model(svm, X_chi, y, "SVM (Chi-Square)")
eval_model(lda, X_chi, y, "LDA (Chi-Square)")

### 3.4 PCA Results

In [11]:
eval_model(svm, X_pca, y, "SVM (PCA)")
eval_model(lda, X_pca, y, "LDA (PCA)")

## Final Comparison Table

In [12]:
df_imad_results = pd.DataFrame(results_imad, columns=["Model", "Accuracy", "F1"])
df_imad_results.sort_values("F1", ascending=False)

Unnamed: 0,Model,Accuracy,F1
2,SVM (Chi-Square),0.961995,0.969062
3,LDA (Chi-Square),0.954869,0.963706
0,SVM (Original),0.951702,0.960872
1,LDA (Original),0.951108,0.960663
4,SVM (PCA),0.943389,0.954313
5,LDA (PCA),0.932502,0.945882
