In [6]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import classification_report, confusion_matrix

from imblearn.over_sampling import SMOTE

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical

In [9]:
# 1. Load the data
# NOTE(review): absolute path under /content (looks like a Google Colab
# session directory — confirm); adjust it when running somewhere else.
data = pd.read_csv(
    filepath_or_buffer='/content/Student performance in mathematics.csv',
)

In [10]:
# 2. Build the target variable
# Per-row mean of the three exam scores; the class label is derived from
# this column further down in the cell.
data['average_score'] = (
    data['Math score']
    .add(data['Reading score'])
    .add(data['Writing score'])
    .div(3)
)

def assign_label(avg):
    """Map an average score to a three-level performance class.

    Args:
        avg: Average score as a real number.

    Returns:
        int: 0 when ``avg`` is below 60 (fail), 1 when it is at least 60 but
        below 80 (medium), 2 when it is 80 or above (successful).

    Raises:
        ValueError: If ``avg`` is NaN. Every ``<`` comparison with NaN is
            False, so without this check a missing average would fall
            through to the last branch and be labelled 2 (successful).
    """
    # NaN is the only float value that compares unequal to itself.
    if avg != avg:
        raise ValueError("assign_label: average score is NaN; cannot assign a class")
    if avg < 60:
        return 0  # fail
    if avg < 80:
        return 1  # medium
    return 2  # successful

# Label every row by passing its average score through assign_label.
data['label'] = data['average_score'].map(assign_label)

In [11]:
# 3. Split features and target
# NOTE(review): the three score columns are kept as features although
# `label` is computed directly from their average, so the target is fully
# determined by the inputs (target leakage) — confirm this is intended.
X = data.loc[
    :,
    [
        'Gender',
        'Race/ethnicity',
        'Parental level of education',
        'Lunch',
        'Test preparation course',
        'Math score',
        'Reading score',
        'Writing score',
    ],
]
y = data['label']

In [12]:
# 4. Preprocessing
# Categorical columns are one-hot encoded (first level of each dropped);
# the numeric score columns are standardised.
categorical_cols = [
    'Gender',
    'Race/ethnicity',
    'Parental level of education',
    'Lunch',
    'Test preparation course',
]
numeric_cols = [
    'Math score',
    'Reading score',
    'Writing score',
]

preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(drop='first'), categorical_cols),
        ('num', StandardScaler(), numeric_cols),
    ]
)

# NOTE(review): the transformers are fitted on the full X, i.e. before the
# train/test split in the next cell, so the scaler also sees the test rows.
X_processed = preprocessor.fit_transform(X)

In [13]:
# 5. Train/test split
# 40% of the rows are held out for testing; stratifying on y keeps the class
# proportions similar in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    X_processed,
    y,
    test_size=0.4,
    stratify=y,
    random_state=42,
)

# Relative class frequencies of each part.
print("Eğitim sınıf dağılımı:", pd.Series(y_train).value_counts(normalize=True))
print("Test sınıf dağılımı:", pd.Series(y_test).value_counts(normalize=True))

Eğitim sınıf dağılımı: label
1   0.49
0   0.26
2   0.25
Name: proportion, dtype: float64
Test sınıf dağılımı: label
1   0.48
0   0.27
2   0.25
Name: proportion, dtype: float64


In [19]:
# 6. Balance the classes with SMOTE
# The resampled data overwrites X_train / y_train, so the original training
# split is no longer available after this cell has run.
# NOTE(review): this cell's execution count (19) is later than that of the
# LazyPredict fit (17), whose LightGBM log reports 124 training rows instead
# of the 183 produced here — the saved LazyPredict results appear to come
# from the pre-SMOTE data. Restart and run all cells in order to confirm.
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X=X_train, y=y_train)

# Check the class counts after resampling.
print("SMOTE sonrası sınıf dağılımı:", pd.Series(y_train).value_counts())

SMOTE sonrası sınıf dağılımı: label
0    61
1    61
2    61
Name: count, dtype: int64


In [14]:
# 7. One-hot encode the targets
# Both label vectors are expanded to three indicator columns (one per class).
y_train_categorical, y_test_categorical = (
    to_categorical(labels, num_classes=3) for labels in (y_train, y_test)
)

Lazy Prediction

In [1]:
!pip install lazypredict

Collecting lazypredict
  Downloading lazypredict-0.2.16-py2.py3-none-any.whl.metadata (13 kB)
Collecting pytest-runner (from lazypredict)
  Downloading pytest_runner-6.0.1-py3-none-any.whl.metadata (7.3 kB)
Collecting mlflow>=2.0.0 (from lazypredict)
  Downloading mlflow-2.22.0-py3-none-any.whl.metadata (30 kB)
Collecting mlflow-skinny==2.22.0 (from mlflow>=2.0.0->lazypredict)
  Downloading mlflow_skinny-2.22.0-py3-none-any.whl.metadata (31 kB)
Collecting alembic!=1.10.0,<2 (from mlflow>=2.0.0->lazypredict)
  Downloading alembic-1.15.2-py3-none-any.whl.metadata (7.3 kB)
Collecting docker<8,>=4.0.0 (from mlflow>=2.0.0->lazypredict)
  Downloading docker-7.1.0-py3-none-any.whl.metadata (3.8 kB)
Collecting graphene<4 (from mlflow>=2.0.0->lazypredict)
  Downloading graphene-3.4.3-py2.py3-none-any.whl.metadata (6.9 kB)
Collecting gunicorn<24 (from mlflow>=2.0.0->lazypredict)
  Downloading gunicorn-23.0.0-py3-none-any.whl.metadata (4.4 kB)
Collecting databricks-sdk<1,>=0.20.0 (from mlflow-ski

In [15]:
# Import required for LazyPredict (kept after the install cell above rather
# than in the notebook's first import cell).
from lazypredict.Supervised import LazyClassifier

In [16]:
# Measure the performance of classical models with LazyPredict
clf = LazyClassifier(
    verbose=0,
    ignore_warnings=True,
    custom_metric=None,
)

In [17]:
# LazyPredict training
# Note the argument order: both feature sets first, then both label vectors.
# `models` is displayed in the next cell; `predictions` is the second value
# returned by fit (presumably per-model predictions — confirm in the
# LazyPredict docs).
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

  0%|          | 0/32 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000643 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 124
[LightGBM] [Info] Number of data points in the train set: 124, number of used features: 12
[LightGBM] [Info] Start training from score -1.354546
[LightGBM] [Info] Start training from score -0.709408
[LightGBM] [Info] Start training from score -1.386294


In [18]:
# Model performance
# NOTE(review): the features include the three scores that `label` is
# computed from, which likely explains near-perfect scores across models —
# confirm before drawing conclusions from this table.
print("\nLazyPredict Modellerinin Performansı:\n")
# Bare last expression so the notebook renders the results table.
models


LazyPredict Modellerinin Performansı:



Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AdaBoostClassifier,1.0,1.0,,1.0,0.16
XGBClassifier,0.99,0.99,,0.99,0.17
BaggingClassifier,0.99,0.98,,0.99,0.06
DecisionTreeClassifier,0.99,0.98,,0.99,0.02
LogisticRegression,0.99,0.98,,0.99,0.03
RandomForestClassifier,0.99,0.98,,0.99,0.22
ExtraTreesClassifier,0.98,0.98,,0.98,0.17
LGBMClassifier,0.98,0.98,,0.98,0.14
LinearDiscriminantAnalysis,0.96,0.97,,0.96,0.05
CalibratedClassifierCV,0.96,0.95,,0.96,0.09
