# Feature Engineering - Pratik Alıştırmalar

Bu notebook, feature engineering teknikleri üzerinde pratik yapmak için alıştırmalar içerir.


## Alıştırma 1: Feature Scaling Etkisini Test Etme

Farklı scaling yöntemlerinin model performansına etkisini karşılaştırın.


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification

# Synthetic classification data set (1000 samples, 10 features), with 20% of
# the rows held out as the test split.
X, y = make_classification(n_samples=1000, n_features=10, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scaling strategies to compare; None stands for the unscaled baseline.
scalers = {
    'No Scaling': None,
    'StandardScaler': StandardScaler(),
    'MinMaxScaler': MinMaxScaler(),
    'RobustScaler': RobustScaler(),
}

# One row per scaling strategy: its name plus the test accuracy of each model.
results = []
for scaler_name, scaler in scalers.items():
    if scaler is not None:
        # The scaler is fitted on the training split only and then reused
        # unchanged on the test split.
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
    else:
        X_train_scaled, X_test_scaled = X_train, X_test

    # Logistic Regression
    lr = LogisticRegression(random_state=42, max_iter=1000)
    lr_pred = lr.fit(X_train_scaled, y_train).predict(X_test_scaled)
    lr_acc = accuracy_score(y_test, lr_pred)

    # SVM
    svm = SVC(random_state=42)
    svm_pred = svm.fit(X_train_scaled, y_train).predict(X_test_scaled)
    svm_acc = accuracy_score(y_test, svm_pred)

    row = {
        'Scaling Method': scaler_name,
        'Logistic Regression': lr_acc,
        'SVM': svm_acc,
    }
    results.append(row)

# Print the comparison table without the DataFrame index column.
results_df = pd.DataFrame(results)
print(
    "Scaling Yöntemlerinin Model Performansına Etkisi:",
    results_df.to_string(index=False),
    sep="\n",
)


## Alıştırma 2: Feature Selection ile Model Performansını İyileştirme

Feature selection tekniklerini kullanarak en önemli feature'ları seçin ve model performansını karşılaştırın.


In [None]:
from sklearn.feature_selection import SelectKBest, f_classif, RFE
from sklearn.ensemble import RandomForestClassifier

# Compare random-forest test accuracy with and without feature selection.
# Each scored forest is built with n_estimators=100 and random_state=42, so
# the three runs differ only in which feature columns they are given.

# 1. Baseline: all features
rf_all = RandomForestClassifier(n_estimators=100, random_state=42)
acc_all = accuracy_score(y_test, rf_all.fit(X_train, y_train).predict(X_test))

# 2. SelectKBest (top 5 features, scored with f_classif)
selector = SelectKBest(score_func=f_classif, k=5)
X_train_selected = selector.fit_transform(X_train, y_train)
X_test_selected = selector.transform(X_test)
rf_selected = RandomForestClassifier(n_estimators=100, random_state=42)
pred_selected = rf_selected.fit(X_train_selected, y_train).predict(X_test_selected)
acc_selected = accuracy_score(y_test, pred_selected)

# 3. RFE (top 5 features); the elimination itself is driven by a separate
#    50-tree forest, while the scored model below still uses 100 trees.
rfe = RFE(
    estimator=RandomForestClassifier(n_estimators=50, random_state=42),
    n_features_to_select=5,
)
X_train_rfe = rfe.fit_transform(X_train, y_train)
X_test_rfe = rfe.transform(X_test)
rf_rfe = RandomForestClassifier(n_estimators=100, random_state=42)
pred_rfe = rf_rfe.fit(X_train_rfe, y_train).predict(X_test_rfe)
acc_rfe = accuracy_score(y_test, pred_rfe)

# Report the three accuracies, one per line, under a header.
print(
    "Feature Selection Sonuçları:",
    f"Tüm Feature'lar:        {acc_all:.4f}",
    f"SelectKBest (5 feature): {acc_selected:.4f}",
    f"RFE (5 feature):         {acc_rfe:.4f}",
    sep="\n",
)
