# Feature Engineering - Temel Kavramlar

Bu notebook, temel feature engineering tekniklerini (feature scaling, feature selection ve kategorik değişkenlerin encode edilmesi gibi) uygulamalı örneklerle ele alır.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, LabelEncoder, OneHotEncoder
from sklearn.feature_selection import SelectKBest, f_classif, chi2, RFE
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import make_classification

# Plot configuration for the whole notebook.
# Apply matplotlib's 'seaborn-v0_8' style sheet to every figure.
plt.style.use('seaborn-v0_8')
# Make "husl" the default seaborn color palette (set after the style sheet is applied).
sns.set_palette("husl")
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline


## 1. Feature Scaling

Aynı veri seti üzerinde StandardScaler, MinMaxScaler ve RobustScaler yöntemlerini, ölçeklenmemiş orijinal veriyle karşılaştırma.


In [None]:
# Synthetic classification sample: 1000 rows, 5 numeric features (seeded for reproducibility).
X, y = make_classification(n_samples=1000, n_features=5, random_state=42)
df = pd.DataFrame(X, columns=[f'Feature_{col_no}' for col_no in range(1, 6)])

# Scalers to compare; the None entry stands for the unscaled baseline.
scalers = {
    'Original': None,
    'StandardScaler': StandardScaler(),
    'MinMaxScaler': MinMaxScaler(),
    'RobustScaler': RobustScaler(),
}

# Fit every scaler on the full frame and keep the result under the scaler's label.
# The baseline entry is df itself (same object, not a copy).
scaled_data = {
    label: (
        df
        if transformer is None
        else pd.DataFrame(transformer.fit_transform(df), columns=df.columns)
    )
    for label, transformer in scalers.items()
}

# Per-column summary statistics for each variant.
print("Feature Scaling Karşılaştırması:")
for label, frame in scaled_data.items():
    col_mean = frame.mean().to_numpy()
    col_std = frame.std().to_numpy()  # pandas default: sample std (ddof=1)
    col_min = frame.min().to_numpy()
    col_max = frame.max().to_numpy()

    print(f"\n{label}:")
    print(f"  Mean: {col_mean}")
    print(f"  Std:  {col_std}")
    print(f"  Min:  {col_min}")
    print(f"  Max:  {col_max}")


## 2. Feature Selection

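En önemli feature'ları seçme teknikleri: SelectKBest (univariate selection), RFE (recursive feature elimination) ve tree-based feature importance.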


In [None]:
# Hold out 20% of the rows for testing; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display names for the columns, shared by all three reports in this cell.
feature_labels = [f'Feature_{col_no}' for col_no in range(1, X_train.shape[1] + 1)]

# 1. SelectKBest (univariate selection): keep the 3 best columns by f_classif score.
#    Fitted on the training split only, then applied to both splits.
selector_kbest = SelectKBest(score_func=f_classif, k=3)
selector_kbest.fit(X_train, y_train)
X_train_selected, X_test_selected = (
    selector_kbest.transform(part) for part in (X_train, X_test)
)

selected_features = selector_kbest.get_support()  # boolean mask over the columns
print("SelectKBest ile seçilen feature'lar:")
print([label for label, keep in zip(feature_labels, selected_features) if keep])

# 2. RFE (recursive feature elimination) down to 3 columns, driven by a 50-tree random forest.
rfe_estimator = RandomForestClassifier(n_estimators=50, random_state=42)
rfe = RFE(estimator=rfe_estimator, n_features_to_select=3)
rfe.fit(X_train, y_train)
X_train_rfe, X_test_rfe = (rfe.transform(part) for part in (X_train, X_test))

rfe_features = rfe.get_support()  # boolean mask over the columns
print("\nRFE ile seçilen feature'lar:")
print([label for label, keep in zip(feature_labels, rfe_features) if keep])

# 3. Tree-based feature importance from a 100-tree random forest trained on all columns.
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train, y_train)
importance_table = pd.DataFrame({
    'Feature': feature_labels,
    'Importance': rf.feature_importances_,
})
feature_importance = importance_table.sort_values('Importance', ascending=False)

print("\nFeature Importance (Top 3):")
print(feature_importance.iloc[:3])


## 3. Encoding Teknikleri

Kategorik değişkenleri encode etme yöntemleri: Label Encoding ve One-Hot Encoding.


In [None]:
# Small categorical example frame with two string columns.
category_values = ['A', 'B', 'C', 'A', 'B', 'C', 'A']
size_values = ['Small', 'Medium', 'Large', 'Small', 'Medium', 'Large', 'Small']
df_cat = pd.DataFrame({'Category': category_values, 'Size': size_values})

# 1. Label encoding: replace each distinct 'Category' value with an integer code.
le = LabelEncoder()
category_codes = le.fit_transform(df_cat['Category'])
df_cat['Category_LabelEncoded'] = category_codes
print("Label Encoding:")
print(df_cat.loc[:, ['Category', 'Category_LabelEncoded']].head())

# 2. One-hot encoding of the two original string columns only
#    (the integer-coded column added above is left out).
onehot_source = df_cat[['Category', 'Size']]
df_onehot = pd.get_dummies(onehot_source, prefix={'Category': 'Cat', 'Size': 'Size'})
print("\nOne-Hot Encoding:")
print(df_onehot.head())
