# In [None]:
# Comparison of machine-learning and deep-learning models on the MNIST dataset

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import train_test_split
import xgboost as xgb
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import warnings
# Ignore every warning raised through the `warnings` module from this point on.
# NOTE(review): this also hides deprecation notices from the libraries imported
# above — narrow the filter if those should stay visible.
warnings.filterwarnings('ignore')

# Load and prepare the dataset
print("MNIST veri seti yükleniyor...")
(x_train_full, y_train_full), (x_test, y_test) = keras.datasets.mnist.load_data()

# Normalise the pixel values into the 0-1 range
x_train_full = x_train_full.astype(np.float32) / 255.0
x_test = x_test.astype(np.float32) / 255.0

print(
    f"Eğitim verisi boyutu: {x_train_full.shape}",
    f"Test verisi boyutu: {x_test.shape}",
    sep="\n",
)

# Flatten each image into one feature vector for the classical ML models
x_train_flat = x_train_full.reshape(len(x_train_full), -1)
x_test_flat = x_test.reshape(len(x_test), -1)

# Train on a smaller subset for speed: only the first 10,000 training
# samples are kept.
n_samples = 10000
x_train_sample, y_train_sample = x_train_flat[:n_samples], y_train_full[:n_samples]

print(f"\nÖrnekleme sonrası eğitim verisi boyutu: {x_train_sample.shape}")

# Confusion-matrix visualisation helper
def plot_confusion_matrix(y_true, y_pred, title, model_name):
    """Plot a confusion-matrix heatmap and print the model's accuracy.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels, aligned element-wise with ``y_true``.
        title: Leading part of the figure title.
        model_name: Model name; appended to the figure title and used in the
            printed accuracy line.

    Returns:
        The confusion matrix computed by ``sklearn.metrics.confusion_matrix``.
    """
    # Derive the class labels from the data and use the same set for both the
    # matrix and the tick labels. A hard-coded range(10) would disagree with
    # the matrix whenever a class is missing from both y_true and y_pred.
    labels = np.unique(np.concatenate([np.ravel(y_true), np.ravel(y_pred)]))
    tick_labels = labels.tolist()

    cm = confusion_matrix(y_true, y_pred, labels=labels)
    plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels, yticklabels=tick_labels)
    plt.title(f'{title} - {model_name}')
    plt.xlabel('Tahmin Edilen')
    plt.ylabel('Gerçek')
    plt.tight_layout()
    plt.show()

    # Report the overall accuracy next to the plot
    accuracy = accuracy_score(y_true, y_pred)
    print(f"\n{model_name} Accuracy: {accuracy:.4f}")

    return cm

# 1. RANDOM FOREST MODEL
print(
    "\n" + "=" * 50,
    "1. RANDOM FOREST MODELİ EĞİTİLİYOR",
    "=" * 50,
    sep="\n",
)

# 100 trees, fixed seed, parallel jobs enabled, progress output on
rf_model = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1, verbose=1)

print("Random Forest eğitimi başlıyor...")
rf_model.fit(x_train_sample, y_train_sample)

print("Random Forest tahminleri yapılıyor...")
rf_predictions = rf_model.predict(x_test_flat)

# Random Forest results on the test set
print("\nRandom Forest Classification Report:")
print(classification_report(y_test, rf_predictions))

# Random Forest confusion matrix (also prints the accuracy)
rf_cm = plot_confusion_matrix(
    y_true=y_test,
    y_pred=rf_predictions,
    title="Confusion Matrix",
    model_name="Random Forest",
)

# 2. XGBOOST MODEL
print(
    "\n" + "=" * 50,
    "2. XGBOOST MODELİ EĞİTİLİYOR",
    "=" * 50,
    sep="\n",
)

# Multi-class soft-probability objective, 100 estimators, fixed seed
xgb_model = xgb.XGBClassifier(
    objective='multi:softprob', num_class=10,
    n_estimators=100, random_state=42, verbosity=1,
)

print("XGBoost eğitimi başlıyor...")
xgb_model.fit(x_train_sample, y_train_sample)

print("XGBoost tahminleri yapılıyor...")
xgb_predictions = xgb_model.predict(x_test_flat)

# XGBoost results on the test set
print("\nXGBoost Classification Report:")
print(classification_report(y_test, xgb_predictions))

# XGBoost confusion matrix (also prints the accuracy)
xgb_cm = plot_confusion_matrix(
    y_true=y_test,
    y_pred=xgb_predictions,
    title="Confusion Matrix",
    model_name="XGBoost",
)

# 3. CNN (CONVOLUTIONAL NEURAL NETWORK) MODEL
print("\n" + "="*50)
print("3. CNN MODELİ EĞİTİLİYOR")
print("="*50)

# Seed the Python, NumPy and backend RNGs so that weight initialisation,
# dropout and batch shuffling start from a fixed state, mirroring the
# random_state=42 used for the tree-based models.
# NOTE(review): some GPU kernels may still be non-deterministic.
keras.utils.set_random_seed(42)

# Reshape the images for the CNN: add a trailing single-channel axis
x_train_cnn = x_train_full[:n_samples].reshape(-1, 28, 28, 1)
x_test_cnn = x_test.reshape(-1, 28, 28, 1)
y_train_cnn = y_train_full[:n_samples]

# Build the CNN: three convolution stages followed by a small dense head.
# The input shape is declared with an explicit Input object rather than the
# `input_shape=` layer argument, which current Keras discourages.
cnn_model = keras.Sequential([
    keras.Input(shape=(28, 28, 1)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),

    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax')
])

# Compile the CNN; the sparse loss is used because the labels are passed as
# class ids rather than one-hot vectors.
cnn_model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy']
)

print("CNN Model Mimarisi:")
cnn_model.summary()

# Train the CNN, holding out 20% of the training subset for validation
print("\nCNN eğitimi başlıyor...")
history = cnn_model.fit(
    x_train_cnn, y_train_cnn,
    epochs=10,
    batch_size=128,
    validation_split=0.2,
    verbose=1
)

# CNN predictions: take the most probable class for every test image
print("CNN tahminleri yapılıyor...")
cnn_predictions_proba = cnn_model.predict(x_test_cnn)
cnn_predictions = np.argmax(cnn_predictions_proba, axis=1)

# CNN results on the test set
print("\nCNN Classification Report:")
print(classification_report(y_test, cnn_predictions))

# CNN confusion matrix (also prints the accuracy)
cnn_cm = plot_confusion_matrix(y_test, cnn_predictions,
                              "Confusion Matrix", "CNN")

# Visualise the CNN training history: accuracy on the left, loss on the right
plt.figure(figsize=(12, 4))

# One entry per panel:
# (train key, validation key, train legend, validation legend, title, y label)
history_panels = (
    ('accuracy', 'val_accuracy', 'Eğitim Accuracy', 'Validation Accuracy',
     'CNN Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Eğitim Loss', 'Validation Loss',
     'CNN Model Loss', 'Loss'),
)

for panel_no, panel_spec in enumerate(history_panels, start=1):
    train_key, val_key, train_label, val_label, panel_title, y_label = panel_spec
    plt.subplot(1, 2, panel_no)
    plt.plot(history.history[train_key], label=train_label)
    plt.plot(history.history[val_key], label=val_label)
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

# 4. MODEL COMPARISON
print(
    "\n" + "=" * 50,
    "4. MODEL KARŞILAŞTIRMA SONUÇLARI",
    "=" * 50,
    sep="\n",
)

# Test-set accuracy of every model
rf_accuracy = accuracy_score(y_test, rf_predictions)
xgb_accuracy = accuracy_score(y_test, xgb_predictions)
cnn_accuracy = accuracy_score(y_test, cnn_predictions)

# One row per model, in the order the models were trained
results_df = pd.DataFrame(
    [
        ('Random Forest', rf_accuracy),
        ('XGBoost', xgb_accuracy),
        ('CNN', cnn_accuracy),
    ],
    columns=['Model', 'Accuracy'],
)

print("\nModel Accuracy Karşılaştırması:")
print(results_df)

# Bar chart of the accuracies
plt.figure(figsize=(10, 6))
bars = plt.bar(
    results_df['Model'],
    results_df['Accuracy'],
    color=['skyblue', 'lightgreen', 'lightcoral'],
)
plt.title('Model Accuracy Karşılaştırması')
plt.ylabel('Accuracy')
plt.ylim(0, 1)

# Write each value just above its bar, centred horizontally
for rect, score in zip(bars, results_df['Accuracy']):
    label_x = rect.get_x() + rect.get_width()/2
    label_y = rect.get_height() + 0.01
    plt.text(label_x, label_y, f'{score:.4f}', ha='center', va='bottom')

plt.tight_layout()
plt.show()

# Visualise sample predictions
def show_sample_predictions(model_name, predictions, n_samples=10):
    """Show the leading test images with their true and predicted labels.

    Reads the module-level ``x_test`` (images) and ``y_test`` (labels).

    Args:
        model_name: Model name used in the figure's super-title.
        predictions: Predicted labels, indexed in the same order as ``y_test``.
        n_samples: How many leading test samples to display. Defaults to 10.
            The value is capped at the number of available samples.
    """
    n_cols = 5
    # Never index past the data that is actually available.
    n_shown = min(n_samples, len(predictions), len(x_test), len(y_test))
    # The grid used to be fixed at 2x5, so asking for more than ten samples
    # made plt.subplot fail. Size the grid from the sample count instead, but
    # keep at least two rows so existing calls render exactly as before.
    n_rows = max(2, (n_shown + n_cols - 1) // n_cols)

    plt.figure(figsize=(15, 3 * n_rows))
    for i in range(n_shown):
        plt.subplot(n_rows, n_cols, i + 1)
        plt.imshow(x_test[i], cmap='gray')
        plt.title(f'Gerçek: {y_test[i]}, Tahmin: {predictions[i]}')
        plt.axis('off')
    plt.suptitle(f'{model_name} - Örnek Tahminler')
    plt.tight_layout()
    plt.show()

# Show sample predictions for every model
for shown_model, shown_predictions in (
    ('Random Forest', rf_predictions),
    ('XGBoost', xgb_predictions),
    ('CNN', cnn_predictions),
):
    show_sample_predictions(shown_model, shown_predictions)

print(
    "\n" + "=" * 50,
    "ANALİZ TAMAMLANDI!",
    "=" * 50,
    sep="\n",
)

# Report the model whose row holds the highest accuracy
accuracy_column = results_df['Accuracy']
best_model = results_df.loc[accuracy_column.idxmax(), 'Model']
print(f"En iyi performans gösteren model: {best_model}")
print(f"En yüksek accuracy: {accuracy_column.max():.4f}")