In [None]:
# --- 2. MODEL TRAINING & EVALUATION ---
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, ConfusionMatrixDisplay
import joblib
import matplotlib.pyplot as plt

# 1. Load the data
# NOTE(review): "KullaniciAdiniz" in the URL looks like a placeholder account
# name rather than a real GitHub user — confirm it points at the real repository.
url = "https://raw.githubusercontent.com/KullaniciAdiniz/Metabolic-SERS-Diagnosis/main/metabolic_scores_final.csv"
df = pd.read_csv(url)

# Drop the 'Sample' column when the file has one; errors='ignore' makes the
# call a no-op for files that do not contain it.
df = df.drop(columns=['Sample'], errors='ignore')

# 2. Data preparation: 'Cancer' is the target, every other column is a feature
y = df['Cancer']
X = df.drop(columns=['Cancer'])

# Train/test split (70% / 30%), stratified on the target so both parts keep
# the same class proportions; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

print(f"Eğitim Seti: {X_train.shape}, Test Seti: {X_test.shape}")

# 3. Model training (random forest, 100 trees, fixed seed)
# fit() returns the fitted estimator, so construction and training are chained.
rf_model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# 4. Model evaluation on the held-out test split
y_pred = rf_model.predict(X_test)

print("\n--- Model Performansı ---")

# Overall fraction of test samples predicted correctly
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Per-class text report produced by classification_report
report = classification_report(y_test, y_pred)
print("\nSınıflandırma Raporu:\n", report)

# 5. Confusion matrix visualisation
# Use the model's own class ordering for both the matrix rows/columns and the
# axis tick labels so the two always line up.
class_labels = rf_model.classes_
cm = confusion_matrix(y_test, y_pred, labels=class_labels)

disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=class_labels,
)
disp.plot(cmap='Blues')

plt.title("Confusion Matrix")
plt.show()

# 6. Save the trained model to disk
# The relative path means the file lands in the current working directory.
model_path = "cancer_diagnosis_model.pkl"
joblib.dump(rf_model, model_path)
print("Model dosyası (.pkl) başarıyla kaydedildi.")