In [4]:
# ==========================================
# 1) Import the libraries and read the data
# ==========================================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the data set from the CSV file
data = pd.read_csv("brca.csv")

# ==========================================
# 2) Clean up the column names
# ==========================================
# Strip the "x." prefix where present. The pattern is anchored to the start
# of the name so that an "x." occurring in the middle of a column name
# (e.g. "max.value") is left intact; an unanchored literal replacement
# would remove it there as well.
data.columns = data.columns.str.replace(r"^x\.", "", regex=True)

# ==========================================
# 3) Prepare the target column (y)
#    M = malignant (1) / B = benign (0)
# ==========================================
label_codes = {"M": 1, "B": 0}
encoded_y = data["y"].map(label_codes)

# Series.map() turns every value that is not a key of the mapping into NaN.
# Fail here with a clear message instead of letting NaN labels flow into the
# later steps (this also catches re-running this step on data that has
# already been encoded).
unmapped = encoded_y.isna()
if unmapped.any():
    raise ValueError(
        "Unexpected values in target column 'y': {!r}".format(
            data.loc[unmapped, "y"].unique().tolist()
        )
    )

data["y"] = encoded_y

# ==========================================
# 4) Select the features
# ==========================================
features = [
    "radius_mean",
    "texture_mean",
    "perimeter_mean",
    "area_mean",
    "smoothness_mean",
]

# Model inputs: only the selected feature columns
X = data.loc[:, features]
# Prediction target: the "y" column
y = data.loc[:, "y"]

# ==========================================
# 5) Split the data (70% training / 30% testing)
# ==========================================
# The call returns the pieces in the order:
# training inputs, test inputs, training targets, test targets.
split_parts = train_test_split(X, y, random_state=42, test_size=0.30)
train_X, test_X, train_y, test_y = split_parts

# ==========================================
# 6) Build the Decision Tree model and train it
# ==========================================
# fit() returns the fitted estimator itself, so construction and training
# are chained into a single expression.
model = DecisionTreeClassifier(random_state=42).fit(train_X, train_y)

# ==========================================
# 7) Run predictions on the test set
# ==========================================
predictions = model.predict(test_X)

# ==========================================
# 8) Evaluate the model's performance
# ==========================================
# Both metrics compare the true test labels against the model's predictions.
conf_mat = confusion_matrix(y_true=test_y, y_pred=predictions)
accuracy = accuracy_score(y_true=test_y, y_pred=predictions)

print("Accuracy:", accuracy)
print("Confusion Matrix:\n", conf_mat)


Accuracy: 0.9122807017543859
Confusion Matrix:
 [[104   4]
 [ 11  52]]
