<a href="https://colab.research.google.com/github/MehrDataPythonist/daily-dev-/blob/main/Untitled198.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## سلول 1: وارد کردن کتابخانه‌ها و تنظیم داده‌ها



In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import urllib.request

# Fix the NumPy and TensorFlow seeds for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Remote locations of the breast-cancer (WDBC) and Pima diabetes datasets
url_bc = "https://archive.ics.uci.edu/ml/machine-learning-databases/breast-cancer-wisconsin/wdbc.data"
url_db = "https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv"

# Download both files into the working directory (breast cancer first, then diabetes)
for _remote, _local in ((url_bc, "wdbc.data"), (url_db, "pima-indians-diabetes.csv")):
    urllib.request.urlretrieve(_remote, _local)

# Read and preprocess the breast-cancer data
def load_and_preprocess_bc_data():
    """Load "wdbc.data", split it 80/20 and standardise the features.

    The file is read without a header row; its columns are named "id",
    "diagnosis" and "feature_1" .. "feature_30". The label is 1 where
    diagnosis == 'M' and 0 otherwise. The scaler is fitted on the training
    split only and then applied to the test split.

    Returns:
        (X_train_scaled, X_test_scaled, y_train, y_test) -- scaled feature
        arrays and the matching label series.
    """
    feature_names = [f"feature_{i}" for i in range(1, 31)]
    frame = pd.read_csv("wdbc.data", header=None, names=["id", "diagnosis", *feature_names])

    labels = (frame.diagnosis == 'M').astype(int)
    features = frame.drop(columns=["id", "diagnosis"])

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Fit on the training split only so no test statistics leak into the scaling
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test), y_train, y_test

# Read and preprocess the diabetes data
def load_and_preprocess_db_data():
    """Load "pima-indians-diabetes.csv", split it 80/20 and standardise the features.

    The file is read without a header row; the last column ("outcome") is
    used as the label and the remaining eight columns as features. The
    scaler is fitted on the training split only and then applied to the
    test split.

    Returns:
        (X_train_scaled, X_test_scaled, y_train, y_test) -- scaled feature
        arrays and the matching label series.
    """
    feature_names = [
        "pregnancies", "glucose", "blood_pressure", "skin_thickness",
        "insulin", "bmi", "diabetes_pedigree_function", "age",
    ]
    frame = pd.read_csv(
        "pima-indians-diabetes.csv", header=None, names=feature_names + ["outcome"]
    )

    labels = frame["outcome"]
    features = frame.drop(columns="outcome")

    X_train, X_test, y_train, y_test = train_test_split(
        features, labels, test_size=0.2, random_state=42
    )

    # Fit on the training split only so no test statistics leak into the scaling
    scaler = StandardScaler().fit(X_train)
    return scaler.transform(X_train), scaler.transform(X_test), y_train, y_test

# Breast-cancer data: scaled train/test features and their labels
(
    X_train_bc,
    X_test_bc,
    y_train_bc,
    y_test_bc,
) = load_and_preprocess_bc_data()

# Diabetes data: scaled train/test features and their labels
(
    X_train_db,
    X_test_db,
    y_train_db,
    y_test_db,
) = load_and_preprocess_db_data()

## سلول 2: تعریف توابع مدل و آموزش



In [None]:
# Build the baseline model
def create_base_model(input_shape, num_classes):
    """Build and compile a small fully connected classifier.

    The network has three ReLU hidden layers (64, 32 and 16 units) followed
    by a sigmoid output layer, and is compiled with the Adam optimizer,
    binary cross-entropy loss and the accuracy metric.

    Args:
        input_shape: Number of input features (used as a 1-D input shape).
        num_classes: Number of units in the sigmoid output layer.

    Returns:
        The compiled keras.Model.
    """
    inputs = keras.Input(shape=(input_shape,))

    hidden = inputs
    for units in (64, 32, 16):
        hidden = layers.Dense(units, activation='relu')(hidden)

    outputs = layers.Dense(num_classes, activation='sigmoid')(hidden)

    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train a model and evaluate it on the test set
def train_and_evaluate_model(model, X_train, y_train, X_test, y_test, epochs=100, batch_size=32):
    """Fit `model` silently and report its accuracy on the test split.

    20% of the training data is held out by Keras as a validation split
    during fitting.

    Args:
        model: Compiled Keras model; trained in place.
        X_train, y_train: Training features and labels.
        X_test, y_test: Held-out features and labels used for evaluation.
        epochs: Number of training epochs.
        batch_size: Mini-batch size used for fitting.

    Returns:
        (history, test_accuracy): the object returned by model.fit and the
        second value returned by model.evaluate.
    """
    fit_options = {
        "epochs": epochs,
        "batch_size": batch_size,
        "validation_split": 0.2,
        "verbose": 0,
    }
    history = model.fit(X_train, y_train, **fit_options)

    # evaluate() yields (loss, accuracy) for this compile configuration; only accuracy is reported
    _loss, test_accuracy = model.evaluate(X_test, y_test)
    return history, test_accuracy

# Simple magnitude-based pruning
def simple_prune_model(model, sparsity=0.5):
    """Zero the smallest-magnitude kernel entries of every Dense layer, in place.

    For each Dense layer, every weight array with more than one dimension is
    pruned: entries whose absolute value is not strictly above the
    (sparsity * 100)-th percentile of that array's absolute values are
    multiplied by zero. One-dimensional arrays (the biases) are left as is.

    Args:
        model: Keras model whose Dense layers are modified in place.
        sparsity: Fraction used to pick the percentile threshold per array.

    Returns:
        The same `model` object that was passed in.
    """
    percentile = sparsity * 100

    for layer in model.layers:
        if not isinstance(layer, layers.Dense):
            continue

        tensors = layer.get_weights()
        for idx, tensor in enumerate(tensors):
            # Only prune kernels, not biases
            if tensor.ndim <= 1:
                continue
            magnitudes = np.abs(tensor)
            cutoff = np.percentile(magnitudes, percentile)
            tensors[idx] = tensor * (magnitudes > cutoff)
        layer.set_weights(tensors)

    return model

## سلول 3: اجرای آزمایش‌ها



In [None]:
# Run the experiments for breast cancer: train the baseline first
base_model_bc = create_base_model(X_train_bc.shape[1], 1)
history_bc, base_acc_bc = train_and_evaluate_model(base_model_bc, X_train_bc, y_train_bc, X_test_bc, y_test_bc)

# clone_model() rebuilds the architecture with newly initialised weights; it does
# NOT copy the trained ones. Copy them explicitly so that pruning is applied to
# the trained baseline rather than to a random network.
pruned_model_bc = tf.keras.models.clone_model(base_model_bc)
pruned_model_bc.set_weights(base_model_bc.get_weights())
pruned_model_bc = simple_prune_model(pruned_model_bc)
pruned_model_bc.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# NOTE: this fine-tuning step does not re-apply the pruning mask, so zeroed
# weights are free to become non-zero again during training.
history_pruned_bc, pruned_acc_bc = train_and_evaluate_model(pruned_model_bc, X_train_bc, y_train_bc, X_test_bc, y_test_bc)

# Run the experiments for diabetes: train the baseline first
base_model_db = create_base_model(X_train_db.shape[1], 1)
history_db, base_acc_db = train_and_evaluate_model(base_model_db, X_train_db, y_train_db, X_test_db, y_test_db)

# Same procedure as above: clone, copy the trained weights, prune, then fine-tune
pruned_model_db = tf.keras.models.clone_model(base_model_db)
pruned_model_db.set_weights(base_model_db.get_weights())
pruned_model_db = simple_prune_model(pruned_model_db)
pruned_model_db.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history_pruned_db, pruned_acc_db = train_and_evaluate_model(pruned_model_db, X_train_db, y_train_db, X_test_db, y_test_db)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9738 - loss: 0.1627 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9738 - loss: 0.2167 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7172 - loss: 1.0434 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7064 - loss: 1.1987 


## سلول 4: محاسبه اندازه مدل و زمان استنتاج



In [None]:
import time

# Compute the model size
def get_model_size(model):
    """Return the size of the model's trainable variables in megabytes.

    Every element of every trainable variable is counted as 4 bytes,
    regardless of its value, so zeroed (pruned) weights still contribute.

    Args:
        model: Object exposing a `trainable_variables` iterable of tensors.

    Returns:
        Total element count * 4 bytes, expressed in MiB (divided by 1024 * 1024).
    """
    element_count = sum(tf.size(variable).numpy() for variable in model.trainable_variables)
    total_bytes = element_count * 4
    return total_bytes / (1024 * 1024)

# Compute the size of each model (base first, then pruned, per dataset)
base_size_bc, pruned_size_bc = map(get_model_size, (base_model_bc, pruned_model_bc))

base_size_db, pruned_size_db = map(get_model_size, (base_model_db, pruned_model_db))

# Measure the inference time
def measure_inference_time(model, X_test, num_runs=100):
    """Return the mean wall-clock duration, in seconds, of one model.predict call.

    One untimed warm-up prediction is issued first so that any one-off cost
    paid by the first call does not skew the average, then `num_runs` calls
    are timed with a monotonic high-resolution clock. Progress bars are
    suppressed so console output is not part of the measurement.

    Args:
        model: Object exposing predict(X, verbose=...), e.g. a Keras model.
        X_test: Input batch passed unchanged to every predict call.
        num_runs: Number of timed calls to average over; must be >= 1.

    Returns:
        Average seconds per predict call.

    Raises:
        ValueError: If num_runs is smaller than 1.
    """
    if num_runs < 1:
        raise ValueError("num_runs must be a positive integer")

    # Warm-up call, deliberately excluded from the timing
    model.predict(X_test, verbose=0)

    # perf_counter is monotonic and higher resolution than time.time()
    start_time = time.perf_counter()
    for _ in range(num_runs):
        model.predict(X_test, verbose=0)
    elapsed = time.perf_counter() - start_time

    return elapsed / num_runs

# Average prediction time per call for each model (base first, then pruned)
base_time_bc, pruned_time_bc = (
    measure_inference_time(_model, X_test_bc) for _model in (base_model_bc, pruned_model_bc)
)

base_time_db, pruned_time_db = (
    measure_inference_time(_model, X_test_db) for _model in (base_model_db, pruned_model_db)
)

# Compute the predictions on the test sets
base_pred_bc, pruned_pred_bc = (
    _model.predict(X_test_bc) for _model in (base_model_bc, pruned_model_bc)
)

base_pred_db, pruned_pred_db = (
    _model.predict(X_test_db) for _model in (base_model_db, pruned_model_db)
)


# Collect the weights
def _flat_first_weights(model):
    """Concatenate the flattened first weight array of every layer that has weights."""
    pieces = []
    for layer in model.layers:
        params = layer.get_weights()
        if params:  # layers without weights return an empty list and are skipped
            pieces.append(params[0].flatten())
    return np.concatenate(pieces)


base_weights_bc = _flat_first_weights(base_model_bc)
pruned_weights_bc = _flat_first_weights(pruned_model_bc)

base_weights_db = _flat_first_weights(base_model_db)
pruned_weights_db = _flat_first_weights(pruned_model_db)

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3m

## سلول 5: تولید نمودارها و جداول



In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import roc_curve, auc, confusion_matrix
import pandas as pd

# General settings for plot aesthetics
# The bare 'seaborn' style name was deprecated in Matplotlib 3.6 and removed in
# later releases; 'seaborn-v0_8' is its replacement. Prefer the new name and
# fall back to the old one only on Matplotlib versions that predate it.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)
sns.set_palette("deep")
plt.rcParams.update({
    'figure.figsize': [12, 8],
    'font.size': 14,
    'axes.labelsize': 12,
    'axes.titlesize': 16,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 12,
    'figure.titlesize': 20,
})

# 1. Model Accuracy Comparison
def plot_accuracy_comparison():
    """Save a grouped bar chart of base vs. pruned test accuracy per dataset.

    Reads the module-level accuracies (base_acc_bc, pruned_acc_bc,
    base_acc_db, pruned_acc_db), converts them to percentages, labels every
    bar with its value, writes 'accuracy_comparison_updated.png' at 300 dpi
    and closes the figure. Returns nothing.
    """
    labels = ['Base', 'Pruned']
    bar_width = 0.35
    half_width = bar_width / 2
    ticks = range(len(labels))

    # (legend label, values in percent, horizontal shift of the bar group, colour)
    series = (
        ('Breast Cancer', [base_acc_bc * 100, pruned_acc_bc * 100], -half_width, '#4e79a7'),
        ('Diabetes', [base_acc_db * 100, pruned_acc_db * 100], half_width, '#76b7b2'),
    )

    fig, ax = plt.subplots(figsize=(12, 8))
    bar_groups = [
        ax.bar([tick + shift for tick in ticks], values, bar_width, label=name, color=colour)
        for name, values, shift, colour in series
    ]

    ax.set_ylabel('Accuracy (%)')
    ax.set_xlabel('Model Type')
    ax.set_title('Model Accuracy Comparison', fontsize=16)
    ax.set_xticks(ticks)
    ax.set_xticklabels(labels)
    ax.legend()

    # Fixed 60-100% window with major ticks every 10 and minor ticks every 5
    ax.set_ylim(60, 100)
    ax.yaxis.set_major_locator(plt.MultipleLocator(10))
    ax.yaxis.set_minor_locator(plt.MultipleLocator(5))

    # Write each bar's value just above its top edge
    for group in bar_groups:
        for rect in group:
            top = rect.get_height()
            centre = rect.get_x() + rect.get_width()/2.
            ax.text(centre, top, f'{top:.2f}%', ha='center', va='bottom')

    ax.grid(axis='y', linestyle='--', alpha=0.7)
    fig.tight_layout()
    fig.savefig('accuracy_comparison_updated.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

# 2. Model Size and Inference Time Comparison
def plot_size_and_speed():
    """Save side-by-side stacked bar charts of model size and inference time.

    The left panel stacks the breast-cancer and diabetes model sizes (MB) for
    the base and pruned models; the right panel does the same for the mean
    inference times, converted from seconds to milliseconds. Every bar
    segment is labelled at its centre. The figure is written to
    'size_and_speed_comparison_updated.png' at 300 dpi and then closed.
    Reads the module-level *_size_* and *_time_* values; returns nothing.
    """
    labels = ['Base', 'Pruned']

    # (y-axis label, title, breast-cancer values, diabetes values) per panel
    panels = (
        (
            'Model Size (MB)',
            'Model Size Comparison',
            [base_size_bc, pruned_size_bc],
            [base_size_db, pruned_size_db],
        ),
        (
            'Inference Time (ms)',
            'Inference Time Comparison',
            [base_time_bc * 1000, pruned_time_bc * 1000],  # seconds -> milliseconds
            [base_time_db * 1000, pruned_time_db * 1000],  # seconds -> milliseconds
        ),
    )

    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    for ax, (y_label, title, bc_values, db_values) in zip(axes, panels):
        ax.bar(labels, bc_values, label='Breast Cancer', alpha=0.8, color='#4e79a7')
        # Diabetes bars sit on top of the breast-cancer bars
        ax.bar(labels, db_values, bottom=bc_values, label='Diabetes', alpha=0.8, color='#76b7b2')
        ax.set_ylabel(y_label)
        ax.set_title(title)
        ax.legend()

    for ax in axes:
        for container in ax.containers:
            ax.bar_label(container, fmt='%.2f', label_type='center')

    fig.tight_layout()
    fig.savefig('size_and_speed_comparison_updated.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

# 3. ROC Curves and Confusion Matrices
def plot_roc_and_confusion():
    """Save a 2x2 figure of ROC curves and confusion matrices for the base models.

    Top row: ROC curve (with AUC in the legend) for breast cancer and
    diabetes, computed from the base-model prediction scores. Bottom row:
    confusion matrices obtained by rounding those scores. The figure is
    written to 'roc_and_confusion_updated.png' at 300 dpi and then closed.
    Reads the module-level y_test_* and base_pred_* values; returns nothing.
    """
    fig, axes = plt.subplots(2, 2, figsize=(16, 16))
    roc_axes, cm_axes = axes

    # (ROC title, confusion-matrix title, true labels, predicted scores) per dataset
    datasets = (
        ('ROC Curve - Breast Cancer', 'Confusion Matrix - Breast Cancer', y_test_bc, base_pred_bc),
        ('ROC Curve - Diabetes', 'Confusion Matrix - Diabetes', y_test_db, base_pred_db),
    )

    # Top row: ROC curves
    for ax, (roc_title, _cm_title, y_true, scores) in zip(roc_axes, datasets):
        fpr, tpr, _ = roc_curve(y_true, scores)
        roc_auc = auc(fpr, tpr)
        ax.plot(fpr, tpr, color='#4e79a7', lw=2, label=f'ROC curve (AUC = {roc_auc:.2f})')
        # Diagonal reference line (FPR == TPR)
        ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title(roc_title)
        ax.legend(loc="lower right")

    # Bottom row: confusion matrices from the rounded scores
    for ax, (_roc_title, cm_title, y_true, scores) in zip(cm_axes, datasets):
        cm = confusion_matrix(y_true, scores.round())
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_xlabel('Predicted label')
        ax.set_ylabel('True label')
        ax.set_title(cm_title)

    fig.tight_layout()
    fig.savefig('roc_and_confusion_updated.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

# 4. Weight Distribution
def plot_weight_distribution():
    """Save a 2x2 grid of weight histograms (with KDE) for all four models.

    Panels, in reading order: base / pruned breast-cancer weights, then
    base / pruned diabetes weights. The figure is written to
    'weight_distribution_updated.png' at 300 dpi and then closed.
    Reads the module-level *_weights_* vectors; returns nothing.
    """
    # (weight vector, panel title, histogram colour) in reading order
    panels = (
        (base_weights_bc, 'Base Model Weights - Breast Cancer', '#4e79a7'),
        (pruned_weights_bc, 'Pruned Model Weights - Breast Cancer', '#59a14f'),
        (base_weights_db, 'Base Model Weights - Diabetes', '#edc948'),
        (pruned_weights_db, 'Pruned Model Weights - Diabetes', '#ff9da7'),
    )

    fig, axes = plt.subplots(2, 2, figsize=(16, 16))

    for ax, (values, title, colour) in zip(axes.flat, panels):
        sns.histplot(values, kde=True, ax=ax, color=colour)
        ax.set_title(title)
        ax.set_xlabel('Weight Value')

    fig.tight_layout()
    fig.savefig('weight_distribution_updated.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

# 5. Comprehensive Performance Comparison
def create_performance_table():
    """Render the accuracy / size / latency summary as a table image.

    Builds a two-row table (Base, Pruned) from the module-level accuracy,
    size and timing values, rounds it to two decimals, draws it on an
    axis-less figure and writes 'performance_table_updated.png' at 300 dpi
    before closing the figure. Returns nothing.
    """
    summary = pd.DataFrame({
        'Model': ['Base', 'Pruned'],
        # Accuracies are fractions; show them as percentages
        'BC Accuracy (%)': [base_acc_bc * 100, pruned_acc_bc * 100],
        'DB Accuracy (%)': [base_acc_db * 100, pruned_acc_db * 100],
        'BC Model Size (MB)': [base_size_bc, pruned_size_bc],
        'DB Model Size (MB)': [base_size_db, pruned_size_db],
        # Timings are in seconds; show them in milliseconds
        'BC Inference Time (ms)': [base_time_bc * 1000, pruned_time_bc * 1000],
        'DB Inference Time (ms)': [base_time_db * 1000, pruned_time_db * 1000],
    }).round(2)

    fig, ax = plt.subplots(figsize=(12, 4))
    ax.axis('off')  # only the table should be visible

    table = ax.table(
        cellText=summary.values,
        colLabels=summary.columns,
        cellLoc='center',
        loc='center',
    )
    table.auto_set_font_size(False)
    table.set_fontsize(10)
    table.scale(1.2, 1.5)
    ax.set_title('Comprehensive Performance Comparison', fontsize=16)

    fig.tight_layout()
    fig.savefig('performance_table_updated.png', dpi=300, bbox_inches='tight')
    plt.close(fig)

# Generate every figure and the summary table, in the order they are defined
for _render in (
    plot_accuracy_comparison,
    plot_size_and_speed,
    plot_roc_and_confusion,
    plot_weight_distribution,
    create_performance_table,
):
    _render()

  plt.style.use('seaborn')
