In [None]:
import numpy as np
from scipy import stats

# Baseline scores (no training), copied from the printed run output.
baseline_results = {
    'mt5-mlm-final': dict(auc=0.5432, f1=0.5097, accuracy=0.3586),
    'mt5-base': dict(auc=0.4801, f1=0.5000, accuracy=0.3333),
}

# Per-fold scores of the 10-fold runs, copied from the printed run output.
# Each list holds one value per fold, in fold order.
mt5_mlm_final_results = dict(
    auc=[
        0.8093, 0.7869, 0.8408, 0.8589, 0.8411,
        0.7926, 0.8631, 0.8434, 0.8649, 0.8120,
    ],
    f1=[
        0.5950, 0.5000, 0.6465, 0.7100, 0.6927,
        0.5000, 0.6667, 0.6989, 0.6854, 0.0000,
    ],
    accuracy=[
        0.6103, 0.3333, 0.7979, 0.7841, 0.7962,
        0.3333, 0.7098, 0.7634, 0.7288, 0.6649,
    ],
)

mt5_base_results = dict(
    auc=[
        0.5273, 0.4400, 0.5566, 0.4678, 0.5009,
        0.5503, 0.4915, 0.5474, 0.6230, 0.4616,
    ],
    f1=[
        0.5013, 0.5013, 0.5027, 0.4158, 0.5006,
        0.5000, 0.5000, 0.5026, 0.5019, 0.5019,
    ],
    accuracy=[
        0.3345, 0.3368, 0.3610, 0.4128, 0.3351,
        0.3333, 0.3333, 0.3368, 0.3351, 0.3351,
    ],
)

# (key in the per-fold result dicts, label used in the printed report)
_REPORT_METRICS = (('auc', 'AUC'), ('f1', 'F1'), ('accuracy', 'Accuracy'))


def _paired_differences(group1, group2):
    """Return the element-wise differences ``group1 - group2`` as a float array."""
    return np.asarray(group1, dtype=float) - np.asarray(group2, dtype=float)


def _paired_cohens_d(group1, group2):
    """Return Cohen's d for paired samples (mean difference / sample SD of differences)."""
    diff = _paired_differences(group1, group2)
    return np.mean(diff) / np.std(diff, ddof=1)


def _paired_confidence_interval(group1, group2, confidence=0.95):
    """Return the (low, high) t-based confidence interval for the mean paired difference."""
    diff = _paired_differences(group1, group2)
    n = diff.size
    # Sample SD (ddof=1) and a t critical value with n-1 degrees of freedom,
    # so the interval matches the paired t-test computed on the same data.
    standard_error = np.std(diff, ddof=1) / np.sqrt(n)
    t_critical = stats.t.ppf(0.5 + confidence / 2.0, n - 1)
    margin = t_critical * standard_error
    center = np.mean(diff)
    return float(center - margin), float(center + margin)


def _format_significance(p_val):
    """Map a p-value to the star notation used in the report."""
    if p_val < 0.001:
        return "***"
    if p_val < 0.01:
        return "**"
    if p_val < 0.05:
        return "*"
    return "ns"


def _interpret_cohens_d(d):
    """Label |d| using Cohen's conventional cut-offs (0.2 / 0.5 / 0.8)."""
    abs_d = abs(d)
    if abs_d < 0.2:
        return "negligible"
    if abs_d < 0.5:
        return "small"
    if abs_d < 0.8:
        return "medium"
    return "large"


def calculate_statistics(mlm_results=None, base_results=None, baseline=None):
    """Compute and print the statistical comparison of mt5-mlm-final vs. mt5-base.

    For each of AUC, F1 and accuracy this computes the mean and sample standard
    deviation per model, a paired t-test across folds, and Cohen's d for paired
    samples. It prints a formatted report and returns the key numbers.

    Args:
        mlm_results: dict with keys 'auc', 'f1', 'accuracy', each a sequence of
            per-fold scores for mt5-mlm-final. Defaults to the module-level
            ``mt5_mlm_final_results``.
        base_results: same structure for mt5-base; sequences must be paired
            with (same length and order as) ``mlm_results``. Defaults to the
            module-level ``mt5_base_results``.
        baseline: dict with keys 'mt5-mlm-final' and 'mt5-base', each mapping
            'auc', 'f1', 'accuracy' to a single score. Defaults to the
            module-level ``baseline_results``.

    Returns:
        dict with the keys 'baseline_auc_diff', 'trained_auc_diff',
        '<metric>_p_value' and '<metric>_cohens_d' for auc/f1/acc,
        'mlm_auc_mean', 'base_auc_mean', 'mlm_improvement', 'base_improvement',
        and 'auc_ci_95' (a ``(low, high)`` tuple for the mean AUC difference).
    """
    # Fall back to the module-level tables so a bare call keeps working.
    if mlm_results is None:
        mlm_results = mt5_mlm_final_results
    if base_results is None:
        base_results = mt5_base_results
    if baseline is None:
        baseline = baseline_results

    print("🎯 COMPLETE STATISTICAL ANALYSIS")
    print("=" * 60)

    # 1-3. Means, sample standard deviations, paired t-tests and Cohen's d,
    # computed once per metric.
    summary = {}
    for key, _label in _REPORT_METRICS:
        mlm_scores = mlm_results[key]
        base_scores = base_results[key]
        t_stat, p_value = stats.ttest_rel(mlm_scores, base_scores)
        mlm_mean = np.mean(mlm_scores)
        base_mean = np.mean(base_scores)
        summary[key] = {
            'mlm_mean': mlm_mean,
            'mlm_std': np.std(mlm_scores, ddof=1),
            'base_mean': base_mean,
            'base_std': np.std(base_scores, ddof=1),
            'diff': mlm_mean - base_mean,
            't_stat': t_stat,
            'p_value': p_value,
            'cohens_d': _paired_cohens_d(mlm_scores, base_scores),
        }

    auc = summary['auc']
    auc_ci_low, auc_ci_high = _paired_confidence_interval(
        mlm_results['auc'], base_results['auc']
    )

    # 4. Report: baseline (untrained) scores.
    print("\n1️⃣ BASELINE RESULTS (No Training)")
    print("-" * 50)
    print(f"{'Model':<20} {'AUC':<8} {'F1':<8} {'Accuracy':<8}")
    print("-" * 50)
    for model_name in ('mt5-mlm-final', 'mt5-base'):
        scores = baseline[model_name]
        print(f"{model_name:<20} {scores['auc']:<8.4f} {scores['f1']:<8.4f} {scores['accuracy']:<8.4f}")

    baseline_auc_diff = baseline['mt5-mlm-final']['auc'] - baseline['mt5-base']['auc']
    print(f"\nBaseline AUC Difference: {baseline_auc_diff:.4f}")

    # Report: cross-validated scores, one row per metric.
    print("\n2️⃣ TRAINED RESULTS (10-Fold Cross-Validation)")
    print("-" * 80)
    print(f"{'Metric':<12} {'mt5-mlm-final':<18} {'mt5-base':<18} {'Difference':<12} {'p-value':<10} {'Cohens d':<10} {'Effect':<8}")
    print("-" * 80)
    for key, label in _REPORT_METRICS:
        m = summary[key]
        print(f"{label:<12} {m['mlm_mean']:<10.4f}±{m['mlm_std']:<10.4f} {m['base_mean']:<10.4f}±{m['base_std']:<10.4f} {m['diff']:<12.4f} {m['p_value']:<10.6f} {m['cohens_d']:<10.2f} {_interpret_cohens_d(m['cohens_d']):<8}")

    # Report: how much each model's mean AUC moved relative to its baseline.
    print("\n3️⃣ IMPROVEMENT FROM BASELINE")
    print("-" * 50)
    mlm_auc_improvement = auc['mlm_mean'] - baseline['mt5-mlm-final']['auc']
    base_auc_improvement = auc['base_mean'] - baseline['mt5-base']['auc']

    print(f"mt5-mlm-final AUC improvement: {mlm_auc_improvement:.4f}")
    print(f"mt5-base AUC improvement: {base_auc_improvement:.4f}")
    print(f"Relative improvement advantage: {mlm_auc_improvement - base_auc_improvement:.4f}")

    # Report: per-metric test details; the confidence interval is shown for AUC only.
    print("\n4️⃣ DETAILED STATISTICAL RESULTS")
    print("-" * 50)
    for index, (key, label) in enumerate(_REPORT_METRICS):
        m = summary[key]
        separator = "" if index == 0 else "\n"
        print(f"{separator}{label} comparison:")
        print(f"  • t-statistic: {m['t_stat']:.4f}")
        print(f"  • p-value: {m['p_value']:.6f} {_format_significance(m['p_value'])}")
        print(f"  • Cohen's d: {m['cohens_d']:.4f} ({_interpret_cohens_d(m['cohens_d'])} effect)")
        if key == 'auc':
            print(f"  • 95% CI for difference: [{auc_ci_low:.4f}, {auc_ci_high:.4f}]")

    print("\n5️⃣ INTERPRETATION")
    print("-" * 50)
    print("Significance levels: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant")
    print("Effect sizes: |d| < 0.2 (negligible), 0.2-0.5 (small), 0.5-0.8 (medium), >= 0.8 (large)")

    if auc['p_value'] < 0.05:
        # The t-test is two-sided, so the sign of the difference decides
        # which model is the better one.
        if auc['diff'] > 0:
            print("\n✅ SIGNIFICANT RESULT: mt5-mlm-final significantly outperforms mt5-base")
        else:
            print("\n✅ SIGNIFICANT RESULT: mt5-base significantly outperforms mt5-mlm-final")
        print(f"   AUC difference: {auc['diff']:.4f} (p={auc['p_value']:.4f})")
        print(f"   Effect size: {_interpret_cohens_d(auc['cohens_d'])} (d={auc['cohens_d']:.2f})")
    else:
        print("\n❌ NON-SIGNIFICANT: No significant difference between models")

    # Return the key numbers as a flat table.
    return {
        'baseline_auc_diff': baseline_auc_diff,
        'trained_auc_diff': auc['diff'],
        'auc_p_value': auc['p_value'],
        'auc_cohens_d': auc['cohens_d'],
        'f1_p_value': summary['f1']['p_value'],
        'f1_cohens_d': summary['f1']['cohens_d'],
        'acc_p_value': summary['accuracy']['p_value'],
        'acc_cohens_d': summary['accuracy']['cohens_d'],
        'mlm_auc_mean': auc['mlm_mean'],
        'base_auc_mean': auc['base_mean'],
        'mlm_improvement': mlm_auc_improvement,
        'base_improvement': base_auc_improvement,
        'auc_ci_95': (auc_ci_low, auc_ci_high),
    }

# Run the analysis and keep the returned summary dict
results = calculate_statistics()

🎯 COMPLETE STATISTICAL ANALYSIS

1️⃣ BASELINE RESULTS (No Training)
--------------------------------------------------
Model                AUC      F1       Accuracy
--------------------------------------------------
mt5-mlm-final        0.5432   0.5097   0.3586  
mt5-base             0.4801   0.5000   0.3333  

Baseline AUC Difference: 0.0631

2️⃣ TRAINED RESULTS (10-Fold Cross-Validation)
--------------------------------------------------------------------------------
Metric       mt5-mlm-final      mt5-base           Difference   p-value    Cohens d   Effect  
--------------------------------------------------------------------------------
AUC          0.8313    ±0.0290     0.5166    ±0.0550     0.3147       0.000000   5.98       large   
F1           0.5695    ±0.2148     0.4928    ±0.0271     0.0767       0.304147   0.34       medium  
Accuracy     0.6522    ±0.1783     0.3454    ±0.0251     0.3068       0.000306   1.79       large   

3️⃣ IMPROVEMENT FROM BASELINE
--------------

: 