In [23]:
# TODO

# 1.

In [24]:
import random
import time
from collections import defaultdict, Counter
from math import factorial, comb, sqrt
from statistics import mean, stdev
import numpy as np
from scipy.stats import chi2_contingency, chisquare

import combobgen
from combobgen import *

# print("Путь к модулю:", combobgen.__file__)
# print("Загрузчик:", combobgen.__loader__)

# print(dir(combobgen))

# misc

In [29]:
def gen_test_file(algorithm, fname='random_alg_test.txt', num_samples=1000, **kwargs):
    with open(f'{fname}', 'w') as f:
        for _ in range(num_samples):
            res = algorithm(**kwargs)
            s = ''
            for ch in res:
                s += str(ch)
            f.write(s + '\n')

In [31]:
arr = [secrets.randbelow(2) for i in range(100)]

print(arr)

gen_test_file(generate_random_permutation_fisher_yates, fname='haha', num_samples=100, arr=arr)

[1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]


# Векторы веса t

In [5]:
def test_binary_vector_uniformity(algorithm, n, t, num_samples=10000):
    """
    Тест равномерности распределения двоичных векторов
    """
    position_counts = [0] * n
    
    for _ in range(num_samples):
        vector = algorithm(n, t)
        for i, bit in enumerate(vector):
            if bit == 1:
                position_counts[i] += 1
    
    # Хи-квадрат тест для позиций
    expected_freq = num_samples * t / n
    chi2_stat = sum((obs - expected_freq)**2 / expected_freq for obs in position_counts)
    
    return [
        ("samples_tested", num_samples),
        ("expected_frequency_per_position", f"{expected_freq:.2f}"),
        ("chi2_statistic", f"{chi2_stat:.4f}"),
        ("degrees_of_freedom", n - 1),
        ("uniformity_passed", chi2_stat < 3.841 * (n-1))
    ]

def test_binary_vector_properties(algorithm, n, t, num_samples=10000):
    """
    Тест основных свойств двоичных векторов
    """
    wrong_lengths = 0
    wrong_weights = 0
    invalid_bits = 0
    
    for _ in range(num_samples):
        vector = algorithm(n, t)
        
        # Проверка длины
        if len(vector) != n:
            wrong_lengths += 1
            
        # Проверка веса
        if sum(vector) != t:
            wrong_weights += 1
            
        # Проверка корректности битов
        if not all(bit in [0, 1] for bit in vector):
            invalid_bits += 1
    
    return [
        ("samples_tested", num_samples),
        ("wrong_lengths", wrong_lengths),
        ("wrong_weights", wrong_weights),
        ("invalid_bits", invalid_bits),
        ("success_rate", f"{(num_samples - wrong_lengths - wrong_weights - invalid_bits) / num_samples:.4f}")
    ]

def test_binary_vector_distance(algorithm, n, t, num_samples=1000):
    """
    Тест распределения расстояний Хэмминга между векторами
    """
    vectors = []
    for _ in range(num_samples):
        vectors.append(algorithm(n, t))
    
    # Вычисление расстояний Хэмминга
    distances = []
    for i in range(min(100, len(vectors))):  # Ограничиваем для производительности
        for j in range(i+1, min(100, len(vectors))):
            hamming_dist = sum(a != b for a, b in zip(vectors[i], vectors[j]))
            distances.append(hamming_dist)
    
    if distances:
        avg_distance = mean(distances)
        min_distance = min(distances)
        max_distance = max(distances)
        
        return [
            ("vector_pairs_tested", len(distances)),
            ("avg_hamming_distance", f"{avg_distance:.2f}"),
            ("min_hamming_distance", min_distance),
            ("max_hamming_distance", max_distance),
            ("expected_avg_distance", f"{2*t*(n-t)/n:.2f}")
        ]
    else:
        return [("error", "No distances calculated")]

def test_binary_vector_performance(algorithm, n, t, num_samples=1000):
    """
    Тест производительности алгоритма двоичных векторов
    """
    times = []
    
    for _ in range(num_samples):
        start_time = time.time()
        vector = algorithm(n, t)
        end_time = time.time()
        times.append(end_time - start_time)
    
    return [
        ("samples_tested", num_samples),
        ("avg_time_seconds", f"{mean(times):.6f}"),
        ("min_time_seconds", f"{min(times):.6f}"),
        ("max_time_seconds", f"{max(times):.6f}"),
        ("std_time_seconds", f"{stdev(times):.6f}")
    ]


# Выборки k из n

In [6]:
def test_sampling_uniformity(algorithm, n, k, num_samples=10000):
    """
    Тест равномерности распределения элементов в выборке
    """
    element_counts = [0] * n
    
    for _ in range(num_samples):
        sample = algorithm(list(range(n)), k)
        for elem in sample:
            element_counts[elem] += 1
    
    # Хи-квадрат тест
    expected_freq = num_samples * k / n
    chi2_stat = sum((obs - expected_freq)**2 / expected_freq for obs in element_counts)
    
    return [
        ("samples_tested", num_samples),
        ("expected_frequency", f"{expected_freq:.2f}"),
        ("chi2_statistic", f"{chi2_stat:.4f}"),
        ("degrees_of_freedom", n - 1),
        ("uniformity_passed", chi2_stat < 3.841 * (n-1))
    ]

def test_sampling_properties(algorithm, n, k, num_samples=10000):
    """
    Тест основных свойств выборки
    """
    invalid_samples = 0
    duplicate_elements = 0
    wrong_sizes = 0
    
    for _ in range(num_samples):
        sample = algorithm(list(range(n)), k)
        
        # Проверка размера
        if len(sample) != k:
            wrong_sizes += 1
            
        # Проверка уникальности
        if len(set(sample)) != len(sample):
            duplicate_elements += 1
            
        # Проверка валидности элементов
        if not all(0 <= elem < n for elem in sample):
            invalid_samples += 1
    
    return [
        ("samples_tested", num_samples),
        ("wrong_sizes", wrong_sizes),
        ("duplicate_elements", duplicate_elements),
        ("invalid_elements", invalid_samples),
        ("success_rate", f"{(num_samples - wrong_sizes - duplicate_elements - invalid_samples) / num_samples:.4f}")
    ]

def test_sampling_coverage(algorithm, n, k, num_samples=10000):
    """
    Тест покрытия элементов в выборках
    """
    coverage_counts = [0] * n
    pair_counts = defaultdict(int)
    
    for _ in range(num_samples):
        sample = algorithm(list(range(n)), k)
        
        # Подсчет покрытия элементов
        for elem in sample:
            coverage_counts[elem] += 1
            
        # Подсчет пар (если k >= 2)
        if k >= 2:
            sorted_sample = sorted(sample)
            for i in range(len(sorted_sample)):
                for j in range(i+1, len(sorted_sample)):
                    pair_counts[(sorted_sample[i], sorted_sample[j])] += 1
    
    # Анализ покрытия
    min_coverage = min(coverage_counts)
    max_coverage = max(coverage_counts)
    coverage_ratio = min_coverage / max_coverage if max_coverage > 0 else 0
    
    results = [
        ("samples_tested", num_samples),
        ("min_element_coverage", min_coverage),
        ("max_element_coverage", max_coverage),
        ("coverage_ratio", f"{coverage_ratio:.4f}"),
        ("elements_never_selected", sum(1 for count in coverage_counts if count == 0))
    ]
    
    if k >= 2:
        expected_pairs = comb(n, 2)
        actual_pairs = len(pair_counts)
        results.extend([
            ("unique_pairs_found", actual_pairs),
            ("total_possible_pairs", expected_pairs),
            ("pair_coverage", f"{actual_pairs / expected_pairs:.4f}")
        ])
    
    return results

def test_sampling_performance(algorithm, n, k, num_samples=1000):
    """
    Тест производительности алгоритма выборки
    """
    times = []
    
    for _ in range(num_samples):
        start_time = time.time()
        sample = algorithm(list(range(n)), k)
        end_time = time.time()
        times.append(end_time - start_time)
    
    return [
        ("samples_tested", num_samples),
        ("avg_time_seconds", f"{mean(times):.6f}"),
        ("min_time_seconds", f"{min(times):.6f}"),
        ("max_time_seconds", f"{max(times):.6f}"),
        ("std_time_seconds", f"{stdev(times):.6f}")
    ]


# Перестановки из n

In [7]:
def test_permutation_uniformity(algorithm, n, num_samples=10000):
    """
    Тест равномерности распределения перестановок
    """
    if n > 8:  # Для больших n используем позиционную равномерность
        return test_permutation_positional_uniformity(algorithm, n, num_samples)
    
    # Для малых n проверяем все возможные перестановки
    permutation_counts = defaultdict(int)
    total_permutations = factorial(n)
    
    for _ in range(num_samples):
        arr = list(range(n))
        algorithm(arr)
        permutation_counts[tuple(arr)] += 1
    
    # Хи-квадрат тест
    expected_freq = num_samples / total_permutations
    observed_freqs = list(permutation_counts.values())
    
    # Добавляем нулевые частоты для невстретившихся перестановок
    observed_freqs.extend([0] * (total_permutations - len(observed_freqs)))
    
    chi2_stat = sum((obs - expected_freq)**2 / expected_freq for obs in observed_freqs)
    degrees_of_freedom = total_permutations - 1
    
    return [
        ("generated_permutations", len(permutation_counts)),
        ("total_possible", total_permutations),
        ("chi2_statistic", f"{chi2_stat:.4f}"),
        ("degrees_of_freedom", degrees_of_freedom),
        ("uniformity_passed", chi2_stat < 3.841 * degrees_of_freedom / 100)  # Приблизительный критерий
    ]

def test_permutation_positional_uniformity(algorithm, n, num_samples=10000):
    """
    Тест равномерности элементов по позициям
    """
    position_counts = [[0] * n for _ in range(n)]
    
    for _ in range(num_samples):
        arr = list(range(n))
        algorithm(arr)
        for pos, elem in enumerate(arr):
            position_counts[pos][elem] += 1
    
    # Хи-квадрат тест для каждой позиции
    expected_freq = num_samples / n
    chi2_stats = []
    
    for pos in range(n):
        observed = position_counts[pos]
        chi2_stat = sum((obs - expected_freq)**2 / expected_freq for obs in observed)
        chi2_stats.append(chi2_stat)
    
    avg_chi2 = mean(chi2_stats)
    max_chi2 = max(chi2_stats)
    
    return [
        ("samples_tested", num_samples),
        ("avg_chi2_statistic", f"{avg_chi2:.4f}"),
        ("max_chi2_statistic", f"{max_chi2:.4f}"),
        ("positions_passed", sum(1 for chi2 in chi2_stats if chi2 < 3.841 * (n-1))),
        ("total_positions", n)
    ]

def test_permutation_properties(algorithm, n, num_samples=10000):
    """
    Тест статистических свойств перестановок
    """
    fixed_points = []
    inversions = []
    even_count = 0
    
    for _ in range(num_samples):
        arr = list(range(n))
        algorithm(arr)
        
        # Подсчет неподвижных точек
        fixed_points.append(sum(1 for i, x in enumerate(arr) if i == x))
        
        # Подсчет инверсий
        inv_count = sum(1 for i in range(n) for j in range(i+1, n) if arr[i] > arr[j])
        inversions.append(inv_count)
        
        # Подсчет четности
        if inv_count % 2 == 0:
            even_count += 1
    
    # Проверка распределения неподвижных точек (должно быть близко к Пуассону с λ=1)
    avg_fixed = mean(fixed_points)
    
    # Проверка равномерности четных/нечетных перестановок
    even_ratio = even_count / num_samples
    
    return [
        ("samples_tested", num_samples),
        ("avg_fixed_points", f"{avg_fixed:.4f}"),
        ("expected_fixed_points", "1.0"),
        ("avg_inversions", f"{mean(inversions):.2f}"),
        ("expected_inversions", f"{n*(n-1)/4:.2f}"),
        ("even_permutations_ratio", f"{even_ratio:.4f}"),
        ("expected_even_ratio", "0.5")
    ]

def test_permutation_performance(algorithm, n, num_samples=1000):
    """
    Тест производительности алгоритма перестановок
    """
    times = []
    
    for _ in range(num_samples):
        arr = list(range(n))
        start_time = time.time()
        algorithm(arr)
        end_time = time.time()
        times.append(end_time - start_time)
    
    return [
        ("samples_tested", num_samples),
        ("avg_time_seconds", f"{mean(times):.6f}"),
        ("min_time_seconds", f"{min(times):.6f}"),
        ("max_time_seconds", f"{max(times):.6f}"),
        ("std_time_seconds", f"{stdev(times):.6f}")
    ]


# Main

In [8]:
random.seed(42)
    
print("=== ТЕСТЫ АЛГОРИТМОВ ПЕРЕСТАНОВОК ===")

# Настройка для тестов перестановок
permutation_tests = {
    "uniformity": test_permutation_uniformity,
    "properties": test_permutation_properties,
    "performance": test_permutation_performance,
}

permutation_algs = {
    "Fisher-Yates": fisher_yates_shuffle,
    # "Generate-Permutation": generate_random_permutation,
}

permutation_test_cases = {
    "uniformity": {
        'Fisher-Yates' : {
            "small_case": {"n": 4},
            "medium_case": {"n": 10},
        },
    },
    "properties": {
        'Fisher-Yates' : {
            "small_case": {"n": 5},
            "large_case": {"n": 100},
        },
    },
    "performance": {
        'Fisher-Yates' : {
            "small_case": {"n": 100},
            "medium_case": {"n": 1000},
            "large_case": {"n": 10000},
        },
    },
}

for test_name in permutation_tests.keys():
    print(f"\n{'='*60}")
    print(f"ТЕСТ ПЕРЕСТАНОВОК: {test_name.upper()}")
    print(f"{'='*60}")
    
    for alg_name in permutation_algs.keys():
        print(f"\n{'-'*40}")
        print(f"АЛГОРИТМ: {alg_name}")
        print(f"{'-'*40}")
        
        for case_name, params in permutation_test_cases[test_name][alg_name].items():
            print(f"\n{case_name}: {list(params.values())}")
            
            try:
                results = permutation_tests[test_name](permutation_algs[alg_name], **params)
                for metric_name, value in results:
                    print(f"  {metric_name}: {value}")
            except Exception as e:
                print(f"  ОШИБКА: {e}")

print("\n\n=== ТЕСТЫ АЛГОРИТМОВ ВЫБОРКИ ===")

# Настройка для тестов выборки
sampling_tests = {
    "uniformity": test_sampling_uniformity,
    "properties": test_sampling_properties,
    "coverage": test_sampling_coverage,
    "performance": test_sampling_performance,
}

sampling_algs = {
    "Reservoir-Sampling": reservoir_sampling_list,
}

sampling_test_cases = {
    "uniformity": {
        "Reservoir-Sampling" : {
            "small_case": {"n": 20, "k": 5},
            "medium_case": {"n": 100, "k": 10},
        },
    },
    "properties": {
        "Reservoir-Sampling" : {
            "basic_case": {"n": 50, "k": 10},
            "edge_case": {"n": 10, "k": 9},
        },
    },
    "coverage": {
        "Reservoir-Sampling" : {
            "low_coverage": {"n": 100, "k": 5},
            "high_coverage": {"n": 20, "k": 15},
        },
    },
    "performance": {
        "Reservoir-Sampling" : {
            "small_case": {"n": 1000, "k": 100},
            "medium_case": {"n": 10000, "k": 1000},
            "large_case": {"n": 100000, "k": 10000},
        },
    },
}

for test_name in sampling_tests.keys():
    print(f"\n{'='*60}")
    print(f"ТЕСТ ВЫБОРКИ: {test_name.upper()}")
    print(f"{'='*60}")
    
    for alg_name in sampling_algs.keys():
        print(f"\n{'-'*40}")
        print(f"АЛГОРИТМ: {alg_name}")
        print(f"{'-'*40}")
        
        for case_name, params in sampling_test_cases[test_name][alg_name].items():
            print(f"\n{case_name}: n={params['n']}, k={params['k']}")
            
            try:
                results = sampling_tests[test_name](sampling_algs[alg_name], **params)
                for metric_name, value in results:
                    print(f"  {metric_name}: {value}")
            except Exception as e:
                print(f"  ОШИБКА: {e}")

print("\n\n=== ТЕСТЫ АЛГОРИТМОВ ДВОИЧНЫХ ВЕКТОРОВ ===")

# Настройка для тестов двоичных векторов
binary_vector_tests = {
    "uniformity": test_binary_vector_uniformity,
    "properties": test_binary_vector_properties,
    "distance": test_binary_vector_distance,
    "performance": test_binary_vector_performance,
}

binary_vector_algs = {
    "Random-Sampling": generate_binary_vector,
}

binary_vector_test_cases = {
    "uniformity": {
        "Random-Sampling" : {
            "small_case": {"n": 10, "t": 3},
            "medium_case": {"n": 50, "t": 10},
        },
    },
    "properties": {
        "Random-Sampling" : {
            "basic_case": {"n": 20, "t": 5},
            "edge_case": {"n": 10, "t": 9},
        },
    },
    "distance": {
        "Random-Sampling" : {
            "small_case": {"n": 20, "t": 5},
            "medium_case": {"n": 50, "t": 15},
        },
    },
    "performance": {
        "Random-Sampling" : {
            "small_case": {"n": 100, "t": 25},
            "medium_case": {"n": 1000, "t": 250},
            "large_case": {"n": 10000, "t": 2500},
        },
    },
}

for test_name in binary_vector_tests.keys():
    print(f"\n{'='*60}")
    print(f"ТЕСТ ДВОИЧНЫХ ВЕКТОРОВ: {test_name.upper()}")
    print(f"{'='*60}")
    
    for alg_name in binary_vector_algs.keys():
        print(f"\n{'-'*40}")
        print(f"АЛГОРИТМ: {alg_name}")
        print(f"{'-'*40}")
        
        for case_name, params in binary_vector_test_cases[test_name][alg_name].items():
            print(f"\n{case_name}: n={params['n']}, t={params['t']}")
            
            try:
                results = binary_vector_tests[test_name](binary_vector_algs[alg_name], **params)
                for metric_name, value in results:
                    print(f"  {metric_name}: {value}")
            except Exception as e:
                print(f"  ОШИБКА: {e}")

print("Все тесты пройдены успешно!")

=== ТЕСТЫ АЛГОРИТМОВ ПЕРЕСТАНОВОК ===


NameError: name 'fisher_yates_shuffle' is not defined