In [131]:
import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import random
from itertools import combinations

def load_winning_numbers(file_path):
    df = pd.read_excel(file_path, header=None)
    return df.values.tolist()

def analyze_number_frequency(numbers):
    all_numbers = [num for combo in numbers for num in combo]
    frequency = Counter(all_numbers)
    total_draws = len(numbers)
    return {num: count / total_draws for num, count in frequency.items()}

def analyze_historical_similarities(numbers):
    similarities = defaultdict(list)
    for i in range(1, len(numbers)):
        current = set(numbers[i])
        for j in range(i):
            prev = set(numbers[j])
            similarity = len(current & prev)
            similarities[similarity].append((j, i))
    return similarities

def calculate_combination_probability(combo, historical_numbers, historical_similarities):
    combo_set = set(combo)
    probability = 0
    total_comparisons = sum(len(sim_list) for sim_list in historical_similarities.values())
    
    for similarity, occurrences in historical_similarities.items():
        for prev_idx, current_idx in occurrences:
            prev_set = set(historical_numbers[prev_idx])
            current_set = set(historical_numbers[current_idx])
            if len(combo_set & prev_set) == similarity:
                probability += 1 / total_comparisons
    
    return probability

def predict_next_numbers_with_historical_similarity(historical_numbers, historical_similarities, num_predictions=5):
    all_possible_combos = list(combinations(range(1, 46), 6))
    combo_probabilities = []
    
    for combo in all_possible_combos:
        prob = calculate_combination_probability(combo, historical_numbers, historical_similarities)
        combo_probabilities.append((combo, prob))
    
    combo_probabilities.sort(key=lambda x: x[1], reverse=True)
    return combo_probabilities[:num_predictions]

def evaluate_prediction_accuracy(predictions, actual_numbers):
    actual_set = set(actual_numbers)
    accuracies = []
    for pred, _ in predictions:
        pred_set = set(pred)
        accuracy = len(pred_set & actual_set) / 6  # 6개 번호 중 맞춘 비율
        accuracies.append(accuracy)
    return accuracies

# 메인 실행 부분
file_path = 'winner_number.xlsx'
winning_numbers = load_winning_numbers(file_path)

print(f"총 {len(winning_numbers)}개의 당첨 번호를 로드했습니다.")

last_winning_numbers = winning_numbers[-1]
print("\n마지막 당첨 번호:", last_winning_numbers)

historical_similarities = analyze_historical_similarities(winning_numbers)

print("\n역사적 번호 유사성 분포:")
for similarity, occurrences in sorted(historical_similarities.items()):
    percentage = (len(occurrences) / sum(len(sim_list) for sim_list in historical_similarities.values())) * 100
    print(f"{similarity}개 일치: {len(occurrences)}회 ({percentage:.2f}%)")

predictions = predict_next_numbers_with_historical_similarity(winning_numbers, historical_similarities)

print("\n역사적 유사성 기반 다음 회차 예측 (상위 5개):")
for i, (pred, prob) in enumerate(predictions, 1):
    relative_prob = prob / (1/8145060)
    print(f"{i}. 번호: {sorted(pred)}")
    print(f"   상대적 확률: 무작위 선택 대비 {relative_prob:.2f}배")
    print(f"   절대 확률: {prob:.10f} ({prob*100:.8f}%)")

number_frequency = analyze_number_frequency(winning_numbers)
top_numbers = sorted(number_frequency.items(), key=lambda x: x[1], reverse=True)[:10]

print("\n가장 자주 나온 번호 (상위 10개):")
for num, freq in top_numbers:
    print(f"번호 {num}: {freq:.2%}")

# 예측된 번호와 자주 나온 번호의 일치도 분석
print("\n예측된 번호와 자주 나온 번호의 일치도:")
top_numbers_set = set(num for num, _ in top_numbers)
for i, (pred, _) in enumerate(predictions, 1):
    common_numbers = set(pred) & top_numbers_set
    print(f"예측 {i}: 상위 10개 번호와 {len(common_numbers)}개 일치 - {sorted(common_numbers)}")

# 이전 회차의 예측 정확도 평가 (마지막 회차 제외)
print("\n이전 10회차 예측 정확도 평가:")
for i in range(-11, -1):  # 마지막 11회차부터 2회차까지
    historical_data = winning_numbers[:i]
    historical_sims = analyze_historical_similarities(historical_data)
    predictions = predict_next_numbers_with_historical_similarity(historical_data, historical_sims)
    actual_numbers = winning_numbers[i]
    accuracies = evaluate_prediction_accuracy(predictions, actual_numbers)
    avg_accuracy = sum(accuracies) / len(accuracies)
    print(f"회차 {len(winning_numbers)+i+1}: 평균 정확도 {avg_accuracy:.2%}")

print("\n주의: 이 분석과 예측은 과거 데이터를 기반으로 한 것이며, 미래의 결과를 보장하지 않습니다.")

총 1139개의 당첨 번호를 로드했습니다.

마지막 당첨 번호: [5, 12, 15, 30, 37, 40]

역사적 번호 유사성 분포:
0개 일치: 260016회 (40.12%)
1개 일치: 274430회 (42.34%)
2개 일치: 98336회 (15.17%)
3개 일치: 14466회 (2.23%)
4개 일치: 825회 (0.13%)
5개 일치: 18회 (0.00%)


KeyboardInterrupt: 

In [130]:
import numpy as np
import pandas as pd
from collections import defaultdict, Counter
import random
from itertools import combinations
from multiprocessing import Pool, cpu_count
import time

def load_winning_numbers(file_path):
    df = pd.read_excel(file_path, header=None)
    return df.values.tolist()

def analyze_number_frequency(numbers):
    all_numbers = [num for combo in numbers for num in combo]
    frequency = Counter(all_numbers)
    total_draws = len(numbers)
    return {num: count / total_draws for num, count in frequency.items()}

def analyze_historical_similarities(numbers):
    similarities = defaultdict(int)
    numbers_set = [set(num) for num in numbers]
    for i in range(1, len(numbers)):
        for j in range(i):
            similarity = len(numbers_set[i] & numbers_set[j])
            similarities[similarity] += 1
    return similarities

def calculate_combination_probability(combo, historical_numbers_set, historical_similarities, total_comparisons):
    combo_set = set(combo)
    probability = sum(
        historical_similarities[len(combo_set & prev_set)]
        for prev_set in historical_numbers_set
    ) / total_comparisons
    return combo, probability

def predict_next_numbers_with_historical_similarity(historical_numbers, historical_similarities, num_predictions=5):
    all_possible_combos = list(combinations(range(1, 46), 6))
    historical_numbers_set = [set(num) for num in historical_numbers]
    total_comparisons = sum(historical_similarities.values())

    with Pool(cpu_count()) as p:
        combo_probabilities = p.starmap(
            calculate_combination_probability,
            [(combo, historical_numbers_set, historical_similarities, total_comparisons) for combo in all_possible_combos]
        )

    combo_probabilities.sort(key=lambda x: x[1], reverse=True)
    return combo_probabilities[:num_predictions]

def evaluate_prediction_accuracy(predictions, actual_numbers):
    actual_set = set(actual_numbers)
    return [len(set(pred) & actual_set) / 6 for pred, _ in predictions]

def main():
    start_time = time.time()

    file_path = 'winner_number.xlsx'
    winning_numbers = load_winning_numbers(file_path)

    print(f"총 {len(winning_numbers)}개의 당첨 번호를 로드했습니다.")

    last_winning_numbers = winning_numbers[-1]
    print("\n마지막 당첨 번호:", last_winning_numbers)

    historical_similarities = analyze_historical_similarities(winning_numbers)

    print("\n역사적 번호 유사성 분포:")
    total_comparisons = sum(historical_similarities.values())
    for similarity, count in sorted(historical_similarities.items()):
        percentage = (count / total_comparisons) * 100
        print(f"{similarity}개 일치: {count}회 ({percentage:.2f}%)")

    predictions = predict_next_numbers_with_historical_similarity(winning_numbers, historical_similarities)

    print("\n역사적 유사성 기반 다음 회차 예측 (상위 5개):")
    for i, (pred, prob) in enumerate(predictions, 1):
        relative_prob = prob / (1/8145060)
        print(f"{i}. 번호: {sorted(pred)}")
        print(f"   상대적 확률: 무작위 선택 대비 {relative_prob:.2f}배")
        print(f"   절대 확률: {prob:.10f} ({prob*100:.8f}%)")

    number_frequency = analyze_number_frequency(winning_numbers)
    top_numbers = sorted(number_frequency.items(), key=lambda x: x[1], reverse=True)[:10]

    print("\n가장 자주 나온 번호 (상위 10개):")
    for num, freq in top_numbers:
        print(f"번호 {num}: {freq:.2%}")

    print("\n예측된 번호와 자주 나온 번호의 일치도:")
    top_numbers_set = set(num for num, _ in top_numbers)
    for i, (pred, _) in enumerate(predictions, 1):
        common_numbers = set(pred) & top_numbers_set
        print(f"예측 {i}: 상위 10개 번호와 {len(common_numbers)}개 일치 - {sorted(common_numbers)}")

    print("\n이전 10회차 예측 정확도 평가:")
    for i in range(-11, -1):
        historical_data = winning_numbers[:i]
        historical_sims = analyze_historical_similarities(historical_data)
        predictions = predict_next_numbers_with_historical_similarity(historical_data, historical_sims)
        actual_numbers = winning_numbers[i]
        accuracies = evaluate_prediction_accuracy(predictions, actual_numbers)
        avg_accuracy = sum(accuracies) / len(accuracies)
        print(f"회차 {len(winning_numbers)+i+1}: 평균 정확도 {avg_accuracy:.2%}")

    end_time = time.time()
    print(f"\n총 실행 시간: {end_time - start_time:.2f}초")

    print("\n주의: 이 분석과 예측은 과거 데이터를 기반으로 한 것이며, 미래의 결과를 보장하지 않습니다.")


main()

총 1139개의 당첨 번호를 로드했습니다.

마지막 당첨 번호: [5, 12, 15, 30, 37, 40]

역사적 번호 유사성 분포:
0개 일치: 260016회 (40.12%)
1개 일치: 274430회 (42.34%)
2개 일치: 98336회 (15.17%)
3개 일치: 14466회 (2.23%)
4개 일치: 825회 (0.13%)
5개 일치: 18회 (0.00%)


Process SpawnPoolWorker-43:
Traceback (most recent call last):
  File "/Users/terman/.pyenv/versions/miniforge3-4.10.3-10/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/terman/.pyenv/versions/miniforge3-4.10.3-10/lib/python3.9/multiprocessing/process.py", line 108, in run
    self._target(*self._args, **self._kwargs)
  File "/Users/terman/.pyenv/versions/miniforge3-4.10.3-10/lib/python3.9/multiprocessing/pool.py", line 114, in worker
    task = get()
  File "/Users/terman/.pyenv/versions/miniforge3-4.10.3-10/lib/python3.9/multiprocessing/queues.py", line 368, in get
    return _ForkingPickler.loads(res)
AttributeError: Can't get attribute 'calculate_combination_probability' on <module '__main__' (built-in)>
Process SpawnPoolWorker-42:
Traceback (most recent call last):
  File "/Users/terman/.pyenv/versions/miniforge3-4.10.3-10/lib/python3.9/multiprocessing/process.py", line 315, in _bootstrap
    self.run()
  File "/Users/terman/.pyenv/

KeyboardInterrupt: 