#### 로또 번호 예측

In [1]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()



### (52주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 7, 13, 28, 36, 42]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_52_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_52_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_52_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_52_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_26_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_26_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_26_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_26_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
neural_network              : [5, 1

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 56ms/step
neural_network              : [7, 16, 19, 25, 30, 39] (당첨금: -1000원)
random_forest               : [6, 13, 21, 26, 35, 39] (당첨금: -1000원)
gradient_boosting           : [2, 6, 7, 8, 10, 23] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238ms/step
LSTM                        : [7, 13, 14, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [3, 7, 12, 18, 33, 39] (당첨금: -1000원)
KMeans                      : [17, 18, 19, 21, 24, 26] (당첨금: -1000원)
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 8, 15, 26, 33, 42] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (47주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 3, 4, 29, 42, 45]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency         

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
neural_network              : [6, 13, 18, 25, 35, 40] (당첨금: -1000원)
random_forest               : [9, 16, 23, 28, 34, 39] (당첨금: -1000원)
gradient_boosting           : [4, 5, 6, 7, 8, 11] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 255ms/step
LSTM                        : [6, 12, 32, 33, 38, 39] (당첨금: -1000원)
Bayesian                    : [6, 12, 16, 18, 36, 45] (당첨금: -1000원)
KMeans                      : [3, 6, 7, 11, 12, 13] (당첨금: 4000원)
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
AutoEncoder                 : [1, 6, 8, 21, 21, 42] (당첨금: -1000원)
MarkovChain                 : [2, 41, 42, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (42주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [3, 13, 30, 33, 43, 45]
total_high_frequency        : [12, 18, 21, 33, 38, 45] (당첨금: -1000원)
total_low_frequency         : [5

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 60ms/step
neural_network              : [5, 13, 21, 25, 30, 42] (당첨금: -1000원)
random_forest               : [6, 13, 19, 27, 33, 41] (당첨금: -1000원)
gradient_boosting           : [3, 5, 6, 7, 9, 29] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 244ms/step
LSTM                        : [7, 14, 15, 33, 34, 40] (당첨금: -1000원)
Bayesian                    : [6, 7, 11, 12, 18, 21] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 20, 34, 42, 42, 45] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (37주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [15, 16, 17, 25, 30, 31]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency      

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
neural_network              : [7, 14, 21, 25, 30, 40] (당첨금: -1000원)
random_forest               : [8, 13, 20, 26, 33, 40] (당첨금: -1000원)
gradient_boosting           : [3, 5, 6, 7, 8, 10] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step
LSTM                        : [7, 14, 20, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [2, 12, 18, 33, 38, 39] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 10, 25, 37, 38, 44] (당첨금: -1000원)
MarkovChain                 : [1, 2, 3, 4, 5, 6] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (32주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 24, 31, 32, 38, 44]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [4, 11, 17, 27, 36, 39] (당첨금: -1000원)
random_forest               : [6, 13, 20, 26, 34, 41] (당첨금: -1000원)
gradient_boosting           : [2, 4, 6, 7, 8, 13] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step
LSTM                        : [7, 13, 19, 24, 38, 39] (당첨금: -1000원)
Bayesian                    : [6, 12, 18, 19, 38, 43] (당첨금: -1000원)
KMeans                      : [18, 19, 21, 24, 26, 27] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 17, 34, 36, 38, 44] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (27주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [4, 5, 9, 11, 37, 40]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency       

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 82ms/step
neural_network              : [6, 13, 16, 21, 32, 38] (당첨금: -1000원)
random_forest               : [7, 11, 21, 26, 31, 40] (당첨금: -1000원)
gradient_boosting           : [5, 8, 9, 10, 24, 28] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 228ms/step
LSTM                        : [6, 13, 14, 19, 33, 40] (당첨금: -1000원)
Bayesian                    : [12, 14, 18, 19, 38, 45] (당첨금: -1000원)
KMeans                      : [36, 38, 39, 43, 44, 45] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step
AutoEncoder                 : [1, 12, 16, 28, 37, 40] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (22주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 2, 6, 14, 27, 38]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency     

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 71ms/step
neural_network              : [3, 9, 16, 29, 34, 39] (당첨금: -1000원)
random_forest               : [8, 13, 19, 29, 35, 41] (당첨금: -1000원)
gradient_boosting           : [3, 5, 6, 9, 10, 11] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 254ms/step
LSTM                        : [7, 13, 32, 33, 40, 41] (당첨금: -1000원)
Bayesian                    : [7, 13, 18, 34, 36, 44] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 16, 18, 19] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [2, 10, 12, 24, 33, 44] (당첨금: -1000원)
MarkovChain                 : [25, 27, 29, 30, 31, 32] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (17주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [21, 33, 35, 38, 42, 44]
total_high_frequency        : [6, 12, 18, 19, 21, 33] (당첨금: -1000원)
total_low_frequency     

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
neural_network              : [8, 16, 21, 29, 30, 40] (당첨금: -1000원)
random_forest               : [6, 14, 20, 26, 31, 39] (당첨금: -1000원)
gradient_boosting           : [5, 6, 7, 8, 9, 21] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step
LSTM                        : [6, 7, 32, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [11, 13, 19, 33, 39, 44] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 16, 18, 19] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 4, 17, 39, 42, 43] (당첨금: -1000원)
MarkovChain                 : [2, 41, 42, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (12주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [7, 11, 12, 21, 26, 35]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency       

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [9, 11, 13, 24, 30, 40] (당첨금: -1000원)
random_forest               : [8, 11, 20, 25, 30, 37] (당첨금: -1000원)
gradient_boosting           : [6, 7, 9, 13, 23, 28] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
LSTM                        : [6, 13, 19, 32, 33, 39] (당첨금: -1000원)
Bayesian                    : [19, 21, 24, 33, 34, 45] (당첨금: -1000원)
KMeans                      : [28, 30, 31, 33, 34, 36] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 872us/step
AutoEncoder                 : [1, 10, 13, 40, 42, 45] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (7주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 11, 17, 19, 40, 43]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency  

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 79ms/step
neural_network              : [6, 12, 20, 22, 35, 38] (당첨금: -1000원)
random_forest               : [6, 15, 20, 26, 32, 40] (당첨금: -1000원)
gradient_boosting           : [5, 7, 10, 16, 27, 33] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step
LSTM                        : [6, 13, 18, 32, 33, 40] (당첨금: -1000원)
Bayesian                    : [6, 13, 21, 36, 38, 45] (당첨금: -1000원)
KMeans                      : [28, 30, 31, 33, 34, 36] (당첨금: -1000원)
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 6, 15, 15, 36, 42] (당첨금: -1000원)
MarkovChain                 : [1, 3, 37, 38, 39, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (2주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [2, 3, 9, 15, 27, 29]
total_high_frequency        : [6, 12, 18, 19, 21, 33] (당첨금: -1000원)
total_low_frequency         

In [2]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()



### (52주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 7, 13, 28, 36, 42]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_52_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_52_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_52_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_52_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_26_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_26_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_26_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_26_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 63ms/step
neural_network              : [7, 1

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 9, 11, 23, 28, 34] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (47주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 3, 4, 29, 42, 45]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency         : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [11, 18, 24, 31, 37, 44] (당첨금: -1000원)
recent_52_high_frequency    : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
recent_52_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_52_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_52_next_vector       : [11, 18, 24, 31, 37, 44] (당첨금: -1000원)
recent_26_high_frequency    : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
recent_26_low_frequency     : [5

gradient_boosting           : [3, 5, 6, 7, 9, 11] (당첨금: 4000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
LSTM                        : [7, 13, 33, 34, 39, 40] (당첨금: -1000원)
Bayesian                    : [18, 19, 38, 42, 43, 45] (당첨금: -1000원)
KMeans                      : [3, 6, 7, 11, 12, 13] (당첨금: 4000원)
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
AutoEncoder                 : [1, 11, 15, 25, 30, 35] (당첨금: -1000원)
MarkovChain                 : [2, 41, 42, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (42주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [3, 13, 30, 33, 43, 45]
total_high_frequency        : [12, 18, 21, 33, 38, 45] (당첨금: -1000원)
total_low_frequency         : [5, 8, 9, 20, 25, 32] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [3, 10, 16, 23, 29, 36] (당첨금: -1000원)
recent_52_high_frequency    : [12, 

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step
neural_network              : [4, 13, 19, 25, 30, 36] (당첨금: -1000원)
random_forest               : [6, 12, 21, 27, 34, 40] (당첨금: -1000원)
gradient_boosting           : [5, 6, 7, 15, 22, 38] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
LSTM                        : [6, 7, 12, 32, 33, 39] (당첨금: 4000원)
Bayesian                    : [10, 13, 16, 18, 34, 45] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 3, 16, 30, 36, 41] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (37주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [15, 16, 17, 25, 30, 31]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency     

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [9, 13, 22, 23, 34, 39] (당첨금: -1000원)
random_forest               : [7, 15, 18, 26, 31, 38] (당첨금: 4000원)
gradient_boosting           : [3, 4, 5, 6, 7, 8] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 258ms/step
LSTM                        : [6, 13, 14, 21, 33, 40] (당첨금: -1000원)
Bayesian                    : [7, 19, 38, 39, 43, 45] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 10, 20, 22, 38, 43] (당첨금: -1000원)
MarkovChain                 : [1, 2, 3, 4, 5, 6] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (32주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 24, 31, 32, 38, 44]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : [5

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [5, 13, 15, 26, 28, 37] (당첨금: -1000원)
random_forest               : [7, 14, 19, 27, 34, 41] (당첨금: -1000원)
gradient_boosting           : [3, 5, 6, 7, 8, 9] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step
LSTM                        : [6, 7, 12, 13, 34, 39] (당첨금: -1000원)
Bayesian                    : [6, 13, 21, 34, 44, 45] (당첨금: -1000원)
KMeans                      : [18, 19, 21, 24, 26, 27] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 6, 11, 20, 32, 36] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (27주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [4, 5, 9, 11, 37, 40]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         :

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
neural_network              : [5, 10, 18, 29, 31, 39] (당첨금: -1000원)
random_forest               : [5, 11, 21, 27, 33, 40] (당첨금: -1000원)
gradient_boosting           : [5, 6, 7, 8, 9, 10] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 234ms/step
LSTM                        : [6, 13, 20, 26, 33, 40] (당첨금: -1000원)
Bayesian                    : [12, 15, 19, 30, 33, 36] (당첨금: -1000원)
KMeans                      : [36, 38, 39, 43, 44, 45] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 13, 15, 27, 33, 44] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (22주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 2, 6, 14, 27, 38]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency      

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [7, 11, 17, 28, 35, 38] (당첨금: -1000원)
random_forest               : [6, 15, 22, 28, 34, 41] (당첨금: -1000원)
gradient_boosting           : [4, 5, 8, 10, 11, 12] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
LSTM                        : [6, 12, 18, 32, 38, 39] (당첨금: -1000원)
Bayesian                    : [6, 14, 15, 18, 33, 45] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 16, 18, 19] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 3, 7, 8, 10, 15] (당첨금: -1000원)
MarkovChain                 : [25, 27, 29, 30, 31, 32] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (17주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [21, 33, 35, 38, 42, 44]
total_high_frequency        : [6, 12, 18, 19, 21, 33] (당첨금: -1000원)
total_low_frequency      

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [3, 11, 19, 27, 32, 37] (당첨금: -1000원)
random_forest               : [7, 14, 20, 25, 31, 39] (당첨금: -1000원)
gradient_boosting           : [5, 6, 8, 9, 11, 43] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
LSTM                        : [6, 12, 13, 18, 32, 39] (당첨금: -1000원)
Bayesian                    : [3, 12, 16, 18, 33, 38] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 16, 18, 19] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 6, 13, 28, 43, 44] (당첨금: -1000원)
MarkovChain                 : [2, 41, 42, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (12주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [7, 11, 12, 21, 26, 35]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency      

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
neural_network              : [8, 14, 18, 29, 36, 41] (당첨금: -1000원)
random_forest               : [6, 14, 18, 24, 31, 38] (당첨금: -1000원)
gradient_boosting           : [6, 7, 8, 9, 29, 38] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 252ms/step
LSTM                        : [6, 12, 13, 19, 32, 39] (당첨금: -1000원)
Bayesian                    : [14, 18, 21, 27, 33, 38] (당첨금: -1000원)
KMeans                      : [28, 30, 31, 33, 34, 36] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 932us/step
AutoEncoder                 : [1, 4, 19, 28, 38, 42] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (7주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 11, 17, 19, 40, 43]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency    

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [6, 10, 19, 30, 35, 38] (당첨금: -1000원)
random_forest               : [8, 14, 21, 28, 32, 41] (당첨금: -1000원)
gradient_boosting           : [5, 6, 14, 25, 42, 45] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 227ms/step
LSTM                        : [6, 7, 32, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [3, 12, 19, 34, 35, 45] (당첨금: -1000원)
KMeans                      : [28, 30, 31, 33, 34, 36] (당첨금: -1000원)
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 5, 6, 9, 31, 41] (당첨금: -1000원)
MarkovChain                 : [1, 3, 37, 38, 39, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (2주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [2, 3, 9, 15, 27, 29]
total_high_frequency        : [6, 12, 18, 19, 21, 33] (당첨금: -1000원)
total_low_frequency         : [

In [3]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()



### (52주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 7, 13, 28, 36, 42]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_52_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_52_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_52_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_52_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_26_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_26_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_26_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_26_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 59ms/step
neural_network              : [5, 1

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 9, 11, 16, 17, 27] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (47주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 3, 4, 29, 42, 45]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency         : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [11, 18, 24, 31, 37, 44] (당첨금: -1000원)
recent_52_high_frequency    : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
recent_52_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_52_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_52_next_vector       : [11, 18, 24, 31, 37, 44] (당첨금: -1000원)
recent_26_high_frequency    : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
recent_26_low_frequency     : [5

gradient_boosting           : [5, 6, 8, 9, 20, 37] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 236ms/step
LSTM                        : [8, 13, 14, 33, 34, 40] (당첨금: -1000원)
Bayesian                    : [6, 16, 18, 39, 43, 45] (당첨금: -1000원)
KMeans                      : [3, 6, 7, 11, 12, 13] (당첨금: 4000원)
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 12, 13, 13, 19, 43] (당첨금: -1000원)
MarkovChain                 : [2, 41, 42, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (42주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [3, 13, 30, 33, 43, 45]
total_high_frequency        : [12, 18, 21, 33, 38, 45] (당첨금: -1000원)
total_low_frequency         : [5, 8, 9, 20, 25, 32] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [3, 10, 16, 23, 29, 36] (당첨금: -1000원)
recent_52_high_frequency    : [12,

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
neural_network              : [6, 10, 17, 30, 37, 39] (당첨금: -1000원)
random_forest               : [7, 12, 21, 27, 34, 41] (당첨금: 4000원)
gradient_boosting           : [3, 5, 6, 8, 23, 42] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step
LSTM                        : [8, 13, 34, 35, 38, 39] (당첨금: -1000원)
Bayesian                    : [16, 19, 34, 36, 38, 44] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 4, 18, 27, 35, 42] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (37주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [15, 16, 17, 25, 30, 31]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency     

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
neural_network              : [5, 14, 20, 24, 34, 42] (당첨금: -1000원)
random_forest               : [5, 13, 19, 25, 32, 39] (당첨금: -1000원)
gradient_boosting           : [4, 5, 7, 8, 9, 39] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step
LSTM                        : [7, 13, 32, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [7, 12, 18, 36, 43, 45] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 4, 15, 34, 44, 45] (당첨금: -1000원)
MarkovChain                 : [1, 2, 3, 4, 5, 6] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (32주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 24, 31, 32, 38, 44]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : [

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [8, 11, 14, 23, 30, 37] (당첨금: -1000원)
random_forest               : [7, 12, 20, 25, 35, 41] (당첨금: -1000원)
gradient_boosting           : [3, 4, 5, 7, 8, 43] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
LSTM                        : [6, 13, 33, 34, 38, 39] (당첨금: -1000원)
Bayesian                    : [12, 16, 31, 33, 34, 38] (당첨금: -1000원)
KMeans                      : [18, 19, 21, 24, 26, 27] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 2, 20, 30, 37, 38] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (27주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [4, 5, 9, 11, 37, 40]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency       

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 65ms/step
neural_network              : [3, 7, 24, 27, 28, 36] (당첨금: -1000원)
random_forest               : [6, 11, 20, 27, 34, 38] (당첨금: -1000원)
gradient_boosting           : [1, 5, 6, 7, 9, 10] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 231ms/step
LSTM                        : [6, 7, 13, 20, 34, 39] (당첨금: -1000원)
Bayesian                    : [6, 7, 11, 13, 18, 38] (당첨금: -1000원)
KMeans                      : [36, 38, 39, 43, 44, 45] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 9, 16, 24, 26, 42] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (22주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 2, 6, 14, 27, 38]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : 

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
neural_network              : [10, 17, 21, 26, 33, 40] (당첨금: -1000원)
random_forest               : [8, 14, 20, 29, 35, 41] (당첨금: -1000원)
gradient_boosting           : [5, 7, 10, 11, 13, 39] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step
LSTM                        : [6, 7, 32, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [1, 6, 7, 18, 19, 45] (당첨금: 4000원)
KMeans                      : [11, 12, 13, 16, 18, 19] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [3, 20, 26, 29, 34, 45] (당첨금: -1000원)
MarkovChain                 : [25, 27, 29, 30, 31, 32] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (17주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [21, 33, 35, 38, 42, 44]
total_high_frequency        : [6, 12, 18, 19, 21, 33] (당첨금: -1000원)
total_low_frequency     

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 66ms/step
neural_network              : [7, 8, 13, 23, 28, 32] (당첨금: -1000원)
random_forest               : [6, 12, 17, 26, 33, 40] (당첨금: -1000원)
gradient_boosting           : [5, 6, 7, 9, 30, 43] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 229ms/step
LSTM                        : [6, 12, 19, 32, 38, 39] (당첨금: -1000원)
Bayesian                    : [13, 16, 18, 19, 33, 45] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 16, 18, 19] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 4, 9, 16, 33, 44] (당첨금: -1000원)
MarkovChain                 : [2, 41, 42, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (12주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [7, 11, 12, 21, 26, 35]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency       

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
neural_network              : [4, 12, 17, 29, 33, 43] (당첨금: -1000원)
random_forest               : [5, 11, 18, 23, 30, 39] (당첨금: -1000원)
gradient_boosting           : [5, 7, 8, 14, 35, 44] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 237ms/step
LSTM                        : [6, 7, 12, 19, 32, 38] (당첨금: -1000원)
Bayesian                    : [1, 12, 16, 19, 38, 45] (당첨금: -1000원)
KMeans                      : [28, 30, 31, 33, 34, 36] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 809us/step
AutoEncoder                 : [1, 7, 8, 9, 17, 33] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (7주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 11, 17, 19, 40, 43]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency       

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 70ms/step
neural_network              : [7, 11, 21, 22, 32, 38] (당첨금: -1000원)
random_forest               : [8, 13, 20, 27, 35, 40] (당첨금: -1000원)
gradient_boosting           : [4, 5, 7, 19, 21, 26] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 225ms/step
LSTM                        : [7, 14, 21, 32, 38, 39] (당첨금: -1000원)
Bayesian                    : [21, 31, 33, 38, 41, 45] (당첨금: -1000원)
KMeans                      : [28, 30, 31, 33, 34, 36] (당첨금: -1000원)
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 15, 25, 26, 29, 41] (당첨금: -1000원)
MarkovChain                 : [1, 3, 37, 38, 39, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (2주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [2, 3, 9, 15, 27, 29]
total_high_frequency        : [6, 12, 18, 19, 21, 33] (당첨금: -1000원)
total_low_frequency        

In [None]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()



### (52주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 7, 13, 28, 36, 42]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_52_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_52_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_52_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_52_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
recent_26_high_frequency    : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
recent_26_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_26_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_26_next_vector       : [9, 16, 23, 29, 36, 42] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
neural_network              : [4, 8

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 2, 29, 31, 40, 45] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (47주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 3, 4, 29, 42, 45]
total_high_frequency        : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
total_low_frequency         : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [11, 18, 24, 31, 37, 44] (당첨금: -1000원)
recent_52_high_frequency    : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
recent_52_low_frequency     : [5, 9, 20, 25, 32, 40] (당첨금: -1000원)
recent_52_combined_frequency: [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
recent_52_next_vector       : [11, 18, 24, 31, 37, 44] (당첨금: -1000원)
recent_26_high_frequency    : [12, 16, 18, 21, 33, 38] (당첨금: -1000원)
recent_26_low_frequency     : [5

gradient_boosting           : [4, 6, 7, 8, 11, 41] (당첨금: 4000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 289ms/step
LSTM                        : [6, 12, 32, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [1, 7, 12, 38, 39, 45] (당첨금: -1000원)
KMeans                      : [3, 6, 7, 11, 12, 13] (당첨금: 4000원)
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 21, 24, 33, 36, 38] (당첨금: -1000원)
MarkovChain                 : [2, 41, 42, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (42주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [3, 13, 30, 33, 43, 45]
total_high_frequency        : [12, 18, 21, 33, 38, 45] (당첨금: -1000원)
total_low_frequency         : [5, 8, 9, 20, 25, 32] (당첨금: -1000원)
total_combined_frequency    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)
total_next_vector           : [3, 10, 16, 23, 29, 36] (당첨금: -1000원)
recent_52_high_frequency    : [12, 1

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
neural_network              : [7, 11, 21, 30, 32, 37] (당첨금: -1000원)
random_forest               : [7, 14, 19, 26, 35, 40] (당첨금: -1000원)
gradient_boosting           : [4, 5, 7, 8, 12, 20] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 263ms/step
LSTM                        : [6, 12, 32, 33, 39, 40] (당첨금: -1000원)
Bayesian                    : [6, 7, 18, 21, 31, 43] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 2, 6, 8, 22, 31] (당첨금: -1000원)
MarkovChain                 : [3, 39, 40, 43, 44, 45] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (37주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [15, 16, 17, 25, 30, 31]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency        

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step
neural_network              : [5, 14, 15, 26, 32, 39] (당첨금: -1000원)
random_forest               : [5, 15, 20, 26, 32, 40] (당첨금: -1000원)
gradient_boosting           : [4, 6, 7, 9, 18, 27] (당첨금: -1000원)
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 240ms/step
LSTM                        : [7, 15, 21, 34, 40, 41] (당첨금: -1000원)
Bayesian                    : [6, 7, 12, 13, 21, 45] (당첨금: -1000원)
KMeans                      : [11, 12, 13, 14, 16, 18] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 
AutoEncoder                 : [1, 9, 13, 14, 40, 45] (당첨금: -1000원)
MarkovChain                 : [1, 2, 3, 4, 5, 6] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 32] (당첨금: -1000원)

### (32주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [6, 24, 31, 32, 38, 44]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         : [

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 68ms/step
neural_network              : [5, 8, 19, 22, 30, 37] (당첨금: -1000원)
random_forest               : [7, 11, 20, 26, 35, 39] (당첨금: -1000원)
gradient_boosting           : [3, 5, 6, 7, 9, 40] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 242ms/step
LSTM                        : [6, 12, 13, 21, 33, 41] (당첨금: -1000원)
Bayesian                    : [16, 18, 21, 33, 38, 43] (당첨금: -1000원)
KMeans                      : [18, 19, 21, 24, 26, 27] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 2, 5, 11, 19, 40] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (27주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [4, 5, 9, 11, 37, 40]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 67ms/step
neural_network              : [5, 12, 13, 28, 30, 38] (당첨금: -1000원)
random_forest               : [7, 11, 18, 25, 34, 39] (당첨금: -1000원)
gradient_boosting           : [5, 6, 7, 8, 9, 12] (당첨금: -1000원)
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 243ms/step
LSTM                        : [6, 7, 13, 20, 33, 40] (당첨금: -1000원)
Bayesian                    : [3, 11, 12, 18, 41, 43] (당첨금: -1000원)
KMeans                      : [36, 38, 39, 43, 44, 45] (당첨금: -1000원)
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step 
AutoEncoder                 : [1, 2, 12, 17, 27, 38] (당첨금: -1000원)
MarkovChain                 : [35, 36, 37, 38, 39, 42] (당첨금: -1000원)
Ensemble                    : [5, 12, 18, 20, 21, 33] (당첨금: -1000원)

### (22주전)예측되는 번호와 당첨 금액 ###
=> 실제 당첨 번호           : [1, 2, 6, 14, 27, 38]
total_high_frequency        : [12, 16, 18, 21, 33, 45] (당첨금: -1000원)
total_low_frequency         

In [None]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()


In [None]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()


In [None]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()


In [None]:
import pandas as pd
import random
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, BatchNormalization, Input
import numpy as np
from skopt import gp_minimize
from skopt.space import Integer

# 번호의 중복 제거 및 부족 시 채워주는 함수
def ensure_unique_numbers(numbers):
    numbers = list(set(numbers))  # 중복 제거
    while len(numbers) < 6:
        numbers.append(random.randint(1, 45))  # 부족한 번호를 무작위로 추가
    return sorted(numbers[:6])

# 실제 번호와 예측 번호를 비교하여 보상을 계산하는 함수
def calculate_rewards(actual, predicted):
    match_count = len(set(actual) & set(predicted))  # 일치하는 번호 개수 계산
    rewards = {6: 10_000_000_000, 5: 1_499_000, 4: 49_000, 3: 4_000}
    return rewards.get(match_count, -1_000)  # 일치하지 않으면 -1,000 반환

# 앙상블 예측 함수
def predict_ensemble_from_numpy(predictions_list):
    """
    각 모델의 예측값을 numpy array로 받아 앙상블 예측 수행.
    """
    flat_predictions = np.concatenate(predictions_list).flatten()
    flat_predictions = flat_predictions[(flat_predictions >= 1) & (flat_predictions <= 45)]  # 유효 번호 필터링
    frequency_counts = pd.Series(flat_predictions).value_counts()

    if frequency_counts.empty:
        print("빈도 계산 결과가 없습니다. 빈 결과를 반환합니다.")
        return []

    return ensure_unique_numbers(frequency_counts.nlargest(6).index.tolist())

# 예측 모델의 성능을 평가하는 함수
def evaluate_predictions(data, prediction_functions, weeks_to_evaluate=52):
    total_rewards = {method: {"total_reward": 0, "win_count": 0} for method in prediction_functions.keys()}
    total_rewards["Ensemble"] = {"total_reward": 0, "win_count": 0}  # 앙상블 결과 추가
    prediction_storage = {method: [] for method in prediction_functions.keys()}

    for weeks_ago in range(weeks_to_evaluate, 0, -1):
        train_data = data[:-weeks_ago]
        test_data = data.iloc[-weeks_ago].values[2:]

        print(f"\n### ({weeks_ago}주전)예측되는 번호와 당첨 금액 ###")
        print(f"=> 실제 당첨 번호           : {sorted(test_data)}")

        for method, func in prediction_functions.items():
            predicted = func(train_data, weeks_ago)
            predicted = ensure_unique_numbers(predicted)

            if not predicted:
                print(f"{method}의 예측 결과가 비어 있습니다.")
                continue

            prediction_storage[method].append(predicted)  # 숫자 리스트 저장
            reward = calculate_rewards(test_data, predicted)
            total_rewards[method]["total_reward"] += reward
            if reward > 0:
                total_rewards[method]["win_count"] += 1

            print(f"{method}: {sorted(predicted)} (당첨금: {reward}원)")

        # 앙상블 예측 수행
        ensemble_predictions = predict_ensemble_from_numpy(
            [np.array(prediction_storage[method]) for method in prediction_storage]
        )
        ensemble_reward = calculate_rewards(test_data, ensemble_predictions)
        total_rewards["Ensemble"]["total_reward"] += ensemble_reward
        if ensemble_reward > 0:
            total_rewards["Ensemble"]["win_count"] += 1

        print(f"Ensemble                    : {ensemble_predictions} (당첨금: {ensemble_reward}원)")

    return total_rewards

# 높은 빈도의 숫자를 기반으로 예측
def predict_high_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nlargest(6).index.tolist()  # 상위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 낮은 빈도의 숫자를 기반으로 예측
def predict_low_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    predicted_numbers = frequency_counts.nsmallest(6).index.tolist()  # 하위 6개 숫자 선택
    return ensure_unique_numbers(predicted_numbers)

# 높은 빈도 3개 + 낮은 빈도 3개 조합으로 예측
def predict_combined_frequency_numbers(data, weeks_ago):
    frequency_counts = pd.Series(data.iloc[:, 2:].values.flatten()).value_counts()
    most_frequent = frequency_counts.nlargest(3).index.tolist()  # 상위 3개 숫자
    least_frequent = frequency_counts.nsmallest(3).index.tolist()  # 하위 3개 숫자
    return ensure_unique_numbers(most_frequent + least_frequent)

# PCA를 사용하여 다음 벡터를 예측
def predict_next_vector(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = StandardScaler()
    scaled_data = scaler.fit_transform(numbers)  # 데이터 스케일링

    pca = PCA(n_components=1)
    principal_component = pca.fit_transform(scaled_data)  # PCA 변환

    next_vector = principal_component[-1] + (principal_component[-1] - principal_component[-2])  # 다음 벡터 예측
    inverse_scaled_vector = scaler.inverse_transform(scaled_data.mean(axis=0).reshape(1, -1)) + next_vector

    predicted_numbers = [int(round(num)) for num in inverse_scaled_vector[0] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# 신경망을 사용하여 번호를 예측
def predict_neural_network(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)
    scaler_X = MinMaxScaler()
    scaler_y = MinMaxScaler()

    X = scaler_X.fit_transform(X)
    y = scaler_y.fit_transform(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1],)),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(64, activation='relu'),
        BatchNormalization(),
        Dropout(0.2),
        Dense(6, activation='sigmoid')  # Using sigmoid to restrict range
    ])

    model.compile(optimizer='adam', loss='mean_absolute_error', metrics=['mae'])
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), callbacks=[early_stopping], verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])

    # Add random noise (jitter) to each prediction
    jittered_predictions = predictions + np.random.uniform(-0.05, 0.05, size=predictions.shape)
    jittered_predictions = np.clip(jittered_predictions, 0, 1)  # Ensure valid range for sigmoid outputs

    # Inverse transform to original scale
    inverse_transformed = scaler_y.inverse_transform(jittered_predictions)

    # Convert to integers and ensure range
    rounded_numbers = [int(round(num)) for num in inverse_transformed.flatten() if 1 <= round(num) <= 45]
    return ensure_unique_numbers(rounded_numbers[:6])

# 랜덤 포레스트를 사용하여 번호를 예측
def predict_random_forest(data, weeks_ago):
    from sklearn.ensemble import RandomForestRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# Gradient Boosting을 사용하여 번호를 예측
def predict_gradient_boosting(data, weeks_ago):
    from sklearn.ensemble import GradientBoostingRegressor

    numbers = data.iloc[:, 2:].values
    X, y = [], []
    for i in range(len(numbers) - 10):
        X.append(numbers[i:i + 10].flatten())
        y.append(numbers[i + 10])

    X, y = pd.DataFrame(X), pd.DataFrame(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    y_train = y_train.iloc[:, 0].values
    y_test = y_test.iloc[:, 0].values

    model = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test[-weeks_ago:]).flatten()

    # 랜덤 노이즈 추가 및 다양성 확보
    diversified_predictions = predictions + np.random.uniform(-1.5, 1.5, size=predictions.shape)
    rounded_numbers = [int(round(num)) for num in diversified_predictions if 1 <= round(num) <= 45]

    # 중복 제거 및 범위 보장
    return ensure_unique_numbers(rounded_numbers[:6])

# LSTM을 사용하여 번호를 예측
def predict_lstm(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled_data = scaler.fit_transform(numbers)

    X, y = [], []
    for i in range(len(scaled_data) - 10):
        X.append(scaled_data[i:i + 10])
        y.append(scaled_data[i + 10])

    X, y = np.array(X), np.array(y)

    split_idx = int(len(X) * 0.7)
    X_train, X_test = X[:split_idx], X[split_idx:]
    y_train, y_test = y[:split_idx], y[split_idx:]

    model = Sequential([
        Input(shape=(X_train.shape[1], X_train.shape[2])),
        LSTM(128, return_sequences=True),
        LSTM(64, return_sequences=False),
        Dense(32, activation='relu'),
        Dense(6, activation='linear')
    ])

    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

    predictions = model.predict(X_test[-weeks_ago:])
    predicted_numbers = scaler.inverse_transform(predictions).flatten()
    return ensure_unique_numbers([int(round(num)) for num in predicted_numbers if 1 <= round(num) <= 45])

# Bayesian Optimization을 사용하여 번호를 예측
def predict_bayesian(data, weeks_ago):
    numbers = data.iloc[:, 2:].values

    def objective_function(x):
        selected_numbers = x
        count = sum(num in selected_numbers for num in numbers.flatten())
        return -count

    search_space = [Integer(1, 45) for _ in range(6)]
    result = gp_minimize(objective_function, search_space, n_calls=100)
    return ensure_unique_numbers(result.x)

# K-평균 클러스터링을 사용하여 번호를 예측
def predict_kmeans_clustering(data, weeks_ago, n_clusters=5):
    numbers = data.iloc[:, 2:].values.flatten().reshape(-1, 1)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(numbers)

    # 가장 빈번한 클러스터 선택
    cluster_counts = pd.Series(kmeans.labels_).value_counts()
    top_cluster = cluster_counts.idxmax()
    top_numbers = numbers[kmeans.labels_ == top_cluster]

    # 상위 6개의 번호 선택
    predicted_numbers = pd.Series(top_numbers.flatten()).value_counts().nlargest(6).index.tolist()
    return ensure_unique_numbers(predicted_numbers)

# 오토인코더를 사용하여 번호를 예측
def predict_autoencoder(data, weeks_ago):
    numbers = data.iloc[:, 2:].values
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(numbers)

    # 오토인코더 정의
    input_dim = scaled_data.shape[1]
    autoencoder = Sequential([
        Input(shape=(input_dim,)),
        Dense(16, activation='relu'),
        Dense(input_dim, activation='sigmoid')
    ])
    autoencoder.compile(optimizer='adam', loss='mse')
    autoencoder.fit(scaled_data, scaled_data, epochs=50, batch_size=16, verbose=0)

    # 복원된 데이터로 패턴 분석
    decoded_data = autoencoder.predict(scaled_data)
    predicted_numbers = [int(round(num)) for num in decoded_data[-1] if 1 <= round(num) <= 45]
    return ensure_unique_numbers(predicted_numbers)

# Markov Chain을 사용하여 번호를 예측
def predict_markov_chain(data, weeks_ago):
    numbers = data.iloc[:, 2:].values.flatten()
    transitions = pd.crosstab(numbers[:-1], numbers[1:])
    probabilities = transitions.div(transitions.sum(axis=1), axis=0)

    last_number = numbers[-1]
    if last_number in probabilities.index:
        next_numbers = probabilities.loc[last_number].nlargest(6).index.tolist()
    else:
        next_numbers = random.sample(range(1, 46), 6)  # 기본값으로 랜덤 샘플 제공

    return ensure_unique_numbers(next_numbers)

# 메인 함수
def main():
    try:
        file_path = "Loto_number.xlsx"  # 엑셀 파일 경로
        data = pd.read_excel(file_path)
    except Exception as e:
        print(f"데이터 로드 실패: {e}")
        return

    # 예측 함수 매핑
    prediction_functions = {
        "total_high_frequency        ": predict_high_frequency_numbers,
        "total_low_frequency         ": predict_low_frequency_numbers,
        "total_combined_frequency    ": predict_combined_frequency_numbers,
        "total_next_vector           ": predict_next_vector,

        "recent_52_high_frequency    ": predict_high_frequency_numbers,
        "recent_52_low_frequency     ": predict_low_frequency_numbers,
        "recent_52_combined_frequency": predict_combined_frequency_numbers,
        "recent_52_next_vector       ": predict_next_vector,

        "recent_26_high_frequency    ": predict_high_frequency_numbers,
        "recent_26_low_frequency     ": predict_low_frequency_numbers,
        "recent_26_combined_frequency": predict_combined_frequency_numbers,
        "recent_26_next_vector       ": predict_next_vector,

        "neural_network              ": predict_neural_network,
        "random_forest               ": predict_random_forest,
        "gradient_boosting           ": predict_gradient_boosting,
        "LSTM                        ": predict_lstm,
        "Bayesian                    ": predict_bayesian,
        "KMeans                      ": predict_kmeans_clustering,
        "AutoEncoder                 ": predict_autoencoder,
        "MarkovChain                 ": predict_markov_chain
    }

    # 모델 성능 평가
    total_rewards = evaluate_predictions(data, prediction_functions)

    print("\n### 지난 일년(52주) 동안 각각의 방법으로 예측된 총당첨금 ###")
    for method, stats in total_rewards.items():
        total_reward = stats["total_reward"]
        win_count = stats["win_count"]
        print(f"{method}: 총 당첨 {total_reward}원({win_count}회)")
           
    print("\n### 다음 주 예상 번호 ###")
    all_predictions = []  # 모든 예측 결과를 저장할 리스트
    for method, func in prediction_functions.items():
        try:
            predicted = func(data, weeks_ago=1)
            predicted = ensure_unique_numbers(predicted)
            all_predictions.append(predicted)  # 앙상블에 포함할 결과 저장
            print(f"{method}: {predicted}")
        except Exception as e:
            print(f"{method}: 예측 실패 (오류: {e})")

    # 앙상블 예측 수행
    if all_predictions:
        ensemble_predictions = predict_ensemble_from_numpy([np.array(pred) for pred in all_predictions])
        print(f"\nEnsemble                    : {ensemble_predictions}")

if __name__ == "__main__":
    main()
