<a href="https://colab.research.google.com/github/Bread806/goldbach_backup_from_colab/blob/main/goldbach_0725_MLP_base2357_torch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import drive
drive.mount("/content/Drive")

Mounted at /content/Drive


In [None]:
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
from tensorflow.keras.layers import Embedding, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as keras
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Sequential
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import StandardScaler


def is_prime(num):
    """檢查一個數字是否為質數"""
    if num <= 1:
        return False
    for i in range(2, int(num ** 0.5) + 1):
        if num % i == 0:
            return False
    return True


def prime_table(x):
    """建立小於x的質數表"""
    primes = [num for num in range(2, x) if is_prime(num)]
    return primes


def convert_base_into_list(number, base, width=10):
    result = []  # init list

    for i in range(width):
        result.append([number % base])
        number = number // base
    result = result[::-1]
    return result


def prime_to_index(primeSize, primes, number):
    for index in range(primeSize):
        if number == primes[index]:
            return index
    return -1

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

# 數據轉換函數
def convert_base_into_list(number, base, width=10):
    result = []
    for i in range(width):
        result.append(number % base)
        number = number // base
    return result[::-1]  # 返回扁平的列表

print("---loading data---")
# 加載數據
trainData = pd.read_csv('/content/Drive/MyDrive/實驗/goldbach/csv/traindata_min_size_1000.csv')
trainDataNumbers = trainData['Number'].values
trainDataLabel = trainData['Factors'].values
print("---loading data done.---")

# 打亂訓練數據
shuffleIndices = np.random.permutation(len(trainDataNumbers))
shuffledNumbers = trainDataNumbers[shuffleIndices]
shuffledLabel = trainDataLabel[shuffleIndices]

# 準備特徵
mergedTrainNumber = []
for num in shuffledNumbers:
    feature = convert_base_into_list(num, 2) + convert_base_into_list(num, 3) + \
              convert_base_into_list(num, 5) + convert_base_into_list(num, 7)
    mergedTrainNumber.append(feature)

mergedArray = np.array(mergedTrainNumber)
mergedLabel = np.array(shuffledLabel)

# 分割數據
X_train, X_val, y_train, y_val = train_test_split(mergedArray, mergedLabel, test_size=0.2, random_state=42)

# 創建 PyTorch 數據集和數據加載器
class GoldbachDataset(Dataset):
    def __init__(self, features, labels):
        self.features = torch.FloatTensor(features)
        self.labels = torch.FloatTensor(labels).view(-1, 1)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx]

train_dataset = GoldbachDataset(X_train, y_train)
val_dataset = GoldbachDataset(X_val, y_val)

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128)

# 定義模型
class GoldbachModel(nn.Module):
    def __init__(self, input_size=40):
        super(GoldbachModel, self).__init__()
        self.layers = nn.Sequential(
            nn.Linear(input_size, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, 200),
            nn.ReLU(),
            nn.Linear(200, 1)
        )

    def forward(self, x):
        return self.layers(x)

# 檢查是否有可用的GPU
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"--- Using device: {device} ---")

# 創建模型實例
input_size = X_train.shape[1]
print(f"Input size: {input_size}")
model = GoldbachModel(input_size).to(device)

# 定義損失函數和優化器
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# 訓練模型
print("---starting training---")
num_epochs = 1000

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch_features, batch_labels in train_loader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_features)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # 驗證
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_features, batch_labels in val_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            outputs = model(batch_features)
            val_loss += criterion(outputs, batch_labels).item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {total_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

print("--------------------------------------------------------")

# 保存模型
torch.save(model.state_dict(), 'goldbach_model_0725.pth')
print("model saved.")

---loading data---
---loading data done.---
--- Using device: cpu ---
Input size: 40
---starting training---
Epoch [1/1000], Train Loss: 167613974.8571, Val Loss: 188751944.0000
Epoch [2/1000], Train Loss: 161889922.2857, Val Loss: 188155576.0000
Epoch [3/1000], Train Loss: 159069301.7143, Val Loss: 185008976.0000
Epoch [4/1000], Train Loss: 160607721.1429, Val Loss: 172922424.0000
Epoch [5/1000], Train Loss: 135719798.8571, Val Loss: 137804416.0000
Epoch [6/1000], Train Loss: 93300369.1429, Val Loss: 73394532.0000
Epoch [7/1000], Train Loss: 51032174.2857, Val Loss: 67498308.0000
Epoch [8/1000], Train Loss: 50581839.1429, Val Loss: 55794224.0000
Epoch [9/1000], Train Loss: 44515348.0000, Val Loss: 60065304.0000
Epoch [10/1000], Train Loss: 43720569.7143, Val Loss: 54496184.0000
Epoch [11/1000], Train Loss: 42660554.2857, Val Loss: 52551620.0000
Epoch [12/1000], Train Loss: 40398246.0000, Val Loss: 51309208.0000
Epoch [13/1000], Train Loss: 40666639.1429, Val Loss: 51183832.0000
Epoch 

## training

In [None]:
print("Shape of X_train:", X_train.shape)
print("Shape of y_train:", y_train.shape)

Shape of X_train: (800, 40, 1)
Shape of y_train: (800,)


In [None]:
# 訓練模型
print("---starting training---")
num_epochs = 200

for epoch in range(num_epochs):
    model.train()
    for batch_features, batch_labels in train_loader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)

        optimizer.zero_grad()
        outputs = model(batch_features)
        loss = criterion(outputs.squeeze(), batch_labels)
        loss.backward()
        optimizer.step()

    # 驗證
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_features, batch_labels in val_loader:
            batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
            outputs = model(batch_features)
            val_loss += criterion(outputs.squeeze(), batch_labels).item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Train Loss: {loss.item():.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

print("--------------------------------------------------------")

# 保存模型
torch.save(model.state_dict(), 'goldbach_model_1020_400w.pth')
print("model saved.")

---starting training---


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (40) must match the size of tensor b (800) at non-singleton dimension 1

# predict

In [None]:
from google.colab import drive
drive.mount("/content/Drive")

Drive already mounted at /content/Drive; to attempt to forcibly remount, call drive.mount("/content/Drive", force_remount=True).


In [None]:
# loadding prime table
primeTable = pd.read_csv("/content/Drive/MyDrive/實驗/goldbach/csv/prime_table_5000000.csv")
primeTableNumber = primeTable["primes"].values
print ("---loading primes table---")

---loading primes table---


In [None]:
## load data
#predict

from tensorflow.keras.models import load_model
import csv
import pandas as pd
import numpy as np

# load torch model
model = GoldbachModel(input_size)
model.load_state_dict(torch.load('goldbach_model_0725.pth'))

# load test_set and prime data
testDataPath = '/content/Drive/MyDrive/實驗/goldbach/csv/test_set.csv'
testData = pd.read_csv(testDataPath)
testDataNumbers = testData['Number'].values
testDataLabel = testData['Factors'].values

print("---loading test data done.---")


---loading test data done.---


In [None]:
# shuffle training data
shuffleIndices = np.random.permutation(len(testDataNumbers))
shuffledNumbers = testDataNumbers[shuffleIndices]
shuffledLabel = testDataLabel[shuffleIndices]

In [None]:
# generate 25 prime table
primeTable25 = primeTableNumber[:25]
print (primeTable25)

[ 2  3  5  7 11 13 17 19 23 29 31 37 41 43 47 53 59 61 67 71 73 79 83 89
 97]


## one number
torch version

In [None]:
# 評估模型
model.eval()
total_loss = 0
correct = 0
total = 0

with torch.no_grad():
    for batch_features, batch_labels in val_loader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        outputs = model(batch_features)
        loss = criterion(outputs, batch_labels)
        total_loss += loss.item()

        # # 假設是二分類問題，計算準確度
        # predicted = (outputs > 0.5).float()
        # total += batch_labels.size(0)
        # correct += (predicted == batch_labels).sum().item()

average_loss = total_loss / len(val_loader)

print(f"Loss: {average_loss:.4f}")


Loss: 3080589.8750


In [None]:
import torch

# 加載模型
model = GoldbachModel(input_size)
model.load_state_dict(torch.load('goldbach_model_0725.pth'))
model.to(device)
model.eval()

# 準備數據
test_data = []  # 這裡放你想要預測的數字
new_features = []
for num in shuffledNumbers:
    feature = convert_base_into_list(num, 2) + convert_base_into_list(num, 3) + \
              convert_base_into_list(num, 5) + convert_base_into_list(num, 7)
    new_features.append(feature)



new_features = torch.FloatTensor(new_features).to(device)

# 進行預測
with torch.no_grad():
    predictions = model(new_features)

# # 輸出結果
print(predictions.cpu().numpy())


[[15031.179]
 [36580.44 ]
 [30671.1  ]
 [23571.498]
 [36222.133]
 [14329.631]
 [35477.844]
 [19946.96 ]
 [17878.895]
 [13885.85 ]
 [17843.88 ]
 [33272.08 ]
 [35271.83 ]
 [40637.79 ]
 [16778.498]
 [34703.715]
 [19543.363]
 [35963.47 ]
 [15709.218]
 [21333.703]
 [25594.297]
 [16892.29 ]
 [22211.81 ]
 [17365.27 ]
 [30677.592]
 [19537.941]
 [17475.613]
 [19651.922]
 [14856.12 ]
 [43659.945]
 [16815.371]
 [20018.656]
 [20027.459]
 [18301.785]
 [14482.29 ]
 [18105.598]
 [17937.033]
 [21270.635]
 [18454.39 ]
 [35671.73 ]
 [17678.61 ]
 [18232.43 ]
 [36004.31 ]
 [20611.49 ]
 [21280.547]
 [34899.39 ]
 [16225.054]
 [18809.85 ]
 [22050.83 ]
 [31308.867]]


In [None]:
roundPredictions = [int(i) for i in predictions]

In [None]:
print (len(testDataLabel))
print (len(roundPredictions))

50
50


In [None]:
def calculate_mse(predictions, targets):
    """
    計算均方誤差 (MSE)

    參數:
    predictions: 預測值的列表或陣列
    targets: 實際值的列表或陣列，與預測值對應

    返回值:
    mse: 均方誤差
    """
    # 確保預測值和實際值的長度相等
    if len(predictions) != len(targets):
        raise ValueError("預測值和實際值的長度不一致")

    # 計算平方誤差
    squared_errors = [(p - t) ** 2 for p, t in zip(predictions, targets)]

    # 計算均方誤差
    mse = sum(squared_errors) / len(predictions)

    return mse

# 使用 LN
mse_result = calculate_mse(roundPredictions, testDataLabel)

# 使用 MLP(old)
#mse_result = calculate_mse(predictions, testDataPartition)


import math
mean = testData['Factors'].mean()

#+N MSE
MSE = int(calculate_mse(roundPredictions, testDataLabel))

#MLP MSE
# MSE = int(calculate_mse(predictions, testDataPartition))

RMSE= math.sqrt(MSE)
errorRate = RMSE/mean

print (f"MSE : {MSE}\nRMSE : {RMSE}\nError Rate : {errorRate}  ->  {round(errorRate,7)*100}%")

MSE : 139189400
RMSE : 11797.85573737872
Error Rate : 0.5608006902609586  ->  56.080070000000006%
