# Prime Number Prediction using ML

In [1]:
import numpy as np
import pandas as pd
import time
import math
import random
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, classification_report
import tensorflow as tf
from tensorflow import keras
import gradio as gr


## Generate Dataset

In [2]:
def is_prime(n):
    """Return 1 if ``n`` is prime and 0 otherwise, using trial division.

    Candidate divisors run from 2 up to and including ``int(sqrt(n))``.
    Anything below 2 is reported as not prime. The result is an integer
    (0/1) rather than a bool so it can be used directly as a class label.
    """
    if n < 2:
        return 0
    upper = int(math.sqrt(n))
    # any() stops at the first divisor found, like the early return in a loop.
    divisible = any(n % candidate == 0 for candidate in range(2, upper + 1))
    return 0 if divisible else 1

def generate_dataset(size=50000):
    """Build a feature matrix and prime/composite labels for 2 .. size + 1.

    Each row describes one integer ``n`` with nine columns, in order:
    ``n`` itself, the sum of its decimal digits, the number of 1-bits in its
    binary form, and the residues ``n mod p`` for p in 2, 3, 5, 7, 11, 13.

    Returns:
        (features, labels): ``features`` has one row per integer and
        ``labels`` holds the matching ``is_prime`` result (1 = prime, 0 = not).
    """
    numbers = np.arange(2, size + 2)

    def featurize(n):
        # One row of the feature matrix for a single integer.
        digit_sum = sum(int(ch) for ch in str(n))
        set_bits = bin(n).count('1')
        residues = [n % p for p in (2, 3, 5, 7, 11, 13)]
        return [n, digit_sum, set_bits, *residues]

    features = np.array([featurize(n) for n in numbers])
    labels = np.array([is_prime(n) for n in numbers])

    return features, labels

## Prepare Data

In [3]:

# Build the dataset for the first 100000 integers starting at 2 and hold out
# 20% of the rows as a test set (fixed random_state so the split is repeatable).
X, y = generate_dataset(100000)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit the scaler on the training rows only, then apply the same scaling to
# both splits so no test-set statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

## Train Models

In [4]:
# Candidate classifiers, keyed by the display name used in reports and in the UI.
# The estimators that use randomness during fitting (forest, boosting, MLP) get a
# fixed random_state -- the same seed as the train/test split -- so that a fresh
# "Restart & Run All" reproduces the same scores.
models = {
    "Logistic Regression": LogisticRegression(),
    "Support Vector Machine": SVC(),
    "Random Forest": RandomForestClassifier(n_estimators=200, random_state=42),
    "Gradient Boosting": GradientBoostingClassifier(n_estimators=200, random_state=42),
    "Neural Network": MLPClassifier(hidden_layer_sizes=(256, 128, 64), max_iter=1000, random_state=42)
}

# Fit every model on the scaled training split, keep the fitted estimator for
# later cells, and report accuracy plus the per-class breakdown on the test split.
trained_models = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    trained_models[name] = model
    y_pred = model.predict(X_test)
    print(f"{name} Accuracy: {accuracy_score(y_test, y_pred)}")
    print(classification_report(y_test, y_pred))

Logistic Regression Accuracy: 0.90295
              precision    recall  f1-score   support

           0       0.92      0.98      0.95     18104
           1       0.46      0.16      0.23      1896

    accuracy                           0.90     20000
   macro avg       0.69      0.57      0.59     20000
weighted avg       0.87      0.90      0.88     20000

Support Vector Machine Accuracy: 0.90915
              precision    recall  f1-score   support

           0       0.92      0.98      0.95     18104
           1       0.56      0.20      0.29      1896

    accuracy                           0.91     20000
   macro avg       0.74      0.59      0.62     20000
weighted avg       0.89      0.91      0.89     20000

Random Forest Accuracy: 0.9044
              precision    recall  f1-score   support

           0       0.95      0.95      0.95     18104
           1       0.50      0.48      0.49      1896

    accuracy                           0.90     20000
   macro avg      

## Large Deep Learning Model

In [5]:
def build_deep_model(input_dim=None):
    """Build and compile a fully connected binary classifier.

    The network is a stack of Dense layers (512 -> 256 -> 128 -> 64, ReLU)
    followed by a single sigmoid unit, compiled with Adam, binary
    cross-entropy loss and an accuracy metric.

    Args:
        input_dim: Number of input features. When omitted, it is taken from
            the column count of the global ``X_train``, which must then exist.

    Returns:
        The compiled ``keras.Sequential`` model (not yet trained).
    """
    if input_dim is None:
        input_dim = X_train.shape[1]

    model = keras.Sequential([
        # Declare the input explicitly instead of passing `input_shape` to the
        # first Dense layer, which Keras warns against for Sequential models.
        keras.Input(shape=(input_dim,)),
        keras.layers.Dense(512, activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(64, activation='relu'),
        keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Train the deep network on the scaled training split; the test split is passed
# as validation data so per-epoch val_loss / val_accuracy are reported.
large_nn = build_deep_model()
large_nn.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=1024,
    validation_data=(X_test, y_test),
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step - accuracy: 0.8952 - loss: 0.2897 - val_accuracy: 0.9053 - val_loss: 0.1568
Epoch 2/20
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step - accuracy: 0.9054 - loss: 0.1557 - val_accuracy: 0.9080 - val_loss: 0.1474
Epoch 3/20
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 29ms/step - accuracy: 0.9109 - loss: 0.1463 - val_accuracy: 0.9085 - val_loss: 0.1415
Epoch 4/20
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - accuracy: 0.9098 - loss: 0.1402 - val_accuracy: 0.9070 - val_loss: 0.1365
Epoch 5/20
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 46ms/step - accuracy: 0.9087 - loss: 0.1365 - val_accuracy: 0.9107 - val_loss: 0.1366
Epoch 6/20
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 29ms/step - accuracy: 0.9086 - loss: 0.1357 - val_accuracy: 0.9093 - val_loss: 0.1373
Epoch 7/20
[1m79/79[0m [32m━━━━

<keras.src.callbacks.history.History at 0x79d6b687ddd0>

## Implement Miller-Rabin Primality Test

In [6]:
def miller_rabin(n, k=10):
    """Probabilistic Miller-Rabin primality test.

    Args:
        n: Integer to test.
        k: Number of random bases to try. More rounds make it less likely
            that a composite number is reported as prime.

    Returns:
        False if ``n`` is definitely not prime (including every n < 2),
        True if ``n`` is 2 or 3 or passed all ``k`` rounds (probably prime).
    """
    if n < 2:
        return False
    # 2 and 3 must be answered directly: the random base is drawn from
    # [2, n - 2], which is an empty range for them and makes randint raise.
    if n < 4:
        return True
    if n % 2 == 0:
        return False

    # Write n - 1 as d * 2**s with d odd.
    d, s = n - 1, 0
    while d % 2 == 0:
        d //= 2
        s += 1

    def is_witness(a):
        # True when base `a` proves that n is composite.
        x = pow(a, d, n)
        if x == 1 or x == n - 1:
            return False
        for _ in range(s - 1):
            x = pow(x, 2, n)
            if x == n - 1:
                return False
        return True

    for _ in range(k):
        a = random.randint(2, n - 2)
        if is_witness(a):
            return False
    return True

## Gradio UI

In [7]:
def predict_number(n):
    """Classify one number with every trained model plus Miller-Rabin.

    Args:
        n: The number to classify; it is truncated to an integer with ``int``.

    Returns:
        A dict mapping each scikit-learn model name to its predicted label
        (1 = prime, 0 = not prime), plus "Deep Learning Model" and
        "Miller-Rabin Test" entries holding "Prime" or "Composite".
    """
    n = int(n)
    # Use abs() for the digit sum: str() of a negative number starts with '-',
    # and int('-') would raise ValueError.
    digit_sum = sum(map(int, str(abs(n))))
    # Same nine features, in the same order, as the training data.
    features = np.array([[n, digit_sum, bin(n).count('1'), n % 2, n % 3, n % 5, n % 7, n % 11, n % 13]])
    features = scaler.transform(features)
    # Cast NumPy scalars to built-in ints so the returned dict contains only
    # plain Python types for the JSON output.
    predictions = {name: int(model.predict(features)[0]) for name, model in trained_models.items()}
    deep_prediction = "Prime" if large_nn.predict(features)[0][0] > 0.5 else "Composite"
    miller_rabin_result = "Prime" if miller_rabin(n) else "Composite"
    return {**predictions, "Deep Learning Model": deep_prediction, "Miller-Rabin Test": miller_rabin_result}

# Expose predict_number through a small web UI: one numeric input box and a
# JSON view of the per-model results.
demo = gr.Interface(
    fn=predict_number,
    inputs="number",
    outputs="json",
)
demo.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://94483915538069c0e1.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




## Evaluate ML models

In [8]:
# Compact accuracy summary: one line per fitted scikit-learn model, scored on
# the held-out test split.
for name in trained_models:
    model = trained_models[name]
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"{name} Accuracy: {acc:.5f}")

Logistic Regression Accuracy: 0.90295
Support Vector Machine Accuracy: 0.90915
Random Forest Accuracy: 0.90440
Gradient Boosting Accuracy: 0.91230
Neural Network Accuracy: 0.90500


## Evaluate Deep Learning Model

In [11]:
# evaluate() returns the loss followed by the compiled metrics; the model was
# compiled with a single 'accuracy' metric, so index 1 is the test accuracy.
eval_results = large_nn.evaluate(X_test, y_test, verbose=0)
deep_learning_acc = eval_results[1]
print(f"Deep Learning Model Accuracy: {deep_learning_acc:.5f}")


Deep Learning Model Accuracy: 0.90985
