In [2]:
pip install qrcode

Collecting qrcodeNote: you may need to restart the kernel to use updated packages.

  Downloading qrcode-7.4.2-py3-none-any.whl.metadata (17 kB)
Collecting pypng (from qrcode)
  Downloading pypng-0.20220715.0-py3-none-any.whl.metadata (13 kB)
Downloading qrcode-7.4.2-py3-none-any.whl (46 kB)
   ---------------------------------------- 0.0/46.2 kB ? eta -:--:--
   ---------------------------------------- 0.0/46.2 kB ? eta -:--:--
   -------- ------------------------------- 10.2/46.2 kB ? eta -:--:--
   -------- ------------------------------- 10.2/46.2 kB ? eta -:--:--
   -------- ------------------------------- 10.2/46.2 kB ? eta -:--:--
   -------- ------------------------------- 10.2/46.2 kB ? eta -:--:--
   ----------------------------------- ---- 41.0/46.2 kB 178.6 kB/s eta 0:00:01
   ----------------------------------- ---- 41.0/46.2 kB 178.6 kB/s eta 0:00:01
   ---------------------------------------- 46.2/46.2 kB 128.0 kB/s eta 0:00:00
Downloading pypng-0.20220715.0-py3-none-any

In [7]:
import pandas as pd
import qrcode
import os
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [8]:
def generate_qr_code(url, output_path):
    """Encode ``url`` as a QR code and save the rendered image.

    Args:
        url: Data to embed in the QR code.
        output_path: File path the image is written to.
    """
    builder = qrcode.QRCode(
        version=1,
        error_correction=qrcode.constants.ERROR_CORRECT_L,
        box_size=10,
        border=4,
    )
    builder.add_data(url)
    builder.make(fit=True)
    # Black modules on a white background, written straight to disk
    builder.make_image(fill_color="black", back_color="white").save(output_path)
     

In [3]:
def create_directories(good_dir, bad_dir):
    """Make sure both output directories exist.

    Missing parent directories are created as well; directories that are
    already present are left untouched.

    Args:
        good_dir: Directory for QR codes of URLs labelled good.
        bad_dir: Directory for QR codes of URLs labelled bad.
    """
    for directory in (good_dir, bad_dir):
        # exist_ok=True avoids the check-then-create race of testing
        # os.path.exists() first and only then calling makedirs().
        os.makedirs(directory, exist_ok=True)
     

In [9]:
# Load the source dataset
csv_file = './phishing_site_urls.csv' # Change this to your CSV file name
df = pd.read_csv(csv_file)

# Folders that receive the generated QR images, one per class
good_dir = './QR_codes/good_qr_codes'
bad_dir = './QR_codes/bad_qr_codes'
create_directories(good_dir, bad_dir)

# Accumulator for the (url, label) pairs collected while generating QR codes
qr_codes_data = []

In [14]:
# Upper bound on the number of rows turned into QR codes
MAX_QR_CODES = 30000

# Start from an empty list on every run: appending to a list created in an
# earlier cell would duplicate every sample if this cell were executed twice.
qr_codes_data = []

# NOTE(review): only the first MAX_QR_CODES rows of the CSV are used. If the
# file is ordered by label this gives a skewed class mix — consider shuffling
# (e.g. df.sample(...)) before taking the subset. TODO confirm.
for index, row in df.head(MAX_QR_CODES).iterrows():
    url = row['URL']
    label = row['Label']  # Assuming column name is 'Label'
    # Every label other than 'good' is stored with the bad QR codes
    output_path = os.path.join(good_dir if label == 'good' else bad_dir, f'{index}.png')
    generate_qr_code(url, output_path)
    qr_codes_data.append((url, label))

qr_code_counter = len(qr_codes_data)
if qr_code_counter >= MAX_QR_CODES:
    print(f"Reached the limit of {MAX_QR_CODES} QR codes. Exiting the loop.")

Reached the limit of 30000 QR codes. Exiting the loop.


In [15]:
# Unzip the collected (url, label) pairs into two parallel tuples
urls, labels = tuple(zip(*qr_codes_data))

In [16]:
# Encoding labels
# Fit on the raw labels taken from qr_codes_data rather than on `labels`
# itself: `labels` is overwritten below, so re-running this cell would
# otherwise fit the encoder on already-encoded integers and lose the original
# class names in label_encoder.classes_.
label_encoder = LabelEncoder()
labels = label_encoder.fit_transform([label for _, label in qr_codes_data])

In [17]:
# Turn every URL into a sequence of integer token ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(urls)
sequences = tokenizer.texts_to_sequences(urls)

# Pad all sequences at the end so they share the length of the longest one
max_sequence_length = max(map(len, sequences))
padded_sequences = pad_sequences(
    sequences,
    maxlen=max_sequence_length,
    padding='post',
)

In [18]:
# Hold out 20% of the samples; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    padded_sequences,
    labels,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Assemble the classifier layer by layer:
# embedding -> average over the sequence -> dense hidden layer -> sigmoid output
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=len(tokenizer.word_index)+1, output_dim=50))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

# Binary classification setup
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [21]:
# Fit for 10 epochs; the held-out split is scored after every epoch
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=64,
    validation_data=(X_test, y_test),
)

Epoch 1/10
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 32ms/step - accuracy: 0.9962 - loss: 0.0580 - val_accuracy: 0.9965 - val_loss: 0.0236
Epoch 2/10
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 28ms/step - accuracy: 0.9974 - loss: 0.0177 - val_accuracy: 0.9965 - val_loss: 0.0234
Epoch 3/10
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 28ms/step - accuracy: 0.9972 - loss: 0.0188 - val_accuracy: 0.9965 - val_loss: 0.0233
Epoch 4/10
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 27ms/step - accuracy: 0.9966 - loss: 0.0217 - val_accuracy: 0.9965 - val_loss: 0.0238
Epoch 5/10
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 28ms/step - accuracy: 0.9967 - loss: 0.0212 - val_accuracy: 0.9965 - val_loss: 0.0228
Epoch 6/10
[1m379/379[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 28ms/step - accuracy: 0.9974 - loss: 0.0168 - val_accuracy: 0.9965 - val_loss: 0.0221
Epoch 7/10
[1m3

<keras.src.callbacks.history.History at 0x226c9c3b990>

In [22]:
# Score the trained model on the held-out split and report both metrics
loss, accuracy = model.evaluate(x=X_test, y=y_test)
print(f'Loss: {loss}, Accuracy: {accuracy}')

[1m190/190[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9961 - loss: 0.0194
Loss: 0.01757930964231491, Accuracy: 0.9965289235115051


In [23]:
pip install pytesseract

Collecting pytesseract
  Downloading pytesseract-0.3.10-py3-none-any.whl.metadata (11 kB)
Downloading pytesseract-0.3.10-py3-none-any.whl (14 kB)
Installing collected packages: pytesseract
Successfully installed pytesseract-0.3.10
Note: you may need to restart the kernel to use updated packages.


In [24]:
import cv2
import numpy as np
import pytesseract

In [25]:
def predict_qr_code(image_path, tokenizer, model, maxlen=None):
    """Decode the URL stored in a QR code image and classify it.

    Args:
        image_path: Path of the QR code image file.
        tokenizer: Fitted tokenizer exposing ``texts_to_sequences``.
        model: Trained model whose ``predict`` returns one probability per
            input sequence.
        maxlen: Length the token sequence is padded/truncated to. When omitted,
            the notebook-level ``max_sequence_length`` is used.

    Returns:
        ``"good"`` when the predicted probability is above 0.5, otherwise
        ``"bad"``.

    Raises:
        FileNotFoundError: If the image cannot be read from ``image_path``.
        ValueError: If no QR code payload can be decoded from the image.
    """
    # cv2.imread signals failure by returning None instead of raising
    qr_image = cv2.imread(image_path)
    if qr_image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    # A QR code is a 2-D barcode, not printed text, so OCR cannot recover its
    # content; use OpenCV's QR decoder to obtain the embedded string.
    text, _points, _straight_qr = cv2.QRCodeDetector().detectAndDecode(qr_image)
    if not text:
        raise ValueError(f"No QR code could be decoded from: {image_path}")

    if maxlen is None:
        maxlen = max_sequence_length

    # Tokenize and pad the decoded URL
    sequence = tokenizer.texts_to_sequences([text])
    padded_sequence = pad_sequences(sequence, maxlen=maxlen, padding='post')

    # Single sample in, so take the first predicted probability as a scalar.
    # Assumes the label encoding maps 'bad' -> 0 and 'good' -> 1.
    prediction = model.predict(padded_sequence)
    probability = float(np.ravel(prediction)[0])
    return "good" if probability > 0.5 else "bad"

In [30]:
# QR code image to classify
image_path = "./download1.png"  # Replace this with the path to your QR code image

# Tell pytesseract where the Tesseract executable lives
pytesseract.pytesseract.tesseract_cmd = r'.\Tesseract-OCR\tesseract.exe'  # Example path, adjust it based on your installation

# Classify the image and show the result
predicted_label = predict_qr_code(image_path=image_path, tokenizer=tokenizer, model=model)
print("Predicted label:", predicted_label)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
Predicted label: bad
