In [1]:
import cv2

In [10]:
def read_sequences(file_path):
    sequences = []

    try:
        with open(file_path, 'r') as file:
            sequence = ""
            for line in file:
                if line.startswith("-> "):
                    if sequence:
                        sequences.append(sequence.strip())  # Append the previous sequence
                    sequence = line[3:].strip()  # Start a new sequence
                else:
                    sequence += line.strip()  # Append the line to the current sequence

            # Append the last sequence (if any) when the file ends
            if sequence:
                sequences.append(sequence.strip())

    except FileNotFoundError:
        print(f"File not found: {file_path}")

    return sequences

In [11]:
import matplotlib.pyplot as plt
import numpy as np

# Define the colors for the nitrogen bases
base_colors = {'A': "red", 'G': "blue", 'T': "green", 'C': "purple"}

# Function to create a DNA spiral with Matplotlib
def create_dna_spiral(sequence, output_folder, sequence_number):
    # Create a figure and axis for the plot
    fig, ax = plt.subplots(figsize=(8, 8))

    # Set the initial radius and step for increasing the radius
    radius = 0.1
    radius_step = 0.1

    # Calculate the total length of the outline of each semi-circle
    total_length = np.pi * 2 * radius

    for base in sequence:
        color = base_colors.get(base, "black")

        # Divide each semi-circle into 4 segments
        segment_length = total_length / 4

        # Create points for each segment
        for i in range(4):
            theta = np.linspace(i * np.pi / 2, (i + 1) * np.pi / 2, 100)  # 100 points for each segment
            x = radius * np.cos(theta)
            y = radius * np.sin(theta)
            ax.plot(x, y, color=color, linewidth=1.5)

        radius += radius_step

    # Set equal aspect ratio
    ax.set_aspect('equal', adjustable='box')

    # Remove axis labels and ticks
    ax.set_xticks([])
    ax.set_yticks([])

    # Save the plot as an image with a name based on the sequence
    output_filename = f"{output_folder}/seq_{sequence_number}.png"
    plt.savefig(output_filename, bbox_inches='tight', dpi=300)
    plt.close()
    print(f"Saved spiral as {output_filename}")

In [3]:
# for classA sequences
file_path = "ClassA.txt"

# Create the 'spirals' directory if it doesn't exist
import os
if not os.path.exists("ClassA_spirals"):
    os.makedirs("ClassA_spirals")

# Read the DNA sequences from the file
sequence_list = read_sequences(file_path)

# Generate spirals and save photos
for i, sequence in enumerate(sequence_list, 1):
    create_dna_spiral(sequence, "ClassA_spirals", i)

Saved spiral as ClassA_spirals/seq_1.png
Saved spiral as ClassA_spirals/seq_2.png
Saved spiral as ClassA_spirals/seq_3.png
Saved spiral as ClassA_spirals/seq_4.png
Saved spiral as ClassA_spirals/seq_5.png
Saved spiral as ClassA_spirals/seq_6.png
Saved spiral as ClassA_spirals/seq_7.png
Saved spiral as ClassA_spirals/seq_8.png
Saved spiral as ClassA_spirals/seq_9.png
Saved spiral as ClassA_spirals/seq_10.png
Saved spiral as ClassA_spirals/seq_11.png
Saved spiral as ClassA_spirals/seq_12.png
Saved spiral as ClassA_spirals/seq_13.png
Saved spiral as ClassA_spirals/seq_14.png
Saved spiral as ClassA_spirals/seq_15.png
Saved spiral as ClassA_spirals/seq_16.png
Saved spiral as ClassA_spirals/seq_17.png
Saved spiral as ClassA_spirals/seq_18.png
Saved spiral as ClassA_spirals/seq_19.png
Saved spiral as ClassA_spirals/seq_20.png
Saved spiral as ClassA_spirals/seq_21.png
Saved spiral as ClassA_spirals/seq_22.png
Saved spiral as ClassA_spirals/seq_23.png
Saved spiral as ClassA_spirals/seq_24.png
S

In [4]:
# for classB sequences
file_path = "ClassB.txt"

# Create the 'spirals' directory if it doesn't exist
import os
if not os.path.exists("ClassB_spirals"):
    os.makedirs("ClassB_spirals")

# Read the DNA sequences from the file
sequence_list = read_sequences(file_path)

# Generate spirals and save photos
for i, sequence in enumerate(sequence_list, 1):
    create_dna_spiral(sequence, "ClassB_spirals", i)

Saved spiral as ClassB_spirals/seq_1.png
Saved spiral as ClassB_spirals/seq_2.png
Saved spiral as ClassB_spirals/seq_3.png
Saved spiral as ClassB_spirals/seq_4.png
Saved spiral as ClassB_spirals/seq_5.png
Saved spiral as ClassB_spirals/seq_6.png
Saved spiral as ClassB_spirals/seq_7.png
Saved spiral as ClassB_spirals/seq_8.png
Saved spiral as ClassB_spirals/seq_9.png
Saved spiral as ClassB_spirals/seq_10.png
Saved spiral as ClassB_spirals/seq_11.png
Saved spiral as ClassB_spirals/seq_12.png
Saved spiral as ClassB_spirals/seq_13.png
Saved spiral as ClassB_spirals/seq_14.png
Saved spiral as ClassB_spirals/seq_15.png
Saved spiral as ClassB_spirals/seq_16.png
Saved spiral as ClassB_spirals/seq_17.png
Saved spiral as ClassB_spirals/seq_18.png
Saved spiral as ClassB_spirals/seq_19.png
Saved spiral as ClassB_spirals/seq_20.png
Saved spiral as ClassB_spirals/seq_21.png
Saved spiral as ClassB_spirals/seq_22.png
Saved spiral as ClassB_spirals/seq_23.png
Saved spiral as ClassB_spirals/seq_24.png
S

In [2]:
import os

# Define the paths to the image directories
class_a_dir = "ClassA_spirals"
class_b_dir = "ClassB_spirals"

# Initialize lists to store image paths and labels
image_paths = []
labels = []

# Class A (label 0)
class_a_images = os.listdir(class_a_dir)
image_paths.extend([os.path.join(class_a_dir, img) for img in class_a_images])
labels.extend([0] * len(class_a_images))

# Class B (label 1)
class_b_images = os.listdir(class_b_dir)
image_paths.extend([os.path.join(class_b_dir, img) for img in class_b_images])
labels.extend([1] * len(class_b_images))


In [4]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from PIL import Image

# Step 2: Feature Extraction and Data Splitting

# Function to read and preprocess an image
def preprocess_image(image_path, target_size=(128, 128)):
    img = cv2.imread(image_path)  # Read the image using OpenCV
    img = cv2.resize(img, target_size)  # Resize the image
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    return img

def extract_hog_features(image):
    win_size = (64, 64)  # Define the window size
    block_size = (16, 16)  # Define the block size
    block_stride = (8, 8)  # Define the block stride
    cell_size = (8, 8)  # Define the cell size
    nbins = 9  # Number of bins for the histogram
    hog = cv2.HOGDescriptor(win_size, block_size, block_stride, cell_size, nbins)
    features = hog.compute(image)
    return features.flatten()


# Load and preprocess the images
images = []
features = []  # Add this list to store extracted features

for image_path in image_paths:
    img = preprocess_image(image_path)
    hog_features = extract_hog_features(img)
    images.append(img)
    features.append(hog_features)

X = np.array(features)  # Use the extracted features as your data


# Split the data into training (75%) and testing (25%) sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Now, you have X_train, y_train for training and X_test, y_test for testing.


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [15]:
from sklearn import svm
from sklearn.metrics import accuracy_score, classification_report

# Step: Model Training

# Define the SVM classifier with your chosen hyperparameters
# Example hyperparameters: C=1.0 (regularization parameter), kernel='linear' (kernel type)
svm_classifier = svm.SVC(C=1.0, kernel='linear', random_state=42)

# Train the SVM classifier on the training data
svm_classifier.fit(X_train, y_train)

# Make predictions on the test data
y_pred = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy with SVM Model: {accuracy * 100:.2f}%")

# Generate a classification report for the SVM model
report = classification_report(y_test, y_pred)
print("Classification Report for SVM Model:\n", report)

Accuracy with SVM Model: 83.33%
Classification Report for SVM Model:
               precision    recall  f1-score   support

           0       0.91      0.77      0.83        13
           1       0.77      0.91      0.83        11

    accuracy                           0.83        24
   macro avg       0.84      0.84      0.83        24
weighted avg       0.84      0.83      0.83        24



In [16]:
import joblib

# Define the filename for saving the model
model_filename = "best_svm_model.pkl"

# Save the best SVM model to the specified file
joblib.dump(svm_classifier, model_filename)

print(f"Best SVM model saved to {model_filename}")

Best SVM model saved to best_svm_model.pkl


In [24]:
# for Prediction sequences
file_path = "Prediction.txt"

# Create the 'spirals' directory if it doesn't exist
import os
if not os.path.exists("Prediction_spirals"):
    os.makedirs("Prediction_spirals")

# Read the DNA sequences from the file
sequence_list = read_sequences(file_path)

# Generate spirals and save photos
for i, sequence in enumerate(sequence_list, 1):
    create_dna_spiral(sequence, "Prediction_spirals", i)

Saved spiral as Prediction_spirals/seq_1.png


In [23]:
import os
import numpy as np
import joblib
import cv2  # Import OpenCV

# Function to preprocess an image for prediction using OpenCV
def preprocess_prediction_image(image_path, target_size=(128, 128)):
    img = cv2.imread(image_path)  # Read the image using OpenCV
    img = cv2.resize(img, target_size)  # Resize the image
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # Convert to grayscale

    # Extract HOG features (matching the number of features used during training)
    hog_features = extract_hog_features(img)

    return hog_features  # Return the HOG features directly

# Load the saved SVM model
model_filename = "best_svm_model.pkl"
best_svm_classifier = joblib.load(model_filename)

# Directory containing prediction images
prediction_image_dir = "Prediction_spirals"

# List to store predictions
predictions = []

# Iterate over the prediction images
for image_filename in os.listdir(prediction_image_dir):
    if image_filename.endswith(".png"):
        image_path = os.path.join(prediction_image_dir, image_filename)
        
        # Preprocess the image for prediction using OpenCV
        preprocessed_image = preprocess_prediction_image(image_path)

        # Reshape the input to a 2D array
        preprocessed_image = preprocessed_image.reshape(1, -1)

        # Make a prediction using the SVM model
        prediction = best_svm_classifier.predict(preprocessed_image)
        
        # Append the prediction to the list
        predictions.append((image_filename, prediction[0]))

for image_filename, prediction in predictions:
    print(f"Image: {image_filename}, Predicted Class: {'Class A' if prediction == 0 else 'Class B'}")

Image: seq_1.png, Predicted Class: Class A
