In [24]:
def read_sequences(file_path):
    sequences = []

    try:
        with open(file_path, 'r') as file:
            sequence = ""
            for line in file:
                if line.startswith("-> "):
                    if sequence:
                        sequences.append(sequence.strip())  # Append the previous sequence
                    sequence = line[3:].strip()  # Start a new sequence
                else:
                    sequence += line.strip()  # Append the line to the current sequence

            # Append the last sequence (if any) when the file ends
            if sequence:
                sequences.append(sequence.strip())

    except FileNotFoundError:
        print(f"File not found: {file_path}")

    return sequences

In [25]:
import matplotlib.pyplot as plt
import numpy as np

# Define the colors for the nitrogen bases
base_colors = {'A': "red", 'G': "blue", 'T': "green", 'C': "purple"}

# Function to create a DNA spiral with Matplotlib
def create_dna_spiral(sequence, output_folder, sequence_number):
    # Create a figure and axis for the plot
    fig, ax = plt.subplots(figsize=(8, 8))

    # Set the initial radius and step for increasing the radius
    radius = 0.1
    radius_step = 0.1

    # Calculate the total length of the outline of each semi-circle
    total_length = np.pi * 2 * radius

    for base in sequence:
        color = base_colors.get(base, "black")

        # Divide each semi-circle into 4 segments
        segment_length = total_length / 4

        # Create points for each segment
        for i in range(4):
            theta = np.linspace(i * np.pi / 2, (i + 1) * np.pi / 2, 100)  # 100 points for each segment
            x = radius * np.cos(theta)
            y = radius * np.sin(theta)
            ax.plot(x, y, color=color, linewidth=1.5)

        radius += radius_step

    # Set equal aspect ratio
    ax.set_aspect('equal', adjustable='box')

    # Remove axis labels and ticks
    ax.set_xticks([])
    ax.set_yticks([])

    # Save the plot as an image with a name based on the sequence
    output_filename = f"{output_folder}/seq_{sequence_number}.png"
    plt.savefig(output_filename, bbox_inches='tight', dpi=300)
    plt.close()
    print(f"Saved spiral as {output_filename}")

In [26]:
# for classA sequences
file_path = "ClassA.txt"

# Create the 'spirals' directory if it doesn't exist
import os
if not os.path.exists("ClassA_spirals"):
    os.makedirs("ClassA_spirals")

# Read the DNA sequences from the file
sequence_list = read_sequences(file_path)

# Generate spirals and save photos
for i, sequence in enumerate(sequence_list, 1):
    create_dna_spiral(sequence, "ClassA_spirals", i)

Saved spiral as ClassA_spirals/seq_1.png
Saved spiral as ClassA_spirals/seq_2.png
Saved spiral as ClassA_spirals/seq_3.png
Saved spiral as ClassA_spirals/seq_4.png
Saved spiral as ClassA_spirals/seq_5.png
Saved spiral as ClassA_spirals/seq_6.png
Saved spiral as ClassA_spirals/seq_7.png
Saved spiral as ClassA_spirals/seq_8.png
Saved spiral as ClassA_spirals/seq_9.png
Saved spiral as ClassA_spirals/seq_10.png
Saved spiral as ClassA_spirals/seq_11.png
Saved spiral as ClassA_spirals/seq_12.png
Saved spiral as ClassA_spirals/seq_13.png
Saved spiral as ClassA_spirals/seq_14.png
Saved spiral as ClassA_spirals/seq_15.png
Saved spiral as ClassA_spirals/seq_16.png
Saved spiral as ClassA_spirals/seq_17.png
Saved spiral as ClassA_spirals/seq_18.png
Saved spiral as ClassA_spirals/seq_19.png
Saved spiral as ClassA_spirals/seq_20.png
Saved spiral as ClassA_spirals/seq_21.png
Saved spiral as ClassA_spirals/seq_22.png
Saved spiral as ClassA_spirals/seq_23.png
Saved spiral as ClassA_spirals/seq_24.png
S

In [4]:
# for classA sequences
file_path = "ClassB.txt"

# Create the 'spirals' directory if it doesn't exist
import os
if not os.path.exists("ClassB_spirals"):
    os.makedirs("ClassB_spirals")

# Read the DNA sequences from the file
sequence_list = read_sequences(file_path)

# Generate spirals and save photos
for i, sequence in enumerate(sequence_list, 1):
    create_dna_spiral(sequence, "ClassB_spirals", i)

Saved spiral as ClassB_spirals/seq_1.png
Saved spiral as ClassB_spirals/seq_2.png
Saved spiral as ClassB_spirals/seq_3.png
Saved spiral as ClassB_spirals/seq_4.png
Saved spiral as ClassB_spirals/seq_5.png
Saved spiral as ClassB_spirals/seq_6.png
Saved spiral as ClassB_spirals/seq_7.png
Saved spiral as ClassB_spirals/seq_8.png
Saved spiral as ClassB_spirals/seq_9.png
Saved spiral as ClassB_spirals/seq_10.png
Saved spiral as ClassB_spirals/seq_11.png
Saved spiral as ClassB_spirals/seq_12.png
Saved spiral as ClassB_spirals/seq_13.png
Saved spiral as ClassB_spirals/seq_14.png
Saved spiral as ClassB_spirals/seq_15.png
Saved spiral as ClassB_spirals/seq_16.png
Saved spiral as ClassB_spirals/seq_17.png
Saved spiral as ClassB_spirals/seq_18.png
Saved spiral as ClassB_spirals/seq_19.png
Saved spiral as ClassB_spirals/seq_20.png
Saved spiral as ClassB_spirals/seq_21.png
Saved spiral as ClassB_spirals/seq_22.png
Saved spiral as ClassB_spirals/seq_23.png
Saved spiral as ClassB_spirals/seq_24.png
S

In [5]:
import os

# Define the paths to the image directories
class_a_dir = "ClassA_spirals"
class_b_dir = "ClassB_spirals"

# Initialize lists to store image paths and labels
image_paths = []
labels = []

# Class A (label 0)
class_a_images = os.listdir(class_a_dir)
image_paths.extend([os.path.join(class_a_dir, img) for img in class_a_images])
labels.extend([0] * len(class_a_images))

# Class B (label 1)
class_b_images = os.listdir(class_b_dir)
image_paths.extend([os.path.join(class_b_dir, img) for img in class_b_images])
labels.extend([1] * len(class_b_images))


In [15]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from PIL import Image

# Step 2: Feature Extraction and Data Splitting

# Function to read and preprocess an image
def preprocess_image(image_path, target_size=(128, 128)):
    img = Image.open(image_path)  # Read the image
    img = img.resize(target_size, Image.BILINEAR)  # Resize the image
    img = np.array(img)  # Convert to numpy array
    img = img / 255.0  # Normalize pixel values to [0, 1]
    return img

# Load and preprocess the images
images = []
for image_path in image_paths:
    img = preprocess_image(image_path)
    images.append(img)

# Convert the list of images to a numpy array
X = np.array(images)

# Flatten the images into 1D arrays
X = X.reshape(X.shape[0], -1)

# Standardize the features (mean=0, variance=1)
scaler = StandardScaler()
X = scaler.fit_transform(X)


# Split the data into training (75%) and testing (25%) sets
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.25, random_state=42)

# Now, you have X_train, y_train for training and X_test, y_test for testing.


In [20]:
from sklearn.model_selection import GridSearchCV
from sklearn import svm
from sklearn.metrics import accuracy_score,classification_report
# Step 5: Hyperparameter Tuning

# Define a parameter grid to search
param_grid = {
    'C': [0.1, 1, 10, 100],  # Regularization parameter
    'kernel': ['linear', 'rbf'],  # Kernel type
}

# Create an SVM classifier
svm_classifier = svm.SVC(random_state=42)

# Create a GridSearchCV object
grid_search = GridSearchCV(estimator=svm_classifier, param_grid=param_grid, cv=5, n_jobs=-1)

# Perform the grid search on the training data
grid_search.fit(X_train, y_train)

# Get the best hyperparameters
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# Get the best model
best_svm_classifier = grid_search.best_estimator_

# Evaluate the best model
y_pred = best_svm_classifier.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy with Best Model: {accuracy * 100:.2f}%")

# Generate a classification report for the best model
report = classification_report(y_test, y_pred)
print("Classification Report for Best Model:\n", report)

Best Hyperparameters: {'C': 10, 'kernel': 'rbf'}
Accuracy with Best Model: 91.67%
Classification Report for Best Model:
               precision    recall  f1-score   support

           0       0.87      1.00      0.93        13
           1       1.00      0.82      0.90        11

    accuracy                           0.92        24
   macro avg       0.93      0.91      0.91        24
weighted avg       0.93      0.92      0.92        24



In [21]:
import joblib

# Define the filename for saving the model
model_filename = "best_svm_model.pkl"

# Save the best SVM model to the specified file
joblib.dump(svm_classifier, model_filename)

print(f"Best SVM model saved to {model_filename}")

Best SVM model saved to best_svm_model.pkl


In [22]:
# for Prediction sequences
file_path = "Prediction.txt"

# Create the 'spirals' directory if it doesn't exist
import os
if not os.path.exists("Prediction_spirals"):
    os.makedirs("Prediction_spirals")

# Read the DNA sequences from the file
sequence_list = read_sequences(file_path)

# Generate spirals and save photos
for i, sequence in enumerate(sequence_list, 1):
    create_dna_spiral(sequence, "Prediction_spirals", i)

Saved spiral as Prediction_spirals/seq_1.png


In [23]:
import numpy as np
from PIL import Image
import joblib

# Function to preprocess an image for prediction
def preprocess_prediction_image(image_path, target_size=(128, 128)):
    img = Image.open(image_path)  # Read the image
    img = img.resize(target_size, Image.BILINEAR)  # Resize the image
    img = np.array(img)  # Convert to numpy array
    img = img / 255.0  # Normalize pixel values to [0, 1]
    img = img.reshape(1, -1)  # Flatten the image into a 1D array
    return img

# Load the saved SVM model
model_filename = "best_svm_model.pkl"
best_svm_classifier = joblib.load(model_filename)

# Directory containing prediction images
prediction_image_dir = "Prediction_spirals"

# List to store predictions
predictions = []

# Iterate over the prediction images
for image_filename in os.listdir(prediction_image_dir):
    if image_filename.endswith(".png"):
        image_path = os.path.join(prediction_image_dir, image_filename)
        
        # Preprocess the image for prediction
        preprocessed_image = preprocess_prediction_image(image_path)
        preprocessed_image
        # Make a prediction using the SVM model
        prediction = best_svm_classifier.predict(preprocessed_image)
        
        # Append the prediction to the list
        predictions.append((image_filename, prediction[0]))

for image_filename, prediction in predictions:
    print(f"Image: {image_filename}, Predicted Class: {'Class A' if prediction == 0 else 'Class B'}")

NotFittedError: This SVC instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.