<a href="https://colab.research.google.com/github/Kamiilykam/pandas-challenge/blob/master/Coding%20Script%20for%20ASO%20Optimization%20Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Packages


In [None]:
pip install biopython

In [None]:
import pandas as pd

In [None]:
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp

In [None]:
# No separate `stats` distribution is installed here: the visible code only
# imports `scipy.stats`, which is part of SciPy (installed in the next cell).

In [None]:
!pip install scipy

In [None]:
from Bio.Seq import Seq
from Bio.SeqUtils import MeltingTemp
import csv
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt

def read_mrna_sequence(file_path):
    """Read a nucleotide sequence from a FASTA (or plain-text) file.

    Lines starting with ``>`` are FASTA description lines and are skipped.
    Every other non-blank line is stripped of surrounding whitespace and the
    pieces are joined, so neither the header text nor line breaks end up in
    the returned sequence. Only the first record is used: reading stops at
    the next ``>`` line once sequence data has been collected.

    A file that holds just a bare sequence (no header) is still accepted.

    Args:
        file_path: Path of the text/FASTA file to read.

    Returns:
        A ``Seq`` built from the concatenated sequence lines (empty if the
        file contains no sequence data).
    """
    sequence_parts = []
    seen_header = False
    with open(file_path, 'r') as file:
        for raw_line in file:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                # A second header marks the start of another record; stop there.
                if seen_header or sequence_parts:
                    break
                seen_header = True
                continue
            sequence_parts.append(line)
    return Seq(''.join(sequence_parts))

def generate_antisense_oligos(mrna_sequence, oligo_length=25):
    """Return every ``oligo_length``-long window of the reverse complement.

    Args:
        mrna_sequence: Sequence object exposing ``reverse_complement()``,
            ``len()`` and slicing (e.g. a Biopython ``Seq``).
        oligo_length: Window size in bases; must be at least 1.

    Returns:
        A list of slices of the reverse complement, one per start position,
        in order. The list is empty when the sequence is shorter than
        ``oligo_length``.

    Raises:
        ValueError: If ``oligo_length`` is smaller than 1.
    """
    if oligo_length < 1:
        raise ValueError(f"oligo_length must be a positive integer, got {oligo_length}")

    # The reverse complement does not depend on the window position, so it is
    # computed once instead of once per window.
    antisense_strand = mrna_sequence.reverse_complement()
    window_count = len(mrna_sequence) - oligo_length + 1
    return [antisense_strand[start:start + oligo_length] for start in range(window_count)]

def calculate_minimum_free_energy(oligo, temperature):
    """Return the free-energy proxy used by this notebook for ``oligo``.

    The result is the negated Wallace-rule melting temperature of the oligo
    (``MeltingTemp.Tm_Wallace``) multiplied by ``1.987 / 1000`` and by
    ``temperature + 273.15``, then divided by 1000.

    NOTE(review): 1.987 looks like the gas constant in cal/(mol*K) and
    ``+ 273.15`` like a Celsius-to-Kelvin shift -- confirm. This is a
    rescaled melting temperature, not a nearest-neighbour free energy, so
    the "kcal/mol" label used downstream should be verified.

    Args:
        oligo: Oligo sequence; converted with ``str()`` before use.
        temperature: Temperature value that 273.15 is added to.

    Returns:
        The computed float.
    """
    wallace_tm = MeltingTemp.Tm_Wallace(str(oligo))
    scale = 1.987 / 1000
    shifted_temperature = temperature + 273.15
    # Same left-to-right evaluation order as before, so the floating-point
    # result is bit-for-bit unchanged.
    return -wallace_tm * scale * shifted_temperature * 1.0 / 1000

def calculate_gc_content(oligo):
    """Return the GC content of ``oligo`` as a percentage (0-100).

    The CSV column and the plot axis fed by this value are labelled
    "GC Content (%)", and it is weighted against an alignment percentage on a
    0-100 scale, so the share of G/C bases is returned rather than a raw
    count. Counting is case-insensitive.

    Args:
        oligo: Oligo sequence; converted with ``str()`` before counting.

    Returns:
        ``100 * (G + C) / length`` as a float, or ``0.0`` for an empty oligo.
    """
    bases = str(oligo).upper()
    if not bases:
        # Avoid dividing by zero; an empty oligo has no GC bases.
        return 0.0
    gc_count = bases.count("G") + bases.count("C")
    return (gc_count / len(bases)) * 100

def calculate_alignment_percentage(oligo, mrna_sequence):
    """Return the percentage of oligo positions equal to the mRNA at the same index.

    Position ``i`` of ``oligo`` is compared with position ``i`` of
    ``mrna_sequence``; ``zip`` stops at the shorter of the two, and the match
    count is divided by ``len(oligo)``.

    NOTE(review): every oligo is therefore compared against the *start* of
    ``mrna_sequence``, wherever the oligo was taken from, and the test is for
    identical (not complementary) bases -- confirm this is the intended
    metric.

    Args:
        oligo: Oligo sequence (any sized iterable of bases).
        mrna_sequence: Reference sequence to compare against.

    Returns:
        Match percentage as a float in the range 0-100; ``0.0`` for an empty
        oligo.
    """
    oligo_length = len(oligo)
    if oligo_length == 0:
        # Nothing to compare; avoid ZeroDivisionError.
        return 0.0
    matches = sum(1 for a, b in zip(oligo, mrna_sequence) if a == b)
    return (matches / oligo_length) * 100

def calculate_combined_score(gc_content, alignment_percentage, weight_gc=0.7, weight_alignment=0.3):
    """Return the weighted sum of GC content and alignment percentage.

    Args:
        gc_content: GC-content value of the oligo.
        alignment_percentage: Alignment percentage of the oligo.
        weight_gc: Weight applied to ``gc_content`` (default 0.7).
        weight_alignment: Weight applied to ``alignment_percentage``
            (default 0.3).

    Returns:
        ``weight_gc * gc_content + weight_alignment * alignment_percentage``.
    """
    # Weights are parameters so they can be tuned per call instead of by
    # editing the function body.
    combined_score = (weight_gc * gc_content) + (weight_alignment * alignment_percentage)
    return combined_score

def write_to_csv(data, filename):
    """Write the scored oligo rows to ``filename`` as CSV.

    The file is created or overwritten. A fixed five-column header row is
    written first, followed by one row per item of ``data``.

    Args:
        data: Iterable of row sequences in header-column order.
        filename: Path of the CSV file to write.
    """
    header = (
        'Oligo',
        'Minimum Free Energy (kcal/mol)',
        'GC Content (%)',
        'Alignment Percentage (%)',
        'Combined Score',
    )
    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='') as csv_file:
        csv_out = csv.writer(csv_file)
        csv_out.writerow(header)
        csv_out.writerows(data)

def main(
    mrna_sequence_file='/content/NC_000011.10[62880098..62889656].fa.txt',
    temperature=37,
    oligo_length=25,
    csv_filename='oligo_data_combined.csv',
    top_n_annotations=10,
):
    """Score every antisense oligo of an mRNA, save the table and plot it.

    Steps: read the sequence, generate all antisense windows, compute the
    free-energy value, GC content, alignment percentage and combined score
    for each, sort by combined score (highest first), write the rows to a
    CSV file, and show a scatter plot of GC content against combined score
    with the top-ranked points numbered.

    Args:
        mrna_sequence_file: Path of the sequence file. The default is the
            Colab upload location used originally; pass another path when
            running elsewhere.
        temperature: Temperature passed to ``calculate_minimum_free_energy``.
        oligo_length: Length of each antisense oligo.
        csv_filename: Output CSV path.
        top_n_annotations: How many of the best-scoring points to label with
            their rank in the plot.
    """
    mrna_sequence = read_mrna_sequence(mrna_sequence_file)
    antisense_oligos = generate_antisense_oligos(mrna_sequence, oligo_length=oligo_length)

    # One tuple per oligo: (sequence, delta_g, gc_content, alignment %, combined score).
    alignments_data = []
    for oligo in antisense_oligos:
        delta_g = calculate_minimum_free_energy(oligo, temperature)
        gc_content = calculate_gc_content(oligo)
        alignment_percentage = calculate_alignment_percentage(oligo, mrna_sequence)
        combined_score = calculate_combined_score(gc_content, alignment_percentage)
        alignments_data.append((str(oligo), delta_g, gc_content, alignment_percentage, combined_score))

    # Sort alignments by the combined score in descending order
    alignments_data.sort(key=lambda row: row[4], reverse=True)

    # Write data to CSV file
    write_to_csv(alignments_data, csv_filename)
    print(f'Data written to {csv_filename}')

    # Plot the graph
    gc_contents = [row[2] for row in alignments_data]
    combined_scores = [row[4] for row in alignments_data]

    plt.figure(figsize=(10, 6))
    plt.scatter(gc_contents, combined_scores, color='blue', marker='o')
    plt.title('GC Content vs Combined Score')
    plt.xlabel('GC Content (%)')
    plt.ylabel('Combined Score')

    # Label the best-scoring points with their 1-based rank.
    top_points = zip(gc_contents[:top_n_annotations], combined_scores[:top_n_annotations])
    for rank, point in enumerate(top_points, start=1):
        plt.annotate(f'{rank}.', point)

    plt.show()

# Run the oligo-scoring pipeline when this cell/script executes as the main module.
if __name__ == "__main__":
    main()


In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats

def main(csv_path='oligo_data_combined.csv',
         column='Minimum Free Energy (kcal/mol)',
         group_size=1000):
    """Compare the first and last rows of the oligo table with a t-test.

    Reads ``csv_path``, takes the first ``group_size`` and the last
    ``group_size`` values of ``column`` (in file order) and runs an
    independent two-sample t-test on them. The pipeline cell writes the file
    sorted by combined score, highest first, so with that file the groups are
    the best- and worst-scoring oligos.

    Note: this definition replaces the ``main`` defined in the earlier
    pipeline cell once this cell has run.

    Args:
        csv_path: CSV file to read.
        column: Name of the column to test.
        group_size: Rows per group. If the file has fewer than
            ``2 * group_size`` rows the size is reduced to half the row count
            so the two groups never share rows.

    Returns:
        Tuple ``(t_stat, p_value)`` as returned by ``stats.ttest_ind``.
    """
    # Read the data
    df = pd.read_csv(csv_path)
    values = df[column]

    # Overlapping head/tail groups would not be independent samples.
    largest_disjoint_size = len(values) // 2
    if group_size > largest_disjoint_size:
        print(f'Note: only {len(values)} rows available; '
              f'using {largest_disjoint_size} rows per group instead of {group_size}.')
        group_size = largest_disjoint_size

    # Perform a t-test as an example
    group1 = values.head(group_size)
    group2 = values.tail(group_size)

    t_stat, p_value = stats.ttest_ind(group1, group2)

    # Print the t-statistic and p-value; scientific notation keeps very small
    # p-values from being displayed as 0.0000.
    print(f'\nT-statistic: {t_stat:.4f}')
    print(f'P-value: {p_value:.4e}')

    return t_stat, p_value

# Run the t-test when this cell/script executes as the main module.
if __name__ == "__main__":
    main()


T-statistic: -62.7681
P-value: 0.0000
