In [1]:
from RiboMetric.metrics import (
    fourier_transform,
    read_frame_information_content,
    periodicity_dominance,
    periodicity_autocorrelation,
    )

from RiboMetric.modules import (
    read_frame_score_trips_viz
)

import plotly.graph_objects as go
import pandas as pd


In [2]:
import itertools

def generate_read_frame_distribution_permutations(total=100):
    """
    Generate every ordered split of `total` across the three reading frames.

    Each split assigns a positive integer to frames 0, 1 and 2 such that the
    three values sum to `total`. Splits in which two frames share the same
    value (e.g. 25 / 25 / 50) are included; drawing three *distinct* numbers
    with itertools.permutations would silently drop them.

    Args:
        total (int): Value the three frame counts must sum to. Defaults to 100.

    Returns:
        simulated_read_frame_proportions (dict): Maps a running index
            (0, 1, 2, ...) to a dictionary {0: frame0, 1: frame1, 2: frame2}.
            Entries are ordered by ascending frame0, then ascending frame1.
            Empty when `total` is smaller than 3.
    """
    simulated_read_frame_proportions = {}

    # Choosing frame 0 and frame 1 fixes frame 2, so two nested ranges visit
    # every valid split exactly once without enumerating and filtering all
    # triples.
    for frame0 in range(1, total - 1):
        for frame1 in range(1, total - frame0):
            frame2 = total - frame0 - frame1
            index = len(simulated_read_frame_proportions)
            simulated_read_frame_proportions[index] = {
                0: frame0,
                1: frame1,
                2: frame2,
            }

    return simulated_read_frame_proportions


In [3]:
import random

def generate_metagene(frame_ratios, start, stop, noise_factor=0.8, max_count=100, seed=None):
    """
    Generate a metagene dictionary with varying degrees of periodicity based on frame ratios.

    Every position receives a baseline count of 1, plus a frame-dependent
    signal of round(frame_ratios[frame] * max_count), plus integer noise drawn
    uniformly from [0, noise_factor * max_count] and truncated towards zero.
    The frame of a position is computed relative to `start`, i.e.
    (pos - start) % 3.

    Args:
        frame_ratios (dict): A dictionary representing the global ratio of reads per frame.
                             Keys are frame positions (0, 1, 2), and values are the corresponding ratios.
        start (int): The start position of the metagene (inclusive).
        stop (int): The stop position of the metagene (exclusive).
        noise_factor (float): Upper bound of the random noise, expressed as a
                              fraction of `max_count`. 0 disables the noise.
        max_count (int): Scale applied to both the frame ratios and the noise.
        seed (int | None): When given, noise is drawn from a private
                           random.Random(seed) so the result is reproducible
                           and the module-level generator is left untouched.
                           When None, the module-level generator is used.

    Returns:
        dict: A dictionary representing the metagene, where keys are positions and values are counts.
              Empty when `stop` <= `start`.
    """
    rng = random if seed is None else random.Random(seed)
    noise_ceiling = noise_factor * max_count

    metagene = {}
    for pos in range(start, stop):
        frame = (pos - start) % 3
        # round() rather than int(): products such as 0.29 * 100 evaluate to
        # 28.999999999999996 and would otherwise be truncated to 28.
        signal = round(frame_ratios[frame] * max_count)
        noise = int(rng.uniform(0, noise_ceiling))
        metagene[pos] = 1 + signal + noise

    return metagene


In [10]:
# Seed the module-level generator before simulating: generate_metagene draws
# random noise, so without a fixed seed the profiles (and every score computed
# from them) would change on each Restart & Run All.
random.seed(42)

# Three simulated metagenes over positions 0-29. The first two use a low noise
# factor (0.1) with, respectively, one dominant frame and two equally weighted
# frames; the third spreads reads almost evenly over all frames with a high
# noise factor (0.8).
example_metagenes = {
    "best": generate_metagene({0: 0.98, 1: 0.01, 2: 0.01}, 0, 30, noise_factor=0.1),
    "worst": generate_metagene({0: 0.45, 1: 0.45, 2: 0.1}, 0, 30, noise_factor=0.1),
    "random": generate_metagene({0: 0.33, 1: 0.33, 2: 0.34}, 0, 30, noise_factor=0.8)
}



In [32]:
import math

def calculate_3nt_periodicity_score(probabilities):
    '''
    Calculate the triplet periodicity score for a read frame distribution.
    The score is the square root of the fraction of the maximum possible
    information that the distribution carries.

    Numerator is the Maximum Entropy of the triplet distribution (log2(3))
    minus the Shannon entropy of the given distribution.
    Denominator is the Maximum Entropy of the triplet distribution.

    Inputs:
        probabilities (iterable of float): The probability of a read falling
            in each frame. Expected to sum to 1. A probability of exactly 0
            contributes nothing to the entropy; a negative probability raises
            ValueError.

    Returns:
        result (float): The triplet periodicity score, between 0 (reads spread
            evenly over the frames) and 1 (all reads in a single frame).
    '''
    maximum_entropy = math.log2(3)

    # By convention 0 * log2(0) = 0, so empty frames are skipped instead of
    # letting math.log2(0) raise ValueError.
    entropy = -sum(
        probability * math.log2(probability)
        for probability in probabilities
        if probability != 0
    )

    information_fraction = (maximum_entropy - entropy) / maximum_entropy
    # Floating-point rounding on a (near-)uniform distribution can leave the
    # fraction a hair below 0, which math.sqrt rejects; clamp to [0, 1].
    information_fraction = min(1.0, max(0.0, information_fraction))

    return math.sqrt(information_fraction)

# NOTE: this definition rebinds the name `read_frame_information_content`
# that the first cell imports from RiboMetric.metrics; later cells call this
# local version.
def read_frame_information_content(
    read_frame_distribution: dict,
        ) -> dict:
    """
    Calculate the read frame periodicity score for every read length in a
    read frame distribution.

    For each read length the per-frame counts are converted to probabilities
    and passed to calculate_3nt_periodicity_score, which turns the Shannon
    entropy of the distribution into a normalised score.

    Inputs:
        read_frame_distribution: Dictionary mapping read length to a
                dictionary of {frame: read count}

    Outputs:
        frame_info_content_dict: Dictionary where keys are read length and
                values are tuples (score, total_count): the periodicity score
                and the total number of reads summed over all frames. A read
                length with no reads gets a score of 0.0.
    """
    # Tiny offset that keeps zero-count frames from producing a probability
    # of exactly 0.
    pseudocount = 1e-100
    frame_info_content_dict = {}
    for read_length, frame_counts in read_frame_distribution.items():
        total_count = sum(frame_counts.values())

        if total_count == 0:
            # Without this guard every probability would be
            # pseudocount / pseudocount = 1, the entropy would be 0 and an
            # empty read length would be reported with a perfect score.
            frame_info_content_dict[read_length] = 0.0, total_count
            continue

        probabilities = [
            (count + pseudocount) / (total_count + pseudocount)
            for count in frame_counts.values()
        ]
        score = calculate_3nt_periodicity_score(probabilities)

        frame_info_content_dict[read_length] = score, total_count

    return frame_info_content_dict

In [4]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [57]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# One panel per simulated metagene. Titles are derived from the dictionary
# keys, in plotting order, so a panel can never be labelled with the name of
# a different profile.
fig = make_subplots(
    rows=1,
    cols=len(example_metagenes),
    subplot_titles=[name.capitalize() for name in example_metagenes],
    vertical_spacing=0.2,
)

# Plot each metagene in a separate subplot
for i, (metagene_name, metagene) in enumerate(example_metagenes.items(), start=1):
    # Collapse the per-position counts into total counts per reading frame.
    read_frame_dict = {0: 0, 1: 0, 2: 0}
    for pos, count in metagene.items():
        read_frame_dict[pos % 3] += count

    positions = list(metagene.keys())
    counts = list(metagene.values())

    # Every scorer is fed a single placeholder read length (key 1) and the
    # result for that key is read back out.
    trips_score = round(read_frame_score_trips_viz({1: read_frame_dict})[1], 2)
    fourier_score = round(fourier_transform({"start": {1: metagene}}, read_lengths=[1])[1], 2)
    info_score = round(read_frame_information_content({1: read_frame_dict})[1][0], 2)
    dominance_score = round(periodicity_dominance({1: read_frame_dict})[1], 2)
    autocorrelation_score = round(periodicity_autocorrelation({"start": {1: metagene}}, 3)[1], 2)

    fig.add_trace(go.Bar(x=positions, y=counts, name=metagene_name), row=1, col=i)

    # Plotly annotation text is HTML-like: line breaks must be written as
    # <br>, plain newlines are not rendered as separate lines.
    score_lines = [
        f"Trips Score: {trips_score}",
        f"Fourier Score: {fourier_score}",
        f"Info Score: {info_score}",
        f"Dominance Score: {dominance_score}",
        f"Autocorrelation Score: {autocorrelation_score}",
    ]

    # Pin the score box to the top-left corner of this panel. "x domain" /
    # "y domain" are fractions of the subplot's own axes, so the position is
    # the same in every panel regardless of the data range.
    fig.add_annotation(
        x=0,
        y=1,
        xref="x domain",
        yref="y domain",
        xanchor="left",
        yanchor="top",
        text="<br>".join(score_lines),
        align="left",
        showarrow=False,
        bgcolor="rgba(255, 255, 255, 0.8)",
        row=1,
        col=i,
    )


# Set the axis labels
fig.update_xaxes(title_text='Position')
fig.update_yaxes(title_text='Count')

# Set the overall title
fig.update_layout(title='Periodic Metagenes', height=600)

# Show the plot
fig.show()