# Uniformity

This notebook examines different metrics for assessing the uniformity of Ribo-Seq data based on their metagene profiles.

## Setup


In [1]:
!pip uninstall RiboMetric -y
!pip install git+https://github.com/JackCurragh/RiboMetric.git -q 
!pip install plotly pandas numpy scikit-learn -q

Found existing installation: RiboMetric 0.1.9
Uninstalling RiboMetric-0.1.9:
  Successfully uninstalled RiboMetric-0.1.9


In [7]:
from RiboMetric.metrics import (
    uniformity_autocorrelation,
    uniformity_entropy,
    uniformity_gini_index,
    uniformity_theil_index,
    )

import itertools
import numpy as np


import plotly.graph_objects as go
import pandas as pd


## Simulated Inputs


In [4]:
import itertools

def generate_read_frame_distribution_permutations(total=100):
    """
    Generate every ordered read frame distribution of three distinct positive integers summing to `total`.

    Each distribution assigns a positive integer to frames 0, 1 and 2. The three values
    are pairwise distinct, so distributions containing ties (e.g. 25/25/50) are not
    produced. Distributions are emitted in ascending order of the frame 0 value and,
    within that, ascending order of the frame 1 value.

    Args:
        total (int): The sum of the three frame values. Defaults to 100.

    Returns:
        simulated_read_frame_proportions (dict): Dictionary mapping a running index (starting at 0)
            to a dict {0: a, 1: b, 2: c} with a + b + c == total.
    """
    simulated_read_frame_proportions = {}

    # Choosing frames 0 and 1 fixes frame 2, so two nested loops replace the
    # filter over every 3-permutation of 1..total.
    for frame0 in range(1, total - 1):
        for frame1 in range(1, total - frame0):
            frame2 = total - frame0 - frame1
            # Skip ties: only distributions with three different values are kept
            if frame0 == frame1 or frame0 == frame2 or frame1 == frame2:
                continue
            index = len(simulated_read_frame_proportions)
            simulated_read_frame_proportions[index] = {0: frame0, 1: frame1, 2: frame2}

    return simulated_read_frame_proportions

import random

def generate_metagene(frame_ratios, start, stop, noise_factor=0.8, max_count=100, seed=None):
    """
    Generate a metagene dictionary with varying degrees of periodicity based on frame ratios.

    Every position gets a constant 1, plus int(frame_ratios[frame] * max_count) for its
    frame, plus a random integer noise term drawn from [0, noise_factor * max_count].

    Args:
        frame_ratios (dict): A dictionary representing the global ratio of reads per frame.
                             Keys are frame positions (0, 1, 2), and values are the corresponding ratios.
        start (int): The start position of the metagene (inclusive). It is treated as frame 0.
        stop (int): The stop position of the metagene (exclusive).
        noise_factor (float): Upper bound of the noise, expressed as a multiple of max_count.
        max_count (int): Scale applied to both the frame ratios and the noise.
        seed (int, optional): If given, noise is drawn from a private random.Random(seed) so the
                              result is reproducible. If None, the global `random` state is used.

    Returns:
        dict: A dictionary representing the metagene, where keys are positions and values are counts.
              Empty when stop <= start.

    Raises:
        KeyError: If frame_ratios lacks an entry for a frame that occurs in the range.
    """
    # A private generator makes seeded calls repeatable without touching the global stream
    rng = random if seed is None else random.Random(seed)
    noise_ceiling = noise_factor * max_count

    metagene = {}
    for pos in range(start, stop):
        # Frame is counted relative to the start position, not the absolute coordinate
        frame = (pos - start) % 3
        baseline = 1 + int(frame_ratios[frame] * max_count)
        metagene[pos] = baseline + int(rng.uniform(0, noise_ceiling))

    return metagene



In [5]:


def generate_permuted_profile_with_bins(base_profile, num_bins=5):
    """
    Build noisy copies of a profile, one for every combination of per-bin noise levels.

    The profile is cut into `num_bins` consecutive bins. Each bin is assigned an integer
    weight from 0 to `num_bins`, and the bin's values are redrawn from a normal
    distribution centred on the original values with the weight as standard deviation.
    One profile is produced per weight combination, i.e. (num_bins + 1) ** num_bins in total.

    Args:
        base_profile (list): The base read count profile.
        num_bins (int): Number of bins to divide the profile into.

    Returns:
        list: One perturbed profile (a flat list of values) per weight combination, in the
              order produced by itertools.product.
    """
    # Bin boundaries as integer indices into the profile
    edges = np.linspace(0, len(base_profile), num_bins + 1, dtype=int)
    segments = [base_profile[lo:hi] for lo, hi in zip(edges[:-1], edges[1:])]

    def perturb(weights):
        # Redraw each bin with its own standard deviation and stitch the bins back together
        noisy = []
        for segment, sigma in zip(segments, weights):
            noisy.extend(np.random.normal(loc=segment, scale=sigma))
        return noisy

    weight_levels = range(num_bins + 1)
    return [perturb(weights) for weights in itertools.product(weight_levels, repeat=num_bins)]

In [9]:
start = 30
stop = 150

# Upper limit on how many frame distributions are expanded into noisy profiles.
# Every distribution yields (5 + 1) ** 5 = 7776 profiles of 100 values, and there are
# 4704 distributions, so keeping all of them is very costly in time and memory.
# Set to None to process every distribution.
MAX_DISTRIBUTIONS = 10

simulated_metagenes = {'start': {}, 'stop': {}}
frame_distributions = generate_read_frame_distribution_permutations()
for i, frame_ratios in itertools.islice(frame_distributions.items(), MAX_DISTRIBUTIONS):
    # NOTE(review): `start`/`stop` above are not used here; the metagene spans 0-100.
    # NOTE(review): the distributions sum to 100 while generate_metagene documents
    # ratios - confirm the intended scale.
    metagene = generate_metagene(frame_ratios, 0, 100)
    # Store under key i; rebinding simulated_metagenes['start'] to a fresh dict on each
    # pass would discard the profiles of every earlier distribution.
    simulated_metagenes['start'][i] = generate_permuted_profile_with_bins(
        list(metagene.values()), num_bins=5
    )

In [8]:
# Split the metagene mapping into parallel x (position) and y (count) sequences.
# `metagene` is whatever the most recently executed simulation cell left behind.
positions, counts = list(metagene), list(metagene.values())

# Draw one bar per position
bar_trace = go.Bar(x=positions, y=counts)
fig = go.Figure(data=bar_trace)

# Title and axis labels so the figure can be read on its own
fig.update_layout(
    title='Metagene with no periodicity',
    xaxis_title='Position',
    yaxis_title='Count',
)

# Render the figure
fig.show()