In [1]:
import pandas as pd

In [2]:
def mirna_to_mrna(string: str) -> str:
    """
    Return the reverse complement of an RNA sequence.

    Every base is swapped for its Watson-Crick partner (A<->U, C<->G) and the
    result is then reversed, which yields the mRNA site a miRNA would pair
    with, written in the opposite orientation to the input.

    Characters that are not one of the uppercase bases 'A', 'C', 'G', 'U'
    (for example lowercase letters or 'N') are kept unchanged; they are only
    moved by the reversal.

    Args:
    - string (str): The miRNA sequence to convert.

    Returns:
    - str: The reverse-complemented sequence, same length as the input.

    Example usage:
    >>> mirna_to_mrna('UGAGGUAGUAGGUUGUAUAGUU')
    'AACUAUACAACCUACUACCUCA'
    >>> mirna_to_mrna('GAGGUAG')
    'CUACCUC'
    """
    complement = {'A': 'U', 'C': 'G', 'G': 'C', 'U': 'A'}
    # Complement base by base; unknown symbols fall through untouched.
    complemented = ''.join(complement.get(base, base) for base in string)
    # Reverse so the result reads in the opposite orientation to the input.
    return complemented[::-1]



In [6]:
# Load the TargetScan miRNA table (tab-separated, header on the first row)
# and reshape it in a single chain.
df = (
    pd.read_csv("targetscan_mirnas.txt", sep="\t", header=0)
    # keep only human entries (Species ID 9606)
    .loc[lambda table: table["Species ID"] == 9606]
    # discard the columns that are not needed afterwards
    .drop(columns=["miR family", "Species ID"])
    # positional reorder of the five remaining columns
    .iloc[:, [1, 2, 0, 3, 4]]
    # switch to short column names
    .rename(
        columns={
            "Seed+m8": "seed",
            "MiRBase ID": "name",
            "Mature sequence": "sequence",
            "Family Conservation?": "conservation",
            "MiRBase Accession": "accession",
        }
    )
)

# Convert both sequence columns with mirna_to_mrna; the converted 7mer seed
# additionally loses its first base, leaving a 6mer.
df = df.assign(
    sequence=df["sequence"].apply(mirna_to_mrna),
    seed=df["seed"].apply(mirna_to_mrna).str[1:],
)

In [8]:
# Write the processed table to disk as CSV, leaving out the DataFrame index.
df.to_csv(
    "../../processed/targetscan/targetscan.csv",
    index=False,
)