In [1]:
# The technical support of Thermo Fisher has information on whether
# their siRNAs exhibit off-target effects and whether they hit the
# alleged target at all
# Hence, this notebook collects the catalogue numbers of all siRNAs from
# Thermo Fisher (Ambion has been purchased by Thermo Fisher) in order to
# request this kind of information for them

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Columns that are explicitly read as strings rather than leaving the
# dtype inference to pandas
string_columns = [
    "Ensembl_ID_OnTarget_Ensembl_GRCh38_release_87",
    "Ensembl_ID_OnTarget_NCBI_HeLa_phs000643_v3_p1_c1_HMB",
    "Gene_Description",
    "ID",
    "ID_OnTarget_Ensembl_GRCh38_release_87",
    "ID_OnTarget_Merge",
    "ID_OnTarget_NCBI_HeLa_phs000643_v3_p1_c1_HMB",
    "ID_OnTarget_RefSeq_20170215",
    "ID_manufacturer",
    "Name_alternatives",
    "PLATE_QUALITY_DESCRIPTION",
    "RefSeq_ID_OnTarget_RefSeq_20170215",
    "Seed_sequence_common",
    "WELL_QUALITY_DESCRIPTION",
    "siRNA_error",
    "siRNA_number",
    "Precursor_Name",
]

# Map every listed column to the str type
dtype_dict = dict.fromkeys(string_columns, str)

# Despite its .csv extension, the file is parsed as tab-separated
siRNA_df = pd.read_csv(
    "VACV_Report_only_valid_single_pooled_siRNA_and_esiRNA.csv",
    dtype=dtype_dict,
    sep="\t",
)

In [3]:
# Boolean mask selecting the rows whose manufacturer is Ambion
is_ambion = siRNA_df["Manufacturer"].eq("Ambion")
ambion_subset = siRNA_df.loc[is_ambion]

# Distinct values of the "WellType" column within the Ambion subset
ambion_siRNA_types = ambion_subset["WellType"].unique()

# Report the distinct types as a comma-separated list
joined_siRNA_types = ", ".join(ambion_siRNA_types)
print(
    "Unique siRNA types comprised in the Ambion (Thermo Fisher) "
    f"subset: {joined_siRNA_types}"
)

Unique siRNA types comprised in the Ambion (Thermo Fisher) subset: SIRNA


In [4]:
# Distinct catalogue numbers occurring in the Ambion subset
catalog_number_column = ambion_subset["Catalog_number"]
ambion_unique_cat_numbers = catalog_number_column.unique()

# The 10 catalogue numbers Thermo Fisher has already provided
# information about are not contained in the DataFrame, so there is no
# need to filter them out
# The reason is that the DataFrame only includes targets that are valid,
# i.e. not withdrawn by NCBI, and named
n_unique_cat_numbers = len(ambion_unique_cat_numbers)
print(
    "Amount of unique catalogue numbers comprised in the Ambion "
    f"subset: {n_unique_cat_numbers}"
)

Amount of unique catalogue numbers comprised in the Ambion subset: 5864


In [11]:
# Save the unique catalogue numbers to a text file, one per line
# Bear in mind that the "with" context manager is preferred when working
# with files as it automatically takes care of closing files, even in
# case of errors/exceptions
# Python's .writelines() method does not add newline characters between
# the elements of the sequence it is given, so it offers no advantage
# over .write() here
# Instead, the catalogue numbers are joined with the newline character
# (\n) and written in a single call; as a result, every entry except the
# last one is followed by a newline
# As no placeholder character is used for separating the entries, the
# catalogue numbers are written unaltered regardless of the characters
# they contain (e.g. underscores)
with open("ambion_catalogue_numbers.txt", "w") as f:
    f.write("\n".join(ambion_unique_cat_numbers))

In [None]:
#