In [None]:
import openpyxl
from openpyxl import Workbook
from openpyxl.worksheet.datavalidation import DataValidation

In [None]:
# Column headings for the template; every metadata row supplies one value per
# heading, in this order.
header = [
    "Category",
    "Key",
    "Description",
    "Example Value",
    "Controlled Vocabulary / Value Restrictions",
    "Resource Link(s)",
]

# Start from an empty workbook, rename its active sheet, and write the
# headings as the first row.
wb = Workbook()
ws = wb.active
ws.title = "Metadata Template"
ws.append(header)

# Define the metadata rows.
# Each entry is one template row holding six values in the same order as
# `header`: Category, Key, Description, Example Value,
# Controlled Vocabulary / Value Restrictions, Resource Link(s).
# "N/A" is used where no resource link is given.
rows = [
    # --- Study Information ---
    ["Study Information", "Title", 
     "A concise title describing the overall study", 
     "Analysis of Intestinal Crypt, Villus, and Polyp Cells", 
     "Free text; consider limiting length (e.g., ≤250 characters)", 
     "N/A"],
    
    ["Study Information", "Summary / Abstract", 
     "Detailed study summary and objectives", 
     "This study investigates transcriptomic changes under infection conditions.", 
     "Free text; follow standard writing guidelines", 
     "N/A"],
    
    ["Study Information", "Experimental Design", 
     "Overview of study design including factors, controls, and replicates", 
     "RNA-seq of single cells comparing infected vs. control conditions.", 
     "Free text; consider using standardized descriptors if available", 
     "https://www.ebi.ac.uk/arrayexpress/help/"],
    
    # --- Sample Information ---
    ["Sample Information", "Sample Identifier", 
     "Unique identifier for each biological sample", 
     "RNA-Seq_Sample_001", 
     "Must be unique; use an alphanumeric/underscore format (e.g., regex: ^[A-Za-z0-9_]+$)", 
     "N/A"],
    
    ["Sample Information", "Organism", 
     "Species name from which the sample is derived", 
     "Pseudomonas aeruginosa", 
     "Must conform to NCBI Taxonomy names", 
     "https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html"],
    
    ["Sample Information", "Tissue / Cell Type", 
     "Specific tissue, cell line, or cell type of origin", 
     "HeLa cells", 
     "Use controlled terms from ontologies such as Uberon or Cell Ontology", 
     "http://www.ontobee.org/"],
    
    ["Sample Information", "Strain Identifier", 
     "Strain information (if applicable)", 
     "JCM 14847", 
     "Use recognized strain IDs (e.g., from ATCC, DSMZ)", 
     "https://www.atcc.org/"],
    
    ["Sample Information", "Collection Date", 
     "Date on which the sample was collected", 
     "2025-03-24", 
     "Must follow ISO 8601 format (YYYY-MM-DD)", 
     "https://www.iso.org/iso-8601-date-and-time-format.html"],
    
    # --- Experimental Conditions ---
    ["Experimental Conditions", "Treatment Conditions", 
     "Description of the experimental treatment(s) or conditions applied", 
     "Anti-Grem1 antibody treatment", 
     "If available, select from a controlled list (e.g., EFO terms)", 
     "https://www.ebi.ac.uk/efo/"],
    
    ["Experimental Conditions", "Treatment Duration", 
     "Duration for which the treatment was applied", 
     "10 weeks", 
     "Numeric value with a unit; standardize unit selection (e.g., hours, days, weeks)", 
     "N/A"],
    
    ["Experimental Conditions", "Concentration", 
     "Concentration or dosage of the treatment agent", 
     "30 mg/kg", 
     "Numeric value with unit; ensure consistency across samples", 
     "N/A"],
    
    # --- Sequencing Details ---
    ["Sequencing Details", "Library Preparation Method", 
     "Method or kit used for nucleic acid library preparation", 
     "TruSeq Stranded mRNA Library Prep Kit", 
     "Use a predefined list from vendor documentation or internal standards", 
     "https://www.illumina.com/"],
    
    ["Sequencing Details", "Sequencing Platform", 
     "Instrument or platform used for sequencing", 
     "Illumina NovaSeq", 
     "Must be selected from a controlled list (e.g., Illumina, PacBio, Oxford Nanopore)", 
     "https://www.ncbi.nlm.nih.gov/sra/docs/submitdesign/"],
    
    ["Sequencing Details", "Sequencing Depth", 
     "Number of reads obtained or overall depth of sequencing", 
     "50 million reads", 
     "Free numeric text; standard units should be used", 
     "N/A"],
    
    ["Sequencing Details", "Single or Paired-End", 
     "Specifies whether the sequencing was single-end or paired-end", 
     "Paired-end", 
     "Allowed values: Single-end, Paired-end", 
     "N/A"],
    
    ["Sequencing Details", "Instrument Model", 
     "Specific model of the sequencing instrument", 
     "Illumina HiSeq 2000", 
     "Use a controlled list (as provided by the manufacturer)", 
     "https://www.illumina.com/systems/sequencing-platforms.html"],
    
    ["Sequencing Details", "Quality Control Metrics", 
     "Key quality metrics such as RNA integrity number and quality scores", 
     "RIN > 8, Q30 > 85%", 
     "Free numeric values; standardize units and thresholds", 
     "N/A"],
    
    # --- Bioinformatics Analysis ---
    # NOTE(review): the resource link on the next row points to the MultiQC
    # repository, while the row itself lists read aligners (HISAT2, STAR,
    # Bowtie2) -- confirm this is the intended resource.
    ["Bioinformatics Analysis", "Read Alignment Algorithm", 
     "Software used for aligning reads to the reference genome", 
     "HISAT2", 
     "Choose from a controlled list (e.g., HISAT2, STAR, Bowtie2)", 
     "https://github.com/ewels/MultiQC"],
    
    ["Bioinformatics Analysis", "Genome Reference", 
     "Reference genome or assembly used for analysis", 
     "Pseudomonas aeruginosa PAO1", 
     "Must match identifiers from recognized genome databases", 
     "https://www.ncbi.nlm.nih.gov/assembly/"],
    
    ["Bioinformatics Analysis", "Differential Expression Analysis", 
     "Software and statistical thresholds used for differential expression analysis", 
     "DESeq2 with FDR < 0.05", 
     "Choose from accepted tools (e.g., DESeq2, edgeR, limma)", 
     "https://bioconductor.org/packages/release/bioc/html/DESeq2.html"],
    
    ["Bioinformatics Analysis", "Gene Annotation", 
     "Databases or methods used for gene functional annotation", 
     "Ensembl, KEGG", 
     "Use controlled database names; if possible, select from a predefined list", 
     "https://www.ensembl.org/"],
    
    # --- Ethical & Legal ---
    ["Ethical & Legal", "Ethical Approval", 
     "Details regarding ethics committee or IRB approval", 
     "IRB #12345", 
     "Free text; may follow a predefined institutional format", 
     "N/A"],
    
    ["Ethical & Legal", "Data Sharing Policy", 
     "Conditions or restrictions related to data sharing", 
     "Data available under CC-BY 4.0", 
     "Must be selected from a controlled list of licensing options", 
     "https://creativecommons.org/licenses/by/4.0/"],
    
    # --- Contact Information ---
    ["Contact Information", "Contributor / Contact Name", 
     "Primary contact or contributor for the study", 
     "Jane Doe", 
     "Free text; consider enforcing a 'Last Name, First Name' format", 
     "N/A"],
    
    ["Contact Information", "Contact Email", 
     "Email address of the study contact", 
     "jane.doe@example.com", 
     "Must follow standard email format", 
     "https://html.spec.whatwg.org/multipage/input.html#valid-e-mail-address"],
    
    ["Contact Information", "Contact Affiliation", 
     "Affiliation or institution of the study contact", 
     "Example University", 
     "Free text; optionally choose from a controlled list if available", 
     "https://grid.ac/"]
]

# Append each row to the worksheet, in list order, below whatever the sheet
# already contains.
for row in rows:
    ws.append(row)

# Example: add a data-validation drop-down to the "Example Value" cell of the
# "Single or Paired-End" row, restricting it to the two allowed values.
validation = DataValidation(
    type="list",
    formula1='"Single-end,Paired-end"',
    allow_blank=False,
    # showDropDown is intentionally left at its default: despite its name,
    # setting it to True makes Excel HIDE the in-cell drop-down arrow.
)
ws.add_data_validation(validation)

# Locate the target cell by its "Key" value instead of hard-coding a
# coordinate, so the validation stays on the right row if rows are added,
# removed, or reordered. (The previously hard-coded "D17" was one row too low
# and landed on the "Instrument Model" row.)
target_key = "Single or Paired-End"
key_column = header.index("Key") + 1  # worksheet columns are 1-based
value_column = header.index("Example Value") + 1
for row_number in range(2, ws.max_row + 1):  # row 1 holds the header
    if ws.cell(row=row_number, column=key_column).value == target_key:
        validation.add(ws.cell(row=row_number, column=value_column).coordinate)
        break
else:
    # Fail loudly rather than silently producing a template without the
    # intended restriction.
    raise ValueError(f"No row with key {target_key!r} found in the worksheet")

# Write the finished template to disk (path is relative to the current
# working directory) and report where it went.
output_filename = "metadata_template.xlsx"
wb.save(filename=output_filename)
print(f"Excel metadata template generated as '{output_filename}'")
