In [1]:
# Print the Snakemake version available to this notebook's shell.
!snakemake --version

7.32.4


In [6]:
snakefile_content = """
rule hello:
    output: "hello.txt"
    shell: "echo 'Hello, Snakemake!' > {output}"
"""

with open("Snakefile", "w") as f:
    f.write(snakefile_content)
!snakemake -s Snakefile -c1



[33mBuilding DAG of jobs...[0m
[33mUsing shell: /usr/bin/bash[0m
[33mProvided cores: 1 (use --cores to define parallelism)[0m
[33mRules claiming more threads will be scaled down.[0m
[33mJob stats:
job      count
-----  -------
hello        1
total        1
[0m
[33mSelect jobs to execute...[0m
[32m[0m
[32m[Fri Apr 18 05:35:36 2025][0m
[32mrule hello:
    output: hello.txt
    jobid: 0
    reason: Missing output files: hello.txt
    resources: tmpdir=/tmp[0m
[32m[0m
[32m[Fri Apr 18 05:35:36 2025][0m
[32mFinished job 0.[0m
[32m1 of 1 steps (100%) done[0m
[33mComplete log: .snakemake/log/2025-04-18T053536.549426.snakemake.log[0m


In [10]:
snakefile_content = """
rule all:
    input: "output.txt"

rule generate_message:
    output: "output.txt"
    shell: "echo 'Hello Snakemake!' > {output}"
"""

with open("Snakefile", "w") as f:
    f.write(snakefile_content)
!snakemake -s Snakefile -c1
!cat output.txt


[33mBuilding DAG of jobs...[0m
[33mNothing to be done (all requested files are present and up to date).[0m
[33mComplete log: .snakemake/log/2025-04-18T054510.052301.snakemake.log[0m
Hello Snakemake!


In [11]:
snakefile_content = """
rule all:
    input:
        "results/length.txt"

rule create_text:
    output:
        "data.txt"
    shell:
        "echo 'snakemake is powerful and fun to use' > {output}"

rule uppercase:
    input:
        "data.txt"
    output:
        "results/upper.txt"
    shell:
        "cat {input} | tr 'a-z' 'A-Z' > {output}"

rule count_chars:
    input:
        "results/upper.txt"
    output:
        "results/length.txt"
    shell:
        "wc -m < {input} | tr -d ' ' > {output}"
"""

with open("Snakefile", "w") as f:
    f.write(snakefile_content)
!snakemake -s Snakefile -c1
!cat output.txt


[33mBuilding DAG of jobs...[0m
[33mUsing shell: /usr/bin/bash[0m
[33mProvided cores: 1 (use --cores to define parallelism)[0m
[33mRules claiming more threads will be scaled down.[0m
[33mJob stats:
job            count
-----------  -------
all                1
count_chars        1
create_text        1
uppercase          1
total              4
[0m
[33mSelect jobs to execute...[0m
[32m[0m
[32m[Fri Apr 18 05:47:46 2025][0m
[32mrule create_text:
    output: data.txt
    jobid: 3
    reason: Missing output files: data.txt
    resources: tmpdir=/tmp[0m
[32m[0m
[32m[Fri Apr 18 05:47:46 2025][0m
[32mFinished job 3.[0m
[32m1 of 4 steps (25%) done[0m
[33mSelect jobs to execute...[0m
[32m[0m
[32m[Fri Apr 18 05:47:46 2025][0m
[32mrule uppercase:
    input: data.txt
    output: results/upper.txt
    jobid: 2
    reason: Missing output files: results/upper.txt; Input files updated by another job: data.txt
    resources: tmpdir=/tmp[0m
[32m[0m
[32m[Fri Apr 18 05:47

In [12]:
print("📝 Original Text:")
!cat data.txt

print("\n🔠 Uppercase Text:")
!cat results/upper.txt

print("\n🔢 Character Count:")
!cat results/length.txt


📝 Original Text:
snakemake is powerful and fun to use

🔠 Uppercase Text:
SNAKEMAKE IS POWERFUL AND FUN TO USE

🔢 Character Count:
37


In [13]:
import os

# One text file per sample: destination path -> text to store in it.
sample_inputs = {
    "input/alice.txt": "alice loves snakemake",
    "input/bob.txt": "bob thinks snakemake is cool",
}

# Create the input directory; exist_ok=True keeps a re-run of this cell from failing.
os.makedirs("input", exist_ok=True)

# Write every sample file (text is written as-is, without a trailing newline).
for sample_path, sample_text in sample_inputs.items():
    with open(sample_path, "w") as sample_file:
        sample_file.write(sample_text)


In [16]:
# Per-sample version of the pipeline: the rules use a {sample} wildcard and
# `expand` turns the SAMPLES list into the concrete targets requested by `all`.
snakefile_content = """
# 🧪 STEP 1: Define the list of sample names you want to process
# These will match the names of the files in the 'input/' folder.
SAMPLES = ["alice", "bob"]

# 🎯 STEP 2: The 'all' rule defines the final output files that we expect
# Snakemake will figure out how to create them using the other rules
rule all:
    input:
        # For each sample, we expect a character count file as the final output
        expand("results/{sample}.length.txt", sample=SAMPLES)

# 🔠 STEP 3: This rule turns the text in each sample file into uppercase
rule uppercase:
    input:
        # The input file pattern — one per sample
        "input/{sample}.txt"
    output:
        # The output file goes into a 'results/' folder, uppercased version
        "results/{sample}.upper.txt"
    shell:
        # Use 'tr' to translate lowercase to uppercase characters
        "cat {input} | tr 'a-z' 'A-Z' > {output}"

# 🔢 STEP 4: This rule counts how many characters are in the uppercase file
rule count_chars:
    input:
        # Takes the uppercase text file from the previous rule
        "results/{sample}.upper.txt"
    output:
        # Final output: a file with a single number (the char count)
        "results/{sample}.length.txt"
    shell:
        # 'wc -m' counts characters; 'tr -d' removes extra space
        "wc -m < {input} | tr -d ' ' > {output}"
"""

# Write the workflow text to disk; this cell only saves it, a later cell runs it.
with open("Snakefile", mode="w") as snakefile:
    snakefile.write(snakefile_content)


In [17]:
# 🚀 Run Snakemake using 1 core (-c1)
!snakemake -s Snakefile -c1


[33mBuilding DAG of jobs...[0m
[33mUsing shell: /usr/bin/bash[0m
[33mProvided cores: 1 (use --cores to define parallelism)[0m
[33mRules claiming more threads will be scaled down.[0m
[33mJob stats:
job            count
-----------  -------
all                1
count_chars        2
uppercase          2
total              5
[0m
[33mSelect jobs to execute...[0m
[32m[0m
[32m[Fri Apr 18 06:08:02 2025][0m
[32mrule uppercase:
    input: input/alice.txt
    output: results/alice.upper.txt
    jobid: 2
    reason: Missing output files: results/alice.upper.txt
    wildcards: sample=alice
    resources: tmpdir=/tmp[0m
[32m[0m
[32m[Fri Apr 18 06:08:02 2025][0m
[32mFinished job 2.[0m
[32m1 of 5 steps (20%) done[0m
[33mSelect jobs to execute...[0m
[32m[0m
[32m[Fri Apr 18 06:08:02 2025][0m
[32mrule count_chars:
    input: results/alice.upper.txt
    output: results/alice.length.txt
    jobid: 1
    reason: Missing output files: results/alice.length.txt; Input files upd

In [18]:
# Show the uppercase text and character count produced for each sample.
#
# The files are read in Python rather than with `!cat`: the sample inputs are
# written without a trailing newline, so `cat` left the cursor mid-line and the
# next heading was printed on the same line as the uppercase text. Stripping
# trailing newlines and letting print() add one back gives exactly one line
# break per file, whether or not the file ends with a newline.
for sample in ["alice", "bob"]:
    print(f"\n📂 Sample: {sample}")

    sections = [
        ("🔠 Uppercase version:", f"results/{sample}.upper.txt"),
        ("🔢 Character count:", f"results/{sample}.length.txt"),
    ]
    for heading, result_path in sections:
        print(heading)
        with open(result_path) as result_file:
            print(result_file.read().rstrip("\n"))



📂 Sample: alice
🔠 Uppercase version:
ALICE LOVES SNAKEMAKE🔢 Character count:
21

📂 Sample: bob
🔠 Uppercase version:
BOB THINKS SNAKEMAKE IS COOL🔢 Character count:
28


In [19]:
# List modified and untracked files in the repository before staging anything.
!git status

On branch main
Your branch is up to date with 'origin/main'.

Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   snake1.ipynb[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31m.bash_logout[m
	[31m.bashrc[m
	[31m.cache/[m
	[31m.ipynb_checkpoints/[m
	[31m.ipython/[m
	[31m.jupyter-server-log.txt[m
	[31m.jupyter/[m
	[31m.local/[m
	[31m.npm/[m
	[31m.profile[m
	[31m.snakemake/[m
	[31mSnakefile[m
	[31mdata.txt[m
	[31mhello.txt[m
	[31minput/[m
	[31moutput.txt[m
	[31mresults/[m

no changes added to commit (use "git add" and/or "git commit -a")


In [21]:
!git add <snake1>

/bin/bash: -c: line 1: syntax error near unexpected token `newline'
/bin/bash: -c: line 1: `git add <snake1>'


In [22]:
!git add <your_modified_file(s)>

/bin/bash: -c: line 1: syntax error near unexpected token `('
/bin/bash: -c: line 1: `git add <your_modified_file(s)>'
