In [3]:
from dotenv import dotenv_values
from cryosparc.tools import CryoSPARC

# Read the CryoSPARC login settings from the local .env file so that no
# credentials are written into the notebook itself.
env_vars = dotenv_values(".env")
license = env_vars["CRYOSPARC_LICENSE_ID"]
host = env_vars["CRYOSPARC_HOST"]
email = env_vars["CRYOSPARC_EMAIL"]
password = env_vars["CRYOSPARC_PASSWORD"]

# Open the client connection to the CryoSPARC instance (base port 39000).
cs = CryoSPARC(license=license, host=host, base_port=39000, email=email, password=password)

### Cryodrgn inputs
- downsample particle stacks (126) --> particle stacks
(can also be done in CS, or with the cryodrgn script)
- select random subset 100.000 particles
- cs refinement job to extract poses.pkl and ctf.pkl

#### Replace `cryodrgn downsample` with `cryodrgn preprocess`
cryodrgn preprocess P10_J712_particles_exported.cs \
		--datadir P10/exports/groups/P10_J628_particles/J626/extract \
		-D 128 \
		-o data/preprocessed/128/particles.mrcs

#### Parse pose information as usual, specifying the refinement box size with -D
cryodrgn parse_pose_csparc P10_J712_particles_exported.cs \
		-D 256 \
		-o data/pose.pkl

#### Parse CTF information as usual
cryodrgn parse_ctf_csparc P10_J712_particles_exported.cs -o data/ctf.pkl

#### Run cryoDRGN with preprocessed particles.ft.txt and extra flag --preprocessed
cryodrgn train_vae data/preprocessed/128/particles.ft.txt \
		--preprocessed \
		--ctf data/ctf.pkl \
		--poses data/pose.pkl \
		--zdim 8 \
		-n 50 \
		-o 00_vae128 >> 00.log

In [17]:
# Project and workspace that will hold the external cryoDRGN job.
project = cs.find_project("P42")
workspace = "W6"

# Create the external job under the chosen workspace.
title = "cryodrgn"
job = project.create_external_job(workspace, title=title)


In [112]:
# Write a start message to the job log, then mark the job as "running".
start_message = f"Starting job - {job.uid}"
job.log(start_message)
job.start(status="running")

In [61]:
# Locate the particle .cs files listed by the refinement job, skipping the
# passthrough files, and keep the one that sorts last by name.
refinement_job = project.find_job("J225")
particle_file_list = refinement_job.list_files()
candidate_files = [
    file
    for file in particle_file_list
    if file.endswith("particles.cs") and not file.endswith("passthrough_particles.cs")
]
if not candidate_files:
    # Fail with an explicit message instead of a bare IndexError on an empty list.
    raise FileNotFoundError(
        f"No '*particles.cs' file (excluding passthrough) found in job {refinement_job.uid}"
    )
particles_file = max(candidate_files)

# Path of the selected file inside the refinement job directory.
particles_file_path = f"{refinement_job.dir()}/{particles_file}"
print(particles_file_path)

/media/longstorage/Tadej/CS-walker/J225/J225_004_particles.cs


In [68]:
# Copy the refinement particles .cs file into the external job's directory,
# keeping the same file name.
particles_file_final_path = f"{job.dir()}/{particles_file}"
job.cp(particles_file_path, particles_file_final_path)

In [None]:
# Make particles dir
#job.mkdir("particles_data", exist_ok=True)
#particles = job.load_input("particles", ["blob"])
#for particle in particles.rows():
#    source = particle["blob/path"]
#    target = job.uid + "/particles_data/" + source.split("/")[-1]
    #project.symlink(source, target)
#    print(source, target)
    ### Problem: it's the whole mrc file

In [103]:
# Load the blob fields of the job's "particles" input and report how many rows it has.
particles = job.load_input("particles", ["blob"]).rows()
number_of_particles = len(particles)
print(number_of_particles)

# Directory part of the first particle's blob path (everything before the last "/").
first_blob_path = particles[0]["blob/path"]
datadir = "/".join(first_blob_path.split("/")[:-1])

# Target box size for downsampling and number of particles in the random subset.
downsampled_size = 128
subset = 10000

758578


In [104]:

# Pick a random subset of `subset` particle indices out of `number_of_particles`
# and save it as ind<subset>.pkl inside the job directory.
select_random_cmd = f"cryodrgn_utils select_random {number_of_particles} -n {subset} -o ind{subset}.pkl"
job.subprocess(
    select_random_cmd.split(" "),
    cwd=job.dir(),
    mute=False,
    checkpoint=True,
)

(INFO) (utils.py) (13-Jul-23 21:51:08) Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
(INFO) (utils.py) (13-Jul-23 21:51:08) NumExpr defaulting to 8 threads.
758578 total particles
10000 particles in selection: [    45     47     56 ... 758231 758351 758491]
Saving ind10000.pkl


In [94]:
#cryodrgn preprocess P10_J712_particles_exported.cs \
		#--datadir P10/exports/groups/P10_J628_particles/J626/extract \
		#-D 128 \
		#-o data/preprocessed/128/particles.mrcs

In [98]:
# Display the project directory (passed as --datadir when preprocessing the particles).
project.dir()

PurePosixPath('/media/longstorage/Tadej/CS-walker')

In [105]:
# Downsample the randomly selected particles with `cryodrgn preprocess`.
# TODO confirm: is `cryodrgn preprocess` the right replacement for `cryodrgn downsample`?
#
# The argument vector is written out as a list instead of being produced by
# splitting a formatted string on spaces, so a project directory or file name
# that contains a space is still passed as a single argument.
job.subprocess(
    [
        "cryodrgn",
        "preprocess",
        particles_file,
        "--datadir",
        str(project.dir()),
        "--ind",
        f"ind{subset}.pkl",
        "-D",
        str(downsampled_size),
        "-o",
        f"particles.{downsampled_size}.mrcs",
    ],
    cwd=job.dir(),
    mute=False,
    checkpoint=True,
)


(INFO) (utils.py) (13-Jul-23 21:51:50) Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
(INFO) (utils.py) (13-Jul-23 21:51:50) NumExpr defaulting to 8 threads.
(INFO) (preprocess.py) (13-Jul-23 21:52:01) Filtering image dataset with /media/longstorage/Tadej/CS-walker/J293/ind10000.pkl
(INFO) (preprocess.py) (13-Jul-23 21:52:01) Loading 10000 250x250 images
(INFO) (preprocess.py) (13-Jul-23 21:52:01) Downsampling images to 128x128
(INFO) (preprocess.py) (13-Jul-23 21:52:01) Processing chunk 1 of 1
(INFO) (preprocess.py) (13-Jul-23 21:52:01) Processing batch of 5000 images (1 of 2)
(INFO) (preprocess.py) (13-Jul-23 21:59:50) Processing batch of 5000 images (2 of 2)
(INFO) (preprocess.py) (13-Jul-23 22:08:27) New shape: (10000, 129, 129)
(INFO) (preprocess.py) (13-Jul-23 22:08:27) Saving /media/longstorage/Tadej/CS-walker/J293/particles.128.0.ft.mrcs
(INFO) (preprocess.py) (13-Jul-23 22:08:30) Saving summary txt file /media/longstorage/Tadej

In [108]:
# Open question: why is the preprocessed shape 129 when -D is 128?
# NOTE(review): presumably `cryodrgn preprocess` stores D+1 pixels per side — TODO confirm.
# Box size passed to parse_pose_csparc via -D; 250 matches the blob/shape reported for the particles.
size=250 # TODO: get from map?

In [110]:
# Convert the alignment information in the particles .cs file into pose.pkl,
# passing the box size through -D.
# The arguments are listed explicitly (not split from a string on spaces) so a
# file name containing a space stays a single argument.
job.subprocess(
    ["cryodrgn", "parse_pose_csparc", particles_file, "-D", str(size), "-o", "pose.pkl"],
    cwd=job.dir(),
    mute=False,
    checkpoint=True,
)

(INFO) (utils.py) (13-Jul-23 22:24:23) Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
(INFO) (utils.py) (13-Jul-23 22:24:23) NumExpr defaulting to 8 threads.
0 uid 14438296101616066123
1 blob/path b'J206/extract/012449035513946905941_FoilHole_23398537_Data_23403785_23403787_20230625_105531_Fractions_patch_aligned_doseweighted_particles.mrc'
2 blob/idx 0
3 blob/shape [250 250]
4 blob/psize_A 0.76
5 blob/sign -1.0
6 blob/import_sig 0
7 ctf/type b'spline'
8 ctf/exp_group_id 7
9 ctf/accel_kv 200.0
10 ctf/cs_mm 2.7
11 ctf/amp_contrast 0.1
12 ctf/df1_A 14074.505
13 ctf/df2_A 12539.849
14 ctf/df_angle_rad 0.39969373
15 ctf/phase_shift_rad 0.0
16 ctf/scale 1.0
17 ctf/scale_const 1.0
18 ctf/shift_A [0. 0.]
19 ctf/tilt_A [0. 0.]
20 ctf/trefoil_A [0. 0.]
21 ctf/tetra_A [0. 0. 0. 0.]
22 ctf/anisomag [0. 0. 0. 0.]
23 ctf/bfactor 0.0
24 alignments3D/split 0
25 alignments3D/shift [-1.9140625  0.0390625]
26 alignments3D/pose [ 1.9459642  -0.45931774  0

In [111]:
# Convert the CTF parameters in the particles .cs file into ctf.pkl.
# The arguments are listed explicitly (not split from a string on spaces) so a
# file name containing a space stays a single argument.
job.subprocess(
    ["cryodrgn", "parse_ctf_csparc", particles_file, "-o", "ctf.pkl"],
    cwd=job.dir(),
    mute=False,
    checkpoint=True,
)

(INFO) (utils.py) (13-Jul-23 22:26:42) Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
(INFO) (utils.py) (13-Jul-23 22:26:42) NumExpr defaulting to 8 threads.
(INFO) (parse_ctf_csparc.py) (13-Jul-23 22:26:44) 758578 particles
(INFO) (ctf.py) (13-Jul-23 22:26:44) Image size (pix)  : 250
(INFO) (ctf.py) (13-Jul-23 22:26:44) A/pix             : 0.7599999904632568
(INFO) (ctf.py) (13-Jul-23 22:26:44) DefocusU (A)      : 14074.5048828125
(INFO) (ctf.py) (13-Jul-23 22:26:44) DefocusV (A)      : 12539.8486328125
(INFO) (ctf.py) (13-Jul-23 22:26:44) Dfang (deg)       : 22.90076368321733
(INFO) (ctf.py) (13-Jul-23 22:26:44) voltage (kV)      : 200.0
(INFO) (ctf.py) (13-Jul-23 22:26:44) cs (mm)           : 2.700000047683716
(INFO) (ctf.py) (13-Jul-23 22:26:44) w                 : 0.10000000149011612
(INFO) (ctf.py) (13-Jul-23 22:26:44) Phase shift (deg) : 0.0
(INFO) (parse_ctf_csparc.py) (13-Jul-23 22:26:44) Saving /media/longstorage/Tadej/CS-walk

cryodrgn train_vae data/preprocessed/128/particles.ft.txt \
		--preprocessed \
		--ctf data/ctf.pkl \
		--poses data/pose.pkl \
		--zdim 8 \
		-n 50 \
		-o 00_vae128 >> 00.log

In [114]:
# Particle list passed to train_vae. The name is derived from downsampled_size
# so it stays in sync with the `-o particles.{downsampled_size}.mrcs` output
# name used in the preprocessing step instead of hard-coding 128.
particles_input = f"particles.{downsampled_size}.ft.txt"

In [None]:
# CUDA not working.. I had a different env --> cryodrgn3 or cryofire3
# Need only ind of poses and ctf...
# compare pkl?

In [116]:
# Train the cryoDRGN VAE (zdim 8, 30 epochs) on the preprocessed particles;
# results are written to 00_vae128 inside the job directory.
# NOTE(review): pose.pkl and ctf.pkl were parsed from the full particles file,
# while the preprocessed stack only holds the random subset — confirm whether
# they must be filtered with the same index file before training.
train_vae_cmd = f"cryodrgn train_vae {particles_input} --preprocessed --ctf ctf.pkl --poses pose.pkl --zdim 8 -n 30 -o 00_vae128"
job.subprocess(
    train_vae_cmd.split(" "),
    cwd=job.dir(),
    mute=False,
    checkpoint=True,
)

(INFO) (utils.py) (13-Jul-23 22:34:48) Note: NumExpr detected 64 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
(INFO) (utils.py) (13-Jul-23 22:34:48) NumExpr defaulting to 8 threads.
(INFO) (train_vae.py) (13-Jul-23 22:34:49) /home/perun-d12/anaconda3/envs/cryodrgn/bin/cryodrgn train_vae particles.128.ft.txt --preprocessed --ctf ctf.pkl --poses pose.pkl --zdim 8 -n 30 -o 00_vae128
(INFO) (train_vae.py) (13-Jul-23 22:34:49) Namespace(particles='/media/longstorage/Tadej/CS-walker/J293/particles.128.ft.txt', outdir='/media/longstorage/Tadej/CS-walker/J293/00_vae128', zdim=8, poses='/media/longstorage/Tadej/CS-walker/J293/pose.pkl', ctf='/media/longstorage/Tadej/CS-walker/J293/ctf.pkl', load=None, checkpoint=1, log_interval=1000, verbose=False, seed=97505, ind=None, invert_data=True, window=True, window_r=0.85, datadir=None, lazy=False, preprocessed=True, num_workers_per_gpu=4, max_threads=16, tilt=None, tilt_deg=45, num_epochs=30, batch_size=8, wd=0, lr=0.0001, be

RuntimeError: Subprocess ['cryodrgn', 'train_vae', 'particles.128.ft.txt', '--preprocessed', '--ctf', 'ctf.pkl', '--poses', 'pose.pkl', '--zdim', '8', '-n', '30', '-o', '00_vae128'] exited with status 1

In [None]:
# Connect particle_stacks from refinement job

# Get .cs file

# Perform downsampling, ctf and poses

# Run cryoDRGN