In [None]:
from IPython.display import clear_output
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
import re
import dropbox
import sys
import pandas as pd
import numpy as np
import imageio.v2 as imageio
import matplotlib.pyplot as plt
import cv2
import logging
import datetime
import glob
import json
import scipy
import matplotlib as mpl

from subprocess import call
from tifffile import imwrite
from tqdm import tqdm
from pathlib import Path
from amftrack.util.dbx import (
    upload_folder,
    download,
    read_saved_dropbox_state,
    save_dropbox_state,
    load_dbx,
    get_dropbox_folders_prince,
    get_dropbox_video_folders,
    download_video_folders_drop,
    download_analysis_folders_drop,
)
from amftrack.pipeline.launching.run import (
    run_transfer,
)
from amftrack.pipeline.launching.run_super import run_parallel_transfer
from amftrack.pipeline.launching.run_super import run_parallel_flows
from amftrack.pipeline.functions.transport_processing.high_mag_videos.plot_data import (
    plot_summary,
    save_raw_data,
)
from amftrack.pipeline.functions.transport_processing.high_mag_videos.high_mag_analysis import (
    HighmagDataset,
    VideoDataset,
    EdgeDataset,
    index_videos_dropbox_new,
    analysis_run,
)
from amftrack.pipeline.functions.transport_processing.high_mag_videos.kymo_class import (
    KymoVideoAnalysis,
    KymoEdgeAnalysis,
)
from IPython.display import clear_output

# Interactive (ipympl) matplotlib figures inside the notebook.
%matplotlib widget
# Pick up edits to imported modules without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Send log records to stdout so they show up in the cell output.
# The level must be the integer constant logging.DEBUG; logging.debug is the
# module-level logging *function* and is rejected by setLevel() with a TypeError.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# Render figures at 200 dpi (sets rcParams["figure.dpi"]).
mpl.rc("figure", dpi=200)

## File declaration
As this notebook is designed to work with Snellius (now also on a local computer!), two items to separate are the raw video files and the analysis. The raw video files are large, bulky and not so easy to flip through. Ideally, the video files would be downloaded and the analysis would be stored on a separate folder structure entirely. That way, large scale analysis of analysis folders can happen when there are thousands of videos in the dataset, without having to have those raw video folders on hand.

The function below prepares your folders to receive all the video info folders and raw video files.

### Input:
Please give separately the folder where raw video data is stored, and where the analysis will be stored. Also give the dropbox address of the dataset you want to analyze.

### Output:
The specified dropbox folder will be searched, and all relevant video information will be downloaded to an analysis folder structure identical to what is present on the dropbox. The relevant raw video folder structure will also be generated, if so specified. Cache files (.json) are also created, so that the scrounging does not have to be repeated next time.

In [None]:
# Active storage locations for the raw videos and for the analysis output.
# Both values end with "/" because other cells append sub-paths by plain
# string formatting.
#
# Alternative locations used before (swap in as needed):
#   videos_folder = "F:\\AMOLF_Data\\videos\\"
#   analysis_folder = "F:\\AMOLF_Data\\analysis\\"
#   videos_folder = "/gpfs/scratch1/shared/amftrackflow/videos/"
#   analysis_folder = "/gpfs/home6/svstaalduine/Analysis/"
_project_root = "/projects/0/einf914/"

videos_folder = _project_root + "videos/"
analysis_folder = _project_root + "analysis_videos/"

In [None]:
# Dropbox folder that holds the plate folders of the dataset to process.
# Exactly one address is active; the others are kept commented out as
# ready-to-swap alternatives (previously "/DATA/CocoTransport/" was assigned
# and then immediately overwritten, which hid which dataset was in use).
# dropbox_address = "/DATA/FLUORESCENCE/DATA_NileRed/"
# dropbox_address = "/DATA/MYRISTATE/DATA/2_weeks/"
# dropbox_address = "/DATA/TransportROOT/DATA/"
# dropbox_address = "/DATA/MYRISTATE/MorrisonDATA/20230508_Plate067/"
# dropbox_address = "/DATA/MYRISTATE/MorrisonDATA/"
# dropbox_address = "/DATA/TRANSPORT/DATA/20230308_Plate070/"
# dropbox_address = "/DATA/CocoTransport/"
dropbox_address = "/DATA/CocoCut/"

In [None]:
# Plate folder names (appended to `dropbox_address` by the indexing cells),
# grouped per plate. Previously `names` was assigned four times in a row and
# only the last assignment had any effect; the table below keeps every batch
# available while making the active one explicit.
plate_batches = {
    "Plate310": [
        "20230901_Plate310",
        "20230902_Plate310",
        "20230903_Plate310",
        "20230904_Plate310",
        "20230905_Plate310",
        "20230906_Plate310",
    ],
    "Plate441": [
        "20230810_Plate441",
        "20230811_Plate441",
        "20230812_Plate441",
        "20230813_Plate441",
    ],
    "Plate449": [
        "20230813_Plate449",
        "20230814_Plate449",
        "20230815_Plate449",
        "20230816_Plate449",
        "20230818_Plate449",
    ],
    "Plate625": [
        "20240414_Plate625",
    ],
}

# Change this key to process a different batch.
active_batch = "Plate625"
names = list(plate_batches[active_batch])

***To delete for 441***

20230809_1806_Plate14

20230809_2005_Plate14

20230809_2205_Plate14

20230812_0004_Plate14

20230812_1618_Plate14

20230813_2230_Plate14



***To delete for 449***

20230814_1218_Plate10

20230813_2219_Plate10

20230816_1227_Plate10

20230816_1628_Plate10

20230818_1307_Plate10

20230818_1523_Plate10



In [None]:
# For every selected plate folder: index it on Dropbox, then launch the
# "from_drop_video.py" transfer for the indexed videos.
for name in names:
    plate_dropbox_folder = f"{dropbox_address}{name}/"
    # Optional keyword filters that can be added here: date_start=20230801,
    # date_end=20230813.
    video_param_frame = index_videos_dropbox_new(
        analysis_folder,
        videos_folder,
        plate_dropbox_folder,
        REDO_SCROUNGING=True,
        plate_names=None,
    )

    # The transfer receives a copy of the index frame.
    download_frame = video_param_frame.copy()
    transfer_args = [videos_folder]
    # NOTE(review): 20 and "24:00:00" are presumably the number of parallel
    # jobs and the job time limit -- confirm against run_parallel_transfer.
    run_parallel_transfer(
        "from_drop_video.py",
        transfer_args,
        download_frame,
        20,
        "24:00:00",
        "transfer_test",
    )
    clear_output(wait=False)

clear_output(wait=True)

# Module 2: Processing

Now that the files have been downloaded, it's time to analyse them. In the below code, you'll be able to either do a complete survey of the analysis folder for as many videos as possible, or use the DataFrame of recently downloaded videos to filter for the videos you want to analyse.

It is also possible to analyse videos directly in this notebook. Be aware, again, that this is a sequential analysis, and slower than running a SLURM job.

### Input:
DataFrame filters of all videos to be analysed
### Output:
Print statements for all parameters of the analysis session that is about to take place.

In [None]:
# For indexing analysis folders
# Index every selected plate folder again, without starting a transfer.
# After the loop `video_param_frame` holds the frame of the last plate only.
for name in names:
    plate_dropbox_folder = f"{dropbox_address}{name}/"
    # Optional keyword filters: date_start=20230801, date_end=20230813.
    index_kwargs = dict(REDO_SCROUNGING=True, plate_names=None)
    video_param_frame = index_videos_dropbox_new(
        analysis_folder, videos_folder, plate_dropbox_folder, **index_kwargs
    )
    clear_output(wait=False)

clear_output(wait=True)

In [None]:
# Drop the first five characters of the Dropbox address (the "/DATA" prefix of
# the addresses used in this notebook) to get the matching sub-folder inside
# the analysis folder.
folder_filter = dropbox_address[5:]

img_infos = glob.glob(
    f"{analysis_folder}{folder_filter}/**/video_data.json", recursive=True
)
if not img_infos:
    # Without this guard the sort below fails with an opaque KeyError on
    # "unique_id" because there would be no columns at all.
    raise FileNotFoundError(
        f"No video_data.json found below {analysis_folder}{folder_filter}; "
        "run the indexing cell first or check the folder settings."
    )

# Read every info file into its own frame and concatenate once; growing a
# DataFrame with pd.concat inside a loop copies all previous rows each time.
video_info_frames = [
    pd.read_json(address, orient="index").T for address in img_infos
]
vid_anls_frame = (
    pd.concat(video_info_frames, ignore_index=True)
    .sort_values("unique_id")
    .reset_index(drop=True)
)
# vid_anls_frame.tail(20)

In [None]:
####################################################################################
### This is where you can apply the filters. Only those videos will be analyzed. ###
####################################################################################

# Example filters (uncomment and adapt):
# analysis_frame = vid_anls_frame[
#     vid_anls_frame["imaging_day"].ge("20230814")
# ].reset_index(drop=True)
# analysis_frame = vid_anls_frame[vid_anls_frame['xpos'].le(100)].reset_index(drop=True)
# analysis_frame = analysis_frame[analysis_frame['mode']=="F"]
# analysis_frame = vid_anls_frame[vid_anls_frame['plate_id'] != "20230729_Plate440"]
# analysis_frame = analysis_frame[analysis_frame['video_int'].isin([1])]

# Active filter: keep only the videos whose plate_id is listed in `names`.
plate_mask = vid_anls_frame["plate_id"].isin(names)
analysis_frame = vid_anls_frame.loc[plate_mask]

# Report how many videos passed the filter.
print(f"Number of videos to be analyzed: {len(analysis_frame)}")
# analysis_frame.tail(20)

In [None]:
# Show which plate ids are present in the current selection.
selected_plate_ids = analysis_frame["plate_id"].unique()
selected_plate_ids

## Run SLURM Analysis job
Two options: For small analysis, use the first block. This will just do the calculations on the machine. For large-scale analysis, use the second block, as it will create a Snellius job.
## Input:
Snellius job parameters
## Output:
Analysis folder will be populated with analysis tiffs and csv sheets. At the same time, this analysis folder will also be uploaded to the dropbox.

In [None]:
### LARGE VIDEO ANALYSIS

# nr_parallel is the number of selected videos, capped at 2.
nr_parallel = np.min([len(analysis_frame), 2])

# NOTE(review): the meaning of 9, 0.95, 0.005 and 200 is defined by
# flux_extract.py -- confirm there before changing them.
flux_extract_args = [analysis_folder, 9, 0.95, 0.005, 200, dropbox_address]

run_parallel_flows(
    "flux_extract.py",
    flux_extract_args,
    analysis_frame,
    nr_parallel,
    "2:00:00",
    "flux_extract",
    node="fat_rome",
    name_job="transport",
)
clear_output(wait=False)

submitted_message = (
    "Sent all the jobs! Use the command '$ squeue' in the terminal to see the progress"
)
print(submitted_message)

In [None]:
# nr_parallel is the number of selected videos, capped at 5.
nr_parallel = np.min([len(analysis_frame), 5])

# Same positional arguments as passed to flux_extract.py.
flux_upload_args = [analysis_folder, 9, 0.95, 0.005, 200, dropbox_address]

run_parallel_transfer(
    "flux_upload.py",
    flux_upload_args,
    analysis_frame,
    nr_parallel,
    "6:00:00",
    "flux_upload",
    node="staging",
    cpus=1,
    # dependency = "flux_extract.sh",
    name_job="flux_upload.sh",
)
clear_output(wait=False)

submitted_message = (
    "Sent all the jobs! Use the command '$ squeue' in the terminal to see the progress"
)
print(submitted_message)

In [None]:
"20230810_Plate441_001" in list(analysis_frame["unique_id"].astype(str))

In [None]:
# For every selected video, fetch "video_data_network.json" from the
# KymoSpeeDExtract copy of its Dropbox folder (when it exists there) and store
# it next to the local video_data.json.
values_id = list(analysis_frame["unique_id"])
# Set copy for constant-time membership tests inside the loop.
selected_ids = set(values_id)
dbx = load_dbx()
img_infos = glob.glob(f"{analysis_folder}/**/video_data.json", recursive=True)
# Deliberately does NOT touch `vid_anls_frame`: the earlier version reset it to
# an empty DataFrame here, which broke re-running the filter cell afterwards.
for address in img_infos:
    if not os.path.exists(address):
        continue
    add_info = pd.read_json(address, orient="index").T
    if add_info["unique_id"].iloc[0] not in selected_ids:
        continue

    plate_id_video = add_info["plate_id"].iloc[0]
    original_path = add_info["tot_path_drop"].iloc[0]

    # Dropbox folder of this video with "KymoSpeeDExtract/" inserted in front
    # of the plate id.
    target_path = original_path.replace(
        f"/{plate_id_video}", f"/KymoSpeeDExtract/{plate_id_video}"
    )
    source = "/" + target_path + "/video_data_network.json"
    target = address.replace("video_data.json", "video_data_network.json")
    try:
        # NOTE(review): files_search is the legacy Dropbox search call;
        # check whether the SDK in use still supports it.
        results = dbx.files_search(
            "/" + target_path, "video_data_network.json"
        ).matches
        if results:
            download(source, target)
    except dropbox.exceptions.ApiError:
        # Best effort: report the local file whose Dropbox lookup or download
        # failed and continue with the next video.
        print(address)