In [None]:
import numpy as np
import pandas as pd

# Raise pandas' rendering limits so wide frames and long path strings are
# shown in full instead of being truncated. Full option keys are used
# because abbreviated keys are resolved by pattern matching and can become
# ambiguous when pandas adds new options.
pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
pd.set_option("display.max_colwidth", 200)

# `display` and `HTML` are part of the public IPython.display API; importing
# `display` from IPython.core.display is deprecated.
from IPython.display import HTML, display

# Inject CSS so the notebook's `.container` element spans the full page width.
display(HTML("<style>.container { width:100% !important; }</style>"))


def df_stats(df):
    """Print a per-column overview of ``df`` and return its first rows.

    The shape of the frame is printed first, followed by a ``psql``-style
    table (rendered with ``tabulate``) holding one row per column with:

    * ``Name``   - the column label
    * ``Null``   - number of null entries in the column
    * ``Unique`` - number of distinct values as reported by ``nunique()``
    * ``Dtypes`` - the column dtype

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise.

    Returns
    -------
    pandas.DataFrame
        ``df.head()``, so the call can be the last expression of a cell.
    """
    # Imported at call time, so tabulate is only needed when this helper runs.
    from tabulate import tabulate

    print("\n***** Shape: ", df.shape, " *****\n")

    # Each list has one entry per column of `df`, in column order.
    summary = pd.DataFrame(
        {
            "Name": df.columns.values.tolist(),
            "Null": df.isnull().sum().values.tolist(),
            "Unique": df.nunique().values.tolist(),
            "Dtypes": df.dtypes.tolist(),
        }
    )
    print(tabulate(summary, headers="keys", tablefmt="psql"))
    return df.head()

In [None]:
import os


def list_files(dir):
    """Recursively collect the paths of all files below ``dir``.

    Parameters
    ----------
    dir : str or os.PathLike
        Root directory to traverse. The parameter name shadows the builtin
        ``dir`` but is kept so existing keyword callers keep working.

    Returns
    -------
    list of str
        ``os.path.join(subdir, filename)`` for every file reported by
        ``os.walk``, in traversal order. Empty when no files are found.
    """
    # One os.walk pass already yields the file names of every directory, so
    # there is no need to start a second walk per subdirectory and pull its
    # first item with __next__(), which doubled the directory scans and could
    # raise StopIteration if a directory disappeared between the two passes.
    return [
        os.path.join(subdir, file)
        for subdir, _dirnames, files in os.walk(dir)
        for file in files
    ]

In [None]:
# Enumerate every file under the extraction root and show how many were found.
r = list_files("/media/data1/ravram/DeepOCT_Abbott")
display(len(r), " files found")

# One row per file. The "/"-separated path components at positions 5, 6 and 7
# are stored as patient_id, StudyInstanceUID and dicom_id respectively.
df = pd.DataFrame({"path": r})
new_2 = df["path"].str.split("/", n=8, expand=True)
df = df.assign(
    StudyInstanceUID=new_2[6],
    patient_id=new_2[5],
    dicom_id=new_2[7],
)
display(df.head(15))

In [None]:
# NOTE: this cell used to start by reading
# "data/Abbott_OCT/database-c-find_ABBOTT.csv" into df_extracted, but the
# result was overwritten on the very next line. The dead read is removed so
# the cell no longer fails when that CSV is absent.
df_extracted = pd.DataFrame({"path": r})

# File extension = text after the last "." in the path.
df_extracted["FileType"] = df_extracted.path.apply(lambda x: x.split(".")[-1])
display("Total files", df_extracted.FileType.value_counts())

# Keep DICOM files only. .copy() gives the filtered frame its own data, so the
# column assignments below do not trigger SettingWithCopyWarning.
df_extracted = df_extracted.loc[df_extracted["FileType"] == "dcm"].copy()
new_2 = df_extracted["path"].str.split("/", n=7, expand=True)

# Path components 5, 6 and 7 are stored as mrn, StudyInstanceUID and file name.
df_extracted["mrn"] = new_2[5]
df_extracted["StudyInstanceUID"] = new_2[6]
# Remove the literal ".dcm" suffix. str.rstrip(".dcm") would instead strip any
# trailing run of the characters ".", "d", "c", "m" and could eat into the id.
df_extracted["dicom_id"] = new_2[7].str.replace(r"\.dcm$", "", regex=True)

# Collapse to one row per (mrn, study), keeping the first value of each column.
df_extracted_study_level = (
    df_extracted.groupby(["mrn", "StudyInstanceUID"]).first().reset_index()
)

In [None]:
# Join the study-level rows (without their per-file columns) back onto the
# per-file index `df`, matching on StudyInstanceUID.
df_extracted_m = df_extracted_study_level.drop(columns=["dicom_id", "path"]).merge(
    df, how="inner", on=["StudyInstanceUID"]
)

# Spot-check: show the merged row(s) for one specific DICOM file name.
display(
    df_extracted_m[
        df_extracted_m["dicom_id"].eq(
            "1.3.12.2.1107.5.4.5.135214.30000022072511311760100000156.dcm"
        )
    ]
)

# Persist the merged table without the index column.
df_extracted_m.to_csv("data/Abbott_OCT/df_extracted_m_ABBOTT.csv", index=False)

In [None]:
# Reload the merged table from disk and show the rows of one specific study.
df_extracted_m = pd.read_csv("data/Abbott_OCT/df_extracted_m_ABBOTT.csv")
display(
    df_extracted_m[
        df_extracted_m["StudyInstanceUID"].eq("2.16.124.113611.1.118.1.1.5884039")
    ]
)

### Extract AVI Metadata




In [None]:
# Project-local helper; its implementation is not part of this notebook.
from downloadAvi import extract_avi_metadata as avi_meta

# Pass the merged CSV (the same path this notebook writes with to_csv) to the
# extractor.
# NOTE(review): presumably this produces AVI files and metadata for the rows
# listed in the CSV; confirm inputs, outputs and side effects in downloadAvi.
avi_meta.extract_avi_and_metadata("data/Abbott_OCT/df_extracted_m_ABBOTT.csv")