In [None]:
import os
import requests
import io
import pandas as pd
import pydicom
from pathlib import Path
import time



from pydicom.dataset import Dataset as DcmDataset
from pydicom.tag import BaseTag as DcmTag
from pydicom.multival import MultiValue as DcmMultiValue

#see https://stackoverflow.com/questions/15746558/how-to-send-a-multipart-related-with-requests-in-python

#from fastcore.foundation import * #Patch here (but also imports fastcore.imports (which is not sufficient))
    # But parallel is NOT here... :-(

from fastai2.data.all import *


In [None]:

# DICOMweb "studies" endpoint used by the upload helpers below.
# To target another deployment (e.g. "dicom-server-instance"), change the server name.
dicom_server_name = "sjbdicomdemo"
url = "https://" + dicom_server_name + ".azurewebsites.net/studies"
url

In [None]:
# Cell
@patch
def dcmread(fn:Path, force = False):
    """Open the `DICOM` file at `fn` and return the dataset parsed by pydicom.

    `force` is forwarded to `pydicom.dcmread`.
    """
    # Pass `force` by keyword: the second positional parameter of
    # `pydicom.dcmread` is `defer_size`, so a positional `force` never
    # reached the option it was meant for.
    return pydicom.dcmread(str(fn), force=force)

# Cell
def _cast_dicom_special(x):
    cls = type(x)
    if not cls.__module__.startswith('pydicom'): return x
    if cls.__base__ == object: return x
    return cls.__base__(x)

def _split_elem(res,k,v):
    if not isinstance(v,DcmMultiValue): return
    res[f'Multi{k}'] = 1
    for i,o in enumerate(v): res[f'{k}{"" if i==0 else i}']=o

        # Cell
@patch
def as_dict(self:DcmDataset, px_summ=False): #, window=dicom_windows.brain):
    "Return the dataset's elements (all except tag (0x7fe0,0x0010)) as a dict keyed by keyword, plus `fname`"
    pixel_data_tag = (0x7fe0,0x0010)
    # Gather (keyword, value) for every element except the pixel data tag.
    pairs = []
    for tag in self.keys():
        if tag != pixel_data_tag:
            elem = self[tag]
            pairs.append((elem.keyword,elem.value))
    res = dict(pairs)
    res['fname'] = self.filename
    # Multi-valued elements are additionally expanded into numbered keys.
    for key,value in pairs: _split_elem(res,key,value)
    if px_summ:
        # The pixel summary statistics are currently disabled:
#    try:
#        pxs = self.pixel_array
#        for f in stats: res['img_'+f] = getattr(pxs,f)()
#        res['img_pct_window'] = self.pct_in_window(*window)
#    except Exception as e:
#        for f in stats: res['img_'+f] = 0
#        print(res,e)
        # Only the cast of pydicom-specific value types runs on this path.
        for key in res: res[key] = _cast_dicom_special(res[key])
    return res

# Cell
def _dcm2dict(fn, **kwargs): return fn.dcmread().as_dict(**kwargs)

# Cell
@delegates(parallel)
def _from_dicoms(cls, fns, n_workers=0, **kwargs):
    "Build a `pd.DataFrame` from the `_dcm2dict` result of every file in `fns`, mapped with `parallel`"
    rows = parallel(_dcm2dict, fns, n_workers=n_workers, **kwargs)
    return pd.DataFrame(rows)
# Expose the builder as `pd.DataFrame.from_dicoms(fns, ...)`.
setattr(pd.DataFrame, 'from_dicoms', classmethod(_from_dicoms))

In [None]:

def get_dicom_files_from_directory(dirpath,extension="*.dcm", validate=True):
    """Recursively collect the files under `dirpath` that match the glob `extension`.

    With `validate=True` each candidate is loaded into a pydicom dataset via
    `pydicom.dcmread`, and files that fail to load are left out of the result.
    This goes beyond checking the DICOM header: it ensures the metadata can
    actually be loaded. With `validate=False` every matching path is returned
    unchecked.

    Returns a list of `Path` objects.
    """
    path = Path(dirpath)
    # pydicom 2 has an is_valid method to replace is_dicom (maybe look at pydicom2 for other reasons)
    # pydicom.misc.is_dicom(str(x)) would only check for DICM in the header (fast and loose)
    files = list(path.rglob(extension))
    if not validate:
        return files

    valid_files = []
    for file in files:
        try:
            pydicom.dcmread(str(file))
        except Exception:
            # Could not be loaded as DICOM: exclude it from the result.
            continue
        valid_files.append(file)
    #Potentially look at pydicom.dataset.validate_file_meta(file_meta, enforce_standard=True) for deeper validation
    #  https://pydicom.github.io/pydicom/dev/reference/generated/pydicom.dataset.validate_file_meta.html
    return valid_files
    

In [None]:
# Collect the candidate DICOM files from a local sample folder (validation skipped).
# Alternative sample folders:
#files = get_dicom_files_from_directory('C:\\githealth\\dicom-samples\\visus.com')
#files = get_dicom_files_from_directory('C:\\!DicomDataTCIA\\4D-Lung', validate=False)
files = get_dicom_files_from_directory(dirpath='C:\\!!delete4DLung', validate=False)
print(len(files))
                                       

In [None]:
# Drop every path that mentions 'pydicom' (e.g. pydicom's own test files).
files = [candidate for candidate in files if 'pydicom' not in str(candidate)]
print(len(files))

In [None]:

# Can just use this to load all the files in a directory, instead of calling above
#dirname = 'C:\\githealth\\dicom-samples\\visus.com'
#files = [x for x in Path(dirname).rglob('*.dcm')] # if not 'pydicom' in str(x)] # exclude pydicom test files



In [None]:
# Stupid simple function to get a Dataframe, but can be extended later to add logic
def get_dicom_metadata_as_dataframe(list_of_files,columns=[],save_as_filename=''):
    """Load the DICOM metadata of `list_of_files` into a DataFrame.

    Parameters:
        list_of_files: paths passed to `pd.DataFrame.from_dicoms`.
        columns: currently unused; kept so existing calls keep working.
        save_as_filename: when non-empty, the DataFrame is also written to
            this path as CSV.

    Returns the resulting DataFrame.
    """
    # Read the files the caller passed in, not the notebook-global `files`.
    df = pd.DataFrame.from_dicoms(list_of_files)
    if save_as_filename:
        df.to_csv(save_as_filename)
    return df
        
    



In [None]:
#df = pd.DataFrame.from_dicoms(files)
#df.to_csv('TestDicom2.csv')

In [None]:
df = get_dicom_metadata_as_dataframe(files)

In [None]:
df.to_csv('TestDicomMetadata-4DLungSubset.csv')

In [None]:
from urllib3.filepost import encode_multipart_formdata, choose_boundary

def encode_multipart_related(fields, boundary=None):
    """Encode `fields` as a multipart body labelled `multipart/related`.

    A boundary is generated with `choose_boundary()` when none is supplied.
    Returns `(body, content_type)`; the content type produced by
    `encode_multipart_formdata` is discarded and replaced by a
    `multipart/related` one carrying the same boundary.
    """
    boundary = choose_boundary() if boundary is None else boundary
    body, _formdata_type = encode_multipart_formdata(fields, boundary)
    return body, str('multipart/related; boundary=%s' % boundary)

In [None]:


#!pip install aiohttp
#!pip install aiohttp[speedups]

In [6]:
import asyncio
import aiohttp

testfile = 'C:\\githealth\\dicom-samples\\visus.com\\case1\\case1_008.dcm'

In [None]:
async def fetch(session, url):
    "GET `url` through `session` and return the response body as text"
    async with session.get(url) as reply:
        body = await reply.text()
        return body

async def main():
    "Open a client session, fetch 'http://python.org' with `fetch`, and print the returned text"
    async with aiohttp.ClientSession() as client:
        print(await fetch(client, 'http://python.org'))

    


In [4]:
async with aiohttp.ClientSession() as session:
    async with session.post('http://httpbin.org/post', data=b'data') as resp:
        print(resp.status)
        print(await resp.text())

200
{
  "args": {}, 
  "data": "data", 
  "files": {}, 
  "form": {}, 
  "headers": {
    "Accept": "*/*", 
    "Accept-Encoding": "gzip, deflate", 
    "Content-Length": "4", 
    "Content-Type": "application/octet-stream", 
    "Host": "httpbin.org", 
    "User-Agent": "Python/3.7 aiohttp/3.6.2", 
    "X-Amzn-Trace-Id": "Root=1-5ea0c0f5-1b8c9e2029ff3b106803bfa0"
  }, 
  "json": null, 
  "origin": "50.47.112.149", 
  "url": "http://httpbin.org/post"
}



In [8]:
async with aiohttp.ClientSession() as session:
    url = 'http://httpbin.org/post'
    files = {'file': open(testfile, 'rb')}

    async with session.post(url, data=files) as resp:
        print(resp.
        print(resp.status)
        print(await resp.text())

AttributeError: 'ClientResponse' object has no attribute 'request'

In [10]:
resp.request_info.headers

<CIMultiDictProxy('Host': 'httpbin.org', 'Accept': '*/*', 'Accept-Encoding': 'gzip, deflate', 'User-Agent': 'Python/3.7 aiohttp/3.6.2', 'Content-Length': '527987', 'Content-Type': 'multipart/form-data; boundary=748703b9782d411186f7e0caff7cab32')>

In [11]:
async with aiohttp.ClientSession() as session:
    url = 'http://httpbin.org/post'
    files = {'file': open(testfile, 'rb')}
    with aiohttp.MultipartWriter('related') as mpwriter:
        mpwriter.append(files)
    async with session.post(url, data=files) as resp:
        print(resp.status)
        print(await resp.text())


TypeError: Cannot create payload from {'file': <_io.BufferedReader name='C:\\githealth\\dicom-samples\\visus.com\\case1\\case1_008.dcm'>}

In [15]:
async with aiohttp.ClientSession() as session:
    url = 'http://httpbin.org/post'
    files = {'file': open(testfile, 'rb')}
    
    with open(testfile,'rb') as reader:
        rawfile = reader.read()
    files = {'file': ('dicomfile', rawfile, 'application/dicom')}

    async with session.post(url, data=files) as resp:
        print(resp.status)
        print(resp.headers)

200
<CIMultiDictProxy('Date': 'Wed, 22 Apr 2020 22:45:02 GMT', 'Content-Type': 'application/json', 'Content-Length': '2708833', 'Connection': 'keep-alive', 'Server': 'gunicorn/19.9.0', 'Access-Control-Allow-Origin': '*', 'Access-Control-Allow-Credentials': 'true')>


In [None]:
loop.run_until_complete(main())

In [None]:
resp.headers

In [None]:
def upload_single_dcm_file(server_url,filepath):
    """POST one DICOM file to `server_url` as a `multipart/related` request.

    Parameters:
        server_url: endpoint to post to (e.g. the notebook-level `url`).
        filepath: path of the DICOM file to upload.

    Returns the `requests` response object so the caller can inspect it
    further, e.g. `r.status_code` or `r.request.headers`.
    """
    with open(filepath,'rb') as reader:
        rawfile = reader.read()
    files = {'file': ('dicomfile', rawfile, 'application/dicom')}

    #encode as multipart_related
    body, content_type = encode_multipart_related(fields = files)

    headers = {'Accept':'application/dicom+json', "Content-Type":content_type}

    # Post to the endpoint the caller passed in, not the notebook-global `url`.
    response = requests.post(server_url, body, headers=headers) #, verify=False)
    return response

In [None]:
# Multiple-file upload (sequential for now; could be made async later)
def upload_muliple_dcm_file(server_url,filepaths):
    """POST each DICOM file in `filepaths` to `server_url`, one request per file.

    Every file is sent as its own `multipart/related` request, built the same
    way as in `upload_single_dcm_file`.

    Parameters:
        server_url: endpoint to post to.
        filepaths: iterable of DICOM file paths.

    Returns a list of `requests` response objects, in the order of `filepaths`.
    """
    responses = []
    for filepath in filepaths:
        with open(filepath,'rb') as reader:
            rawfile = reader.read()
        files = {'file': ('dicomfile', rawfile, 'application/dicom')}

        #encode as multipart_related
        body, content_type = encode_multipart_related(fields = files)

        headers = {'Accept':'application/dicom+json', "Content-Type":content_type}

        # Post to the endpoint the caller passed in, not the notebook-global `url`.
        responses.append(requests.post(server_url, body, headers=headers)) #, verify=False)
    return responses

In [None]:
#r = upload_dcm_file(url,'C:\\githealth\\dicom-samples\\visus.com\\case4\\case4a_002.dcm')
#print(r.status_code)
#print(r.request.headers)

In [None]:
from collections import OrderedDict
def store_files_to_dicomweb_with_logging_to_dataframe(filepaths, df = None, server_url = None):
    """Upload each file in `filepaths` and log every request/response as a DataFrame row.

    Parameters:
        filepaths: sized collection of DICOM file paths to upload, one request each.
        df: accepted for backward compatibility; it is not used (the returned
            DataFrame is always built from this call's uploads only).
        server_url: endpoint to upload to; defaults to the notebook-level `url`.

    Returns a DataFrame with one row per file, holding the request details
    (method, url, headers, first 150 items of the body) and the response
    details (status, reason, headers, text, elapsed time, encodings).
    Progress is printed after every 250 uploads.
    """
    # Fall back to the notebook-level endpoint when none is given.
    target_url = url if server_url is None else server_url
    numfiles = len(filepaths)

    # Fastest to collect plain dictionaries and create the dataframe once at the end.
    files_info = []
    for current_count, file in enumerate(filepaths, start=1):
        # Upload a single file at a time; the response carries its own timing.
        r = upload_single_dcm_file(target_url, file)

        files_info.append({
            'fname': str(file),
            'method': r.request.method,
            'url': r.url,
            'path_url': r.request.path_url,
            'request_headers': str(r.request.headers),
            # Only the start of the body is kept, to keep the log small.
            'request_body_trimmed': r.request.body[0:150],
            'ok': r.ok,
            'status_code': r.status_code,
            'reason': r.reason,
            'response_headers': r.headers,
            'response_text': r.text,
            'elapsed_time': r.elapsed,
            'apparent_encoding': r.apparent_encoding,
            'encoding': r.encoding,
        })
        if current_count%250==0:
            print(f"{current_count} of {numfiles} uploaded")

    # Create a dataframe from the uploads
    return pd.DataFrame(files_info)
        
    
    

In [None]:
df = store_files_to_dicomweb_with_logging_to_dataframe(files)

In [None]:
df.head()

In [None]:
df.to_csv('TestDicomUploads-4DLungSubset.csv')

In [None]:
len(df)

In [None]:
start_time = time.time()
r = upload_single_dcm_file(url,'C:\\githealth\\dicom-samples\\visus.com\\case4\\case4a_002.dcm')
elapsed_time = time.time()-start_time
print(elapsed_time)

print(r.status_code)

In [None]:
print(str(r.request.headers))
r.request.headers.values

In [None]:
df['status_code'] = 'foo'

In [None]:
rows_list = []
for row in input_rows:

        dict1 = {}
        # get input row in dictionary format
        # key = col_name
        dict1.update(blah..) 

        rows_list.append(dict1)

df = pd.DataFrame(rows_list)               

In [None]:
start = time.time()


In [None]:
end = time.time()
print(end-start)


In [None]:
len(files)

In [None]:
%timeit
import os
rootdir = "C:\\githealth\\dicom-samples\\"

for subdir, dirs, files in os.walk(rootdir):
    if '.git' in subdir:
        continue
    for file in files:
        if not '.dcm' in file:
            continue
        fname = os.path.join(subdir, file)
        print(fname)
        r = upload_dcm_file(url, fname)
        print(r.status_code)