# How to access data in Azure

In [59]:
from pprint import pprint
import json

# azure storage
import azure.storage.blob as azureblob

## Using Azure Storage SDK
Package: pip install azure-storage-blob==1.3.1

To connect to Azure Storage we need the Account Name and Key.
This shouldn't be stored in repos because the Key is a secret. Currently it's being stored here because we're running in a private repo.

A good way is keeping this information in a config file like here and making sure this file is not added to the repo (through .gitignore)

Another way is storing the values in environment variables and making sure these are correctly setup on whatever target environment we want to run

I recommend installing Azure Storage Explorer to easily access data in a graphical tool: https://azure.microsoft.com/en-us/features/storage-explorer/

In [60]:
settings_file = "settings.json"

with open(settings_file) as f:
    settings = json.load(f)

pprint(settings, indent=4)

{   'storageAccountKey': 'oGMRWvuJkmdKQJq7tqc/ZMl8F32aRSNAGcx+jKb31iCBnv2r6L/0Hj1QatgzhN5sdt0l72ufVFmIcLEUBupXUw==',
    'storageAccountName': 'covidiastorage'}


In [61]:
!export COVIDIA_STORAGE_ACCOUNT_NAME=covidiastorage
!export COVIDIA_STORAGE_ACCOUNT_KEY=oGMRWvuJkmdKQJq7tqc/ZMl8F32aRSNAGcx+jKb31iCBnv2r6L/0Hj1QatgzhN5sdt0l72ufVFmIcLEUBupXUw==

In [62]:
# good way is to supply these values as environment variables so we don't keep secrets in the repo
account_name = os.getenv("COVIDIA_STORAGE_ACCOUNT_NAME")
account_key = os.getenv("COVIDIA_STORAGE_ACCOUNT_KEY")

In [64]:
print(account_name)

None


### Creating the client

In [41]:
# Create the blob client
blob_client = azureblob.BlockBlobService(
    account_name = settings["storageAccountName"],
    account_key = settings["storageAccountKey"])

### Listing Containers

In [42]:
# getting all the containers
containers = blob_client.list_containers()

for c in containers:
    print(c.name)

data


### Listing Files

In [43]:
# getting files in a container
files = blob_client.list_blobs(container_name="data")
for f in files:
    print(f.name)

influenza
influenza/ILINet.csv
influenza/WHO_NREVSS_Clinical_Labs.csv
influenza/WHO_NREVSS_Combined_prior_to_2015_16.csv
influenza/WHO_NREVSS_Public_Health_Labs.csv
opencovid19
opencovid19/chiffres-cles.json


In [44]:
# getting files in a container with a prefix
files = blob_client.list_blobs(container_name="data", prefix="influenza/")
for f in files:
    print(f.name)

influenza/ILINet.csv
influenza/WHO_NREVSS_Clinical_Labs.csv
influenza/WHO_NREVSS_Combined_prior_to_2015_16.csv
influenza/WHO_NREVSS_Public_Health_Labs.csv


### Uploading a file

In [45]:
# upload a file
file_name = "settings.json"
container_name = "test"

# returns true if created, false if already exist
blob_client.create_container(container_name, fail_on_exist=False)

# upload file
blob_client.create_blob_from_path(blob_name=file_name, file_path=file_name, container_name=container_name)

print("Uploaded", file_name, "to container", container_name)

Uploaded settings.json to container test


In [46]:
files = blob_client.list_blobs(container_name=container_name)
for f in files:
    print(f.name)

settings.json


### Downloading a file

In [47]:
# Download the blob to a local file
file_path = "temp.json"
blob_client.get_blob_to_path(container_name=container_name, blob_name=file_name, file_path=file_path)
print("Downloaded file to", file_path)

Downloaded file to temp.json


In [48]:
!cat temp.json

{
    "storageAccountName" : "covidiastorage",
    "storageAccountKey" : "oGMRWvuJkmdKQJq7tqc/ZMl8F32aRSNAGcx+jKb31iCBnv2r6L/0Hj1QatgzhN5sdt0l72ufVFmIcLEUBupXUw=="
}

In [53]:
!rm temp.json

### Deleting a file

In [49]:
# delete blob
blob_client.delete_blob(container_name=container_name, blob_name=file_name)
files = blob_client.list_blobs(container_name=container_name)
for f in files:
    print(f.name)

### Deleting a container

In [50]:
# delete container
blob_client.delete_container(container_name)

True

## Full Example

In [51]:
import pandas as pd
import os

# set variables
container_name = "data"
file_name = "influenza/ILINet.csv"
local_folder_name = "temp"
local_file_name = "ILINet.csv"
local_file_path = os.path.join(local_folder_name, local_file_name)

# download dataset
os.makedirs(local_folder_name,exist_ok=True)
blob_client.get_blob_to_path(container_name=container_name, blob_name=file_name, file_path=local_file_path)

# read with pandas
df = pd.read_csv(local_file_path, skiprows=1)
df.head(10)

Unnamed: 0,REGION TYPE,REGION,YEAR,WEEK,% WEIGHTED ILI,%UNWEIGHTED ILI,AGE 0-4,AGE 25-49,AGE 25-64,AGE 5-24,AGE 50-64,AGE 65,ILITOTAL,NUM. OF PROVIDERS,TOTAL PATIENTS
0,HHS Regions,Region 1,1997,40,0.498535,0.623848,15,,7.0,22,,0,44,32,7053
1,HHS Regions,Region 2,1997,40,0.374963,0.384615,0,,3.0,0,,0,3,7,780
2,HHS Regions,Region 3,1997,40,1.35428,1.34172,6,,7.0,15,,4,32,16,2385
3,HHS Regions,Region 4,1997,40,0.400338,0.45001,12,,23.0,11,,0,46,29,10222
4,HHS Regions,Region 5,1997,40,1.22926,0.901266,31,,24.0,30,,4,89,49,9875
5,HHS Regions,Region 6,1997,40,1.01898,0.747384,2,,1.0,2,,0,5,4,669
6,HHS Regions,Region 7,1997,40,0.871791,1.15286,0,,4.0,18,,5,27,14,2342
7,HHS Regions,Region 8,1997,40,0.516017,0.422654,2,,0.0,3,,0,5,5,1183
8,HHS Regions,Region 9,1997,40,1.80761,2.25878,80,,76.0,74,,13,243,23,10758
9,HHS Regions,Region 10,1997,40,4.74352,4.8254,31,,12.0,30,,3,76,13,1575


In [52]:
print(df.columns)

Index(['REGION TYPE', 'REGION', 'YEAR', 'WEEK', '% WEIGHTED ILI',
       '%UNWEIGHTED ILI', 'AGE 0-4', 'AGE 25-49', 'AGE 25-64', 'AGE 5-24',
       'AGE 50-64', 'AGE 65', 'ILITOTAL', 'NUM. OF PROVIDERS',
       'TOTAL PATIENTS'],
      dtype='object')


In [54]:
!rm temp -rf