<img src= "/files/tables/avatar.jpg" width="100" height="100" />
 
```

Name:         1-nb-storage-management

Design Phase:
    Author:   John Miner
    Date:     12-01-2020
    Purpose:  How to manage the databricks file system (dbfs)

Learning Guide:
    1 - Library vs magic commands
    2 - Working with directories (create, delete, copy, rename)
    3 - Working with files (create, delete, copy, rename)
    4 - Local vs remote storage
    
```

In [0]:
#
#  1.0 - List available commands
#

In [0]:
%fs help

In [0]:
# library-call counterpart of the %fs help magic: list the dbutils.fs commands
dbutils.fs.help()

In [0]:
#
#  2.0 - Magic vs library commands
#


In [0]:
%fs 
ls /

path,name,size,modificationTime
dbfs:/FileStore/,FileStore/,0,1600293586000
dbfs:/Insight/,Insight/,0,1646874643000
dbfs:/databricks/,databricks/,0,1626015806000
dbfs:/databricks-datasets/,databricks-datasets/,0,0
dbfs:/databricks-results/,databricks-results/,0,0
dbfs:/lake/,lake/,0,1646875480000
dbfs:/mnt/,mnt/,0,1600292680000
dbfs:/rissug/,rissug/,0,1616108161000
dbfs:/tmp/,tmp/,0,1600290100000
dbfs:/user/,user/,0,1600294315000


In [0]:
# same root listing as the %fs magic cell, this time through the dbutils library
dbutils.fs.ls("/")

In [0]:
#
# 3.0 - Manage local storage
#

In [0]:
#
# 3.1 - Create sample data lake
#

# Start from a clean slate so the cell can be re-run.  The cleanup is
# best-effort: a failure is reported but does not stop the cell.  Catching
# Exception (rather than everything) keeps interrupts working.
try:
  dbutils.fs.rm("/lake", recurse=True)
except Exception as err:
  print("Skipping cleanup of /lake:", err)

# root first, then one folder per quality zone:
#   bronze = raw quality, silver = refined quality, gold = gold quality
for zone_path in ("/lake", "/lake/bronze", "/lake/silver", "/lake/gold"):
  dbutils.fs.mkdirs(zone_path)

# show folder structure
dbutils.fs.ls("/lake")


In [0]:
#
# 3.2 - Show sample files
#


In [0]:
# list the power-plant sample dataset folder (it holds the README.md read below)
dbutils.fs.ls("/databricks-datasets/power-plant/")

In [0]:
# show up to the first 1000 bytes of the README
dbutils.fs.head("/databricks-datasets/power-plant/README.md", 1000)

In [0]:
# list the sample data files of the power-plant dataset
dbutils.fs.ls("/databricks-datasets/power-plant/data")


In [0]:
# show up to the first 500 bytes of Sheet1.tsv
dbutils.fs.head("/databricks-datasets/power-plant/data/Sheet1.tsv", 500)

In [0]:
# Can write to local storage.  The third argument is the overwrite flag: with
# True, a file left behind by an earlier (interrupted) run is replaced instead
# of making the call fail, so the cell can be re-run safely.
dbutils.fs.put("/Insight/hello-world.txt", "This is my first program", True)

In [0]:
# Show the new file by listing its path
dbutils.fs.ls("/Insight/hello-world.txt")

In [0]:
# Clean up: remove the hello-world file created under /Insight
dbutils.fs.rm("/Insight/hello-world.txt")

In [0]:
# Can write to external storage: the file is created with plain Python file
# I/O on a /dbfs/... path that points into the /mnt/datalake mount.
path = "/dbfs/mnt/datalake/hello-world.txt"

# the with-block closes the handle even when the write raises
with open(path, "w") as f:
  f.write("This is my first program")


In [0]:
# look in top directory: print every .txt file sitting directly under the mount
import glob

txt_matches = glob.glob('/dbfs/mnt/datalake/*.txt')
for txt_path in txt_matches:
  print(txt_path)
  

In [0]:
# traverse directories
import os

# Walk the bronze folder once, bottom-up (topdown=False), and remember every
# file and directory path.  One pass is enough for both listings, which avoids
# repeating the whole traversal of the mounted storage.
file_paths = []
dir_paths = []
for root, dirs, files in os.walk("/dbfs/mnt/datalake/bronze", topdown=False):
  file_paths.extend(os.path.join(root, name) for name in files)
  dir_paths.extend(os.path.join(root, name) for name in dirs)

print("~ files ~\n")
for file_path in file_paths:
  print(file_path)

print("\n~ directories ~\n")
for dir_path in dir_paths:
  print(dir_path)
    

In [0]:
# Remove the hello-world file from the mounted data lake (local /dbfs path)
import os
os.remove("/dbfs/mnt/datalake/hello-world.txt")


In [0]:
#
# 3.3 - Copy files and folders
#

# Remove any previous copy so the cell can be re-run.  Best-effort: a failure
# is reported but does not stop the cell.  Catching Exception (rather than
# everything) keeps interrupts working.
try:
  dbutils.fs.rm("/lake/data", recurse=True)
except Exception as err:
  print("Skipping cleanup of /lake/data:", err)

# copy dir (recurse=True so the directory contents come along)
dbutils.fs.cp("/databricks-datasets/power-plant/data", "/lake/data", recurse=True)

# copy file
dbutils.fs.cp("/databricks-datasets/power-plant/README.md", "/lake/data/README.md")

# show folder structure
dbutils.fs.ls("/lake/data")


In [0]:
#
# 3.4 - Move files and folders
#

# (source, destination, recurse) in execution order.  The order matters:
# bronze has to move out of the way before data can take over its name, and
# the readme is renamed inside the new bronze folder.
move_plan = [
  ("/lake/bronze", "/lake/junk", True),                          # bronze -> junk
  ("/lake/data", "/lake/bronze", True),                          # data -> bronze
  ("/lake/bronze/README.md", "/lake/bronze/readme.txt", False),  # readme.md -> readme.txt
]
for source, target, recursive in move_plan:
  dbutils.fs.mv(source, target, recurse=recursive)

# show directory contents
dbutils.fs.ls("/lake/bronze")


In [0]:
#
# 3.5 - Remove files and folders
#



In [0]:
# remove 1 file (path given with the explicit dbfs:/ scheme prefix)
dbutils.fs.rm("dbfs:/lake/bronze/readme.txt")


In [0]:
# show the bronze directory to check what is left in it
dbutils.fs.ls("/lake/bronze")

In [0]:
# no wild cards: the "*.tsv" pattern is handed to rm as a literal path
# NOTE(review): dbutils.fs.rm does not appear to expand glob patterns, so this
# call is expected to leave the .tsv files in place — confirm against the docs
dbutils.fs.rm("dbfs:/lake/bronze/*.tsv")


In [0]:
# remove the /lake dir + all of its sub dirs (recurse=True)
dbutils.fs.rm("/lake", recurse=True)


In [0]:
# show dirs: list the root rather than /lake itself (listing kept below,
# commented out) to confirm where things stand after the removal
# dbutils.fs.ls("/lake/")
dbutils.fs.ls("/")


In [0]:
#
# 4.0 - Manage remote storage
#

In [0]:
#
# 4.1 - show mounts + files
#

In [0]:
# show existing mounts known to this workspace
dbutils.fs.mounts()

In [0]:
# list the test folder on the mounted data lake (adventure works files?)
dbutils.fs.ls("/mnt/datalake/test")

In [0]:
# Unmount the storage attached at /mnt/datalake/
dbutils.fs.unmount("/mnt/datalake/")


In [0]:
#
# 4.2 - Re-mount Storage
#

# Notebook to call (presumably the one that performs the mount — confirm) and
# the timeout, in seconds, passed to dbutils.notebook.run
MOUNT_NOTEBOOK = "./2-nb-mount-adls-fs"
MOUNT_TIMEOUT_SECONDS = 60

# Arguments handed over to the called notebook
parms = dict(
  account_name="sa4rissug2020",
  file_system="sc4rissug2020",
  mount_point_path="/mnt/datalake",
)

# Run the notebook with these selections and echo whatever it returns
ret = dbutils.notebook.run(MOUNT_NOTEBOOK, MOUNT_TIMEOUT_SECONDS, parms)
print(ret)



In [0]:
# list the test folder again through the /mnt/datalake mount (adventure works files?)
dbutils.fs.ls("/mnt/datalake/test")