# Database Backup

**Objective:** Back up every custom (non-Django-builtin) database table to paged CSV files under `data/backup/`.

In [1]:
import os, sys, shutil, math
import pandas as PD

In [2]:
# The backend root sits two levels above the working directory; its app_proj
# folder is put on sys.path (presumably where the `notebooks` helper module lives).
BACKEND_PATH = os.path.dirname(os.path.dirname(os.getcwd()))
APPPROJ_PATH = os.path.join(BACKEND_PATH, 'app_proj')
sys.path.append(APPPROJ_PATH)

import notebooks as NT

# show the longer public names offered by the helper module
print([name for name in dir(NT) if len(name) > 8 and not name.startswith('_')])

['DataframeToDicts', 'GetRandom', 'NOTEBOOK_ENV']


In [3]:
# MODULE_PATH is the parent of the current working directory; the data folder
# is expected directly beneath it.
# NOTE(review): the result depends on the kernel's cwd, so this must run before
# the cell that calls os.chdir — re-running it afterwards yields a different path.
MODULE_PATH = os.path.dirname(os.getcwd())
DATA_PATH = os.path.join(MODULE_PATH, 'data')
# sanity check, shown as the cell output
os.path.exists(DATA_PATH)

True

In [4]:
import django

# Environment must be prepared before django.setup() is called.
# DEPLOYMENT_ENV is presumably read by the project settings — TODO confirm.
os.environ['DEPLOYMENT_ENV'] = NT.NOTEBOOK_ENV
# Django flag that permits synchronous ORM calls from the notebook's async kernel.
os.environ['DJANGO_ALLOW_ASYNC_UNSAFE'] = 'True'

# Work from the backend root. BACKEND_PATH was resolved before any chdir, so
# this is the same folder the cell used to derive from os.getcwd() on a linear
# run, but re-running the cell no longer climbs one directory higher each time
# (nor overwrites MODULE_PATH with a wrong value).
os.chdir(BACKEND_PATH)
django.setup()

env: DEV


In [5]:
# ORM helpers; imported here rather than at the top because the project's
# `database` module presumably needs django.setup() to have run first.
import django.db.models as JM
import database as DB

# show the longer public names offered by the database helper module
print([name for name in dir(DB) if len(name) > 8 and not name.startswith('_')])

['BaseManager', 'DeleteTable', 'GetNativeTableCounts', 'GetTableCounts', 'GetTableDictionary', 'InsertBulk', 'InsertSingle']


In [6]:
# autoreload mode 2: modules are reloaded before each cell runs, so edits to
# project code are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2
from IPython.core.interactiveshell import InteractiveShell
# display the value of every expression statement in a cell, not only the last one
InteractiveShell.ast_node_interactivity = 'all'
# display helpers and the progress-bar widget used by the backup loop
import IPython.display as DS 
from ipywidgets import IntProgress

### Backup

In [7]:
from django.apps import apps

# Apps whose tables are skipped by this backup.
SKIPPED_APPS = {'admin', 'auth', 'contenttypes', 'sessions'}

# One entry per model of every remaining installed app:
#   'module' - the key under which the app is registered in apps.app_configs
#   'table'  - the model class name (taken from __name__ instead of parsing
#              the class repr, which breaks if the repr format ever differs)
customTables = [
    {'module': module, 'table': model.__name__}
    for module, app in apps.app_configs.items()
    if module not in SKIPPED_APPS
    for model in app.get_models()
]

# preview a few of the collected tables
NT.GetRandom(PD.DataFrame(customTables), 3)

Unnamed: 0,module,table
0,members,User
1,business_module,LegoSet
2,central,UserProfile


In [8]:
# Reset the backup folder: every run starts from an empty <DATA_PATH>/backup so
# files left by an earlier run cannot mix with the new export.
# WARNING: this permanently deletes the previous backup.

backupPath = os.path.join(DATA_PATH, 'backup')

if os.path.exists(backupPath):
    shutil.rmtree(backupPath)    # removes the folder with its contents; os.remove cannot delete a directory
os.mkdir(backupPath)

# sanity check, shown as the cell output
os.path.exists(backupPath)

True

In [9]:
DOC_SIZE = 4000   # width of the id window exported to each CSV page

# Export every collected table as numbered CSV pages:
#   <backupPath>/<module>/<table>/<table>-001.csv, -002.csv, ...
# Each page covers DOC_SIZE consecutive id values, so sparse id ranges produce
# smaller pages and an id window without rows produces no file at all.
# NOTE(review): assumes every model has an integer `id` primary key — confirm.
for tb in customTables:

    # get table object

    module = tb['module']
    table = tb['table']

    # Resolve the model through Django's app registry; importing
    # `<module>.models` by hand only works when the app label happens to be a
    # top-level package name.
    classObj = apps.get_model(module, table)

    # count in the database instead of loading every id into memory
    recordCount = classObj.objects.count()
    print(table, '|', f"records: {recordCount:,}")

    # create folder for current table (parents included, no-op if present)

    tablePath = os.path.join(backupPath, module, table)
    os.makedirs(tablePath, exist_ok=True)

    # id bounds in a single query; both values are None for an empty table

    idBounds = classObj.objects.aggregate(JM.Min('id'), JM.Max('id'))
    startId = idBounds['id__min']
    endId = idBounds['id__max']

    # Test for None explicitly: coercing with `or 0` and comparing to 0 would
    # also skip a populated table whose smallest id is 0.
    isEmpty = startId is None
    pages = 1 if isEmpty else math.ceil((endId - startId + 1) / DOC_SIZE)

    progressBar = IntProgress(min=0, max=pages)
    DS.display(progressBar)

    if isEmpty:
        # nothing to export; show the bar as complete
        progressBar.value = pages
        continue

    # loop over table and output document pages

    for p in range(pages):
        currentStart = startId + p * DOC_SIZE
        currentEnd = currentStart + DOC_SIZE

        # Explicit id ordering keeps the files deterministic and preserves the
        # rows' relative order, which matters because the id column is dropped.
        query = (
            classObj.objects
            .filter(id__gte=currentStart, id__lt=currentEnd)
            .order_by('id')
            .values()
        )
        currentRecords = list(query)

        if currentRecords:
            currentPath = os.path.join(tablePath, f"{table}-{p + 1:03d}.csv")
            # NOTE(review): ids are not written, so a restore presumably
            # regenerates them; gaps in the id sequence would not survive — confirm.
            currentDf = PD.DataFrame(currentRecords).drop(columns='id')
            currentDf.to_csv(currentPath, index=False)

        progressBar.value += 1

User | records: 0


IntProgress(value=0, max=1)

LegoSet | records: 6,181


IntProgress(value=0, max=2)

UserProfile | records: 0


IntProgress(value=0, max=1)