# Demo of DVC

Getting started video: https://youtu.be/kLKBcPonMYw

In [1]:
import dvc
import json
import pandas as pd

from cryptography.fernet import Fernet

## Import data path and decryption key json as a dictionary

In [2]:
# Open the JSON file for reading
with open('paths.json', 'r') as file:
    # Load the JSON data from the file
    paths = json.load(file)

paths

{'file_path': 'gdrive://1GQ3CiuUXWaRM3N1SQmJ_Es9A8GiwJY_S',
 'decryption_key': '3AN4JXo0Gy2fWmU7szc6tkbUicR_P6AKkA5t5wEezqo='}

## Load data

In [4]:
data = pd.read_csv('./data/data.csv')
data

Unnamed: 0,Name,Quality
0,Mike,25
1,Anna,100
2,Kerry,100
3,Amy,100
4,Ken,75
5,Tom,25


## Encrypt data

In [4]:
# To generate a new key (save this to json file when first setting up encyption)
# key = Fernet.generate_key()
# key

In [5]:
key = paths['decryption_key']
f = Fernet(key)

In [6]:
# encrypt the dataframe
encrypted_data = data.applymap(lambda x: f.encrypt(str(x).encode()))

In [7]:
encrypted_data

Unnamed: 0,Name,Quality
0,b'gAAAAABkbfBK8QDYfD3wDjhcpECheYwJ6CjxynSepro_...,b'gAAAAABkbfBKtn_XW8Uiz1BeXD6B6TX9a1XQc9zLG9dZ...
1,b'gAAAAABkbfBKNvXQrDNJIMZL4XP9o16iteHX22WhY9Ih...,b'gAAAAABkbfBKMnEQFRUkxnjKS82p4KUwWHTK39rXICt4...
2,b'gAAAAABkbfBKEcjmsO0J7ZR5TLLAvtkGaBaCep2bVtIc...,b'gAAAAABkbfBKUxuChu7j2aXr0Bx_taf26GcFdRUKQkLQ...
3,b'gAAAAABkbfBKuz9IaCLFWJW_vhs7NbPycy7QnORHghIV...,b'gAAAAABkbfBK-N279aktvLhK2PfkaR9jptWNYfSj1MVy...


In [8]:
encrypted_data.to_csv('./data/encrypted_data.csv', index=False)

## Track with Git

Run these commands from terminal

`dvc init` # If DVC trtacking has not been set up

`dvc add ./data/encrypted_data.csv`

`git add data/encrypted_data.csv.dvc`

`git commit -m "Update DVC tracking"`

## Add remote storage location

From terminal

Take location from paths['file_path]

`dvc remote add -f -d storage gdrive://1GQ3CiuUXWaRM3N1SQmJ_Es9A8GiwJY_S`

`git commit -m "Add remote storage"`

Push data changes up to remote

`dvc push`