# Data Versioner 
## Quick start

In [1]:
from dataversioner.dataversioner import DataVersioner
import pandas as pd

In [2]:
# Small 3x3 demo frame, specified column by column; used as the data to version.
df = pd.DataFrame({
    "a": [1, 4, 7],
    "b": [2, 5, 8],
    "c": [3, 6, 9],
})
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [3]:
# Wrap the frame in a DataVersioner; as the following cells show, it starts
# out with a single commit named 'Initial df'.
dv = DataVersioner(df)

In [4]:
# List the names of the commits recorded so far.
dv.commits()

['Initial df']

In [5]:
# Edit the working data through dv.data: add a 'sum' column holding the total
# of each row (axis=1 sums across the columns). This is not committed yet.
dv.data['sum'] = dv.data.sum(axis=1)
dv.data

Unnamed: 0,a,b,c,sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24


In [6]:
# Before committing, the commit list still contains only 'Initial df'.
print(dv.commits())
# Record the current data as a commit: first argument is the commit name,
# second is its description.
dv.commit("Row sum", "Added 'sum' of a, b, c")
# The new commit name now appears in the list.
dv.commits()

['Initial df']


['Initial df', 'Row sum']

In [7]:
# Print the commit the versioner is on: its name, description, commit time
# and data.
dv.status()

'Row sum' - Added 'sum' of a, b, c
Committed at 05:03 PM on Mar 08, 2022

   a  b  c  sum
0  1  2  3    6
1  4  5  6   15
2  7  8  9   24


In [8]:
# Print the same details (name, description, commit time, data) for a commit
# selected by name.
dv.show_commit('Initial df')

'Initial df' - First commit of data
Committed at 05:03 PM on Mar 08, 2022

   a  b  c
0  1  2  3
1  4  5  6
2  7  8  9


In [9]:
# Print the commits as an indented tree; 'Row sum' is nested under
# 'Initial df'.
dv.show_commits()

Initial df
   - Row sum


In [10]:
# Switch back to the 'Initial df' commit; dv.data then shows the original
# three columns again.
dv.checkout('Initial df')
dv.data

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


## Additional functionality

In [11]:
# Make a new, uncommitted change on top of 'Initial df': add the element-wise
# product of columns c and a.
dv.data['c * a'] = dv.data['c'] * dv.data['a']
dv.data

Unnamed: 0,a,b,c,c * a
0,1,2,3,3
1,4,5,6,24
2,7,8,9,63


In [12]:
# Checking out a commit while dv.data holds uncommitted changes is refused
# with a ValueError. The error is the point of this cell, so catch and print
# it (keeping the "ValueError: ..." format) instead of letting it abort a
# "Restart Kernel -> Run All" at this cell.
try:
    dv.checkout('Initial df')
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: The current data has uncommitted changes relative to 'Initial df'. Either commit changes or set allow_discard_changes to True to proceed.

In [13]:
# Retry the checkout, this time explicitly allowing the uncommitted 'c * a'
# column to be discarded; dv.data is back to the three original columns.
dv.checkout('Initial df', allow_discard_changes = True)
dv.data

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [14]:
# Recreate the 'c * a' column so that it can be committed this time.
dv.data['c * a'] = dv.data['c'] * dv.data['a']
dv.data

Unnamed: 0,a,b,c,c * a
0,1,2,3,3
1,4,5,6,24
2,7,8,9,63


In [15]:
# Commit the new column under the name "c times a" (name first, description
# second), then list all commit names.
dv.commit("c times a", "Added product of c and a")
dv.commits()

['Initial df', 'Row sum', 'c times a']

In [16]:
# Print the commit tree again: 'c times a' was committed while on
# 'Initial df', so it is listed beside 'Row sum' rather than under it.
dv.show_commits()

Initial df
   - Row sum
   - c times a


In [17]:
# Move to the 'Row sum' commit; dv.data now has the 'sum' column and no
# 'c * a' column.
dv.checkout('Row sum')
dv.data

Unnamed: 0,a,b,c,sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24


In [18]:
# Replace the working data with its rows followed by a copy scaled by 10
# (ignore_index=True renumbers the rows 0-5), then commit the result as
# 'Concat rows'.
dv.data = pd.concat([dv.data, dv.data * 10], ignore_index=True)
dv.commit('Concat rows', 'Concatenated 10 * data to data')
dv.data

Unnamed: 0,a,b,c,sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24
3,10,20,30,60
4,40,50,60,150
5,70,80,90,240


In [19]:
# 'Concat rows' was committed while on 'Row sum', so the tree nests it under
# that commit.
dv.show_commits()

Initial df
   - Row sum
      - Concat rows
   - c times a
