In [1]:
from dataversioner.dataversioner import DataVersioner

import pandas as pd

In [2]:
# Small 3x3 demo frame, declared column by column so each column's values
# are visible next to its label.
df = pd.DataFrame(
    {
        "a": [1, 4, 7],
        "b": [2, 5, 8],
        "c": [3, 6, 9],
    }
)
df

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [3]:
# Wrap the frame in a DataVersioner. The output of the following cell shows
# that the new object starts with a single commit named 'Initial df'.
dv = DataVersioner(df)

In [4]:
# List the names of the commits recorded so far.
dv.commits()

['Initial df']

In [5]:
# Row-wise total of the three original columns, stored as a new 'sum' column.
# The source columns are named explicitly so that re-running this cell does
# not fold an already-present 'sum' column into the total (a bare
# dv.data.sum(axis=1) would double the values on a second run).
dv.data['sum'] = dv.data[['a', 'b', 'c']].sum(axis=1)
dv.data

Unnamed: 0,a,b,c,sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24


In [6]:
# Show the commit list, record the working frame as 'Row sum', then show the
# list again to confirm the new entry.
commits_before = dv.commits()
print(commits_before)

dv.commit("Row sum", "Added 'sum' of a, b, c")

commits_after = dv.commits()
print(commits_after)

['Initial df']
['Initial df', 'Row sum']


In [7]:
# Called without a name, this printed the 'Row sum' commit that was just
# created: its message, its commit time and the stored frame.
# NOTE(review): presumably defaults to the currently checked-out commit - confirm.
dv.show_commit()

'Row sum' - Added 'sum' of a, b, c
Committed at 03:03 PM on Mar 08, 2022
   a  b  c  sum
0  1  2  3    6
1  4  5  6   15
2  7  8  9   24


In [8]:
# Element-wise product of columns 'c' and 'a', stored under the label 'c * a'.
# The change is not committed in this cell.
col_c = dv.data['c']
col_a = dv.data['a']
dv.data['c * a'] = col_c * col_a
dv.data

Unnamed: 0,a,b,c,sum,c * a
0,1,2,3,6,3
1,4,5,6,15,24
2,7,8,9,24,63


In [11]:
print(dv.commits())

# The 'c * a' column was added after the last commit, so the working data has
# uncommitted changes and checkout() refuses to discard them. The ValueError
# is the point of this cell, so catch it and print it instead of letting it
# abort a "Restart & Run All".
try:
    dv.checkout('Row sum')
except ValueError as err:
    print(f"ValueError: {err}")

['Initial df', 'Row sum']


ValueError: The current data has uncommitted changes relative to 'Row sum'. Either commit changes or set allow_discard_changes to True to proceed.

In [12]:
# Check out 'Row sum' again, this time explicitly allowing the uncommitted
# changes in the working data to be thrown away.
dv.checkout("Row sum", allow_discard_changes=True)
dv.data

Unnamed: 0,a,b,c,sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24


In [13]:
# Stack a copy of the frame scaled by 10 beneath the original rows (with a
# fresh 0..n-1 index), then record the result as the 'Concat rows' commit.
scaled_rows = dv.data * 10
dv.data = pd.concat([dv.data, scaled_rows], ignore_index=True)
dv.commit('Concat rows', 'Concatenated 10 * data to data')
dv.data

Unnamed: 0,a,b,c,sum
0,1,2,3,6
1,4,5,6,15
2,7,8,9,24
3,10,20,30,60
4,40,50,60,150
5,70,80,90,240


In [14]:
# The commit list now contains 'Concat rows' after the two earlier commits.
dv.commits()

['Initial df', 'Row sum', 'Concat rows']

In [16]:
# Here status() printed the 'Concat rows' commit: name, message, commit time
# and frame.
# NOTE(review): presumably reports the commit the working data sits on - confirm.
dv.status()

'Concat rows' - Concatenated 10 * data to data
Committed at 03:03 PM on Mar 08, 2022
    a   b   c  sum
0   1   2   3    6
1   4   5   6   15
2   7   8   9   24
3  10  20  30   60
4  40  50  60  150
5  70  80  90  240


In [17]:
# Print the commit history as an indented tree; at this point it is a single
# chain: Initial df -> Row sum -> Concat rows.
dv.show_commits()

Initial df
   -Row sum
      -Concat rows


In [19]:
# The concat change was committed, so there is nothing uncommitted to lose and
# a plain checkout (no allow_discard_changes) goes through.
dv.checkout('Initial df')
dv.data

Unnamed: 0,a,b,c
0,1,2,3
1,4,5,6
2,7,8,9


In [20]:
# Extend the frame's 1, 2, 3 / 4, 5, 6 / 7, 8, 9 pattern with a fourth column.
d_values = [4, 7, 10]
dv.data['d'] = d_values
dv.data

Unnamed: 0,a,b,c,d
0,1,2,3,4
1,4,5,6,7
2,7,8,9,10


In [21]:
# Commit the frame with the new 'd' column, then print the commit tree.
# The working data was checked out from 'Initial df' before this change.
commit_name = 'Df w/ d'
commit_note = 'Continued pattern in new column, d'
dv.commit(commit_name, commit_note)
dv.show_commits()

Initial df
   -Row sum
      -Concat rows
   -Df w/ d


In [22]:
# Passing a commit name displays that commit, even though 'Df w/ d' was the
# most recent one made.
dv.show_commit('Row sum')

'Row sum' - Added 'sum' of a, b, c
Committed at 03:03 PM on Mar 08, 2022
   a  b  c  sum
0  1  2  3    6
1  4  5  6   15
2  7  8  9   24
