In [1]:
import os
import stat as STAT
import sqlite3
from tqdm import tqdm
import hashlib
from time import time

# cur_dirs:type
MFILE = 0
MDIR = 1
MLINK = 2
MOTHER = 3 # встречаются всякие сокеты, именованные каналы. Не смотря на то, что в /sys, /dev, /proc, /run - не лезем

# cur_dirs:modified
# 1 - modified
# 0 - not modified

class AttrDict(dict):
    def __getattr__(self, key):
        if key not in self:
            raise AttributeError(key) # essential for testing by hasattr
        return self[key]
    def __setattr__(self, key, value):
        self[key] = value
def make_dict(**kwargs):
    return AttrDict(kwargs)

In [2]:
def is_link(mode): return STAT.S_ISLNK(mode)
def is_dir(mode):  return STAT.S_ISDIR(mode) 
def is_file(mode): return STAT.S_ISREG(mode) 
def is_other(mode):return STAT.S_ISCHR(mode) or STAT.S_ISBLK(mode) or\
                    STAT.S_ISFIFO(mode) or STAT.S_ISSOCK(mode) or\
                    STAT.S_ISDOOR(mode) or STAT.S_ISPORT(mode) or\
                    STAT.S_ISWHT(mode) 
def simple_type(mode):
    typ = MLINK if STAT.S_ISLNK(mode) else\
        MDIR if STAT.S_ISDIR(mode) else\
        MFILE if STAT.S_ISREG(mode) else\
        MOTHER if STAT.S_ISCHR(mode) or STAT.S_ISBLK(mode) or\
            STAT.S_ISFIFO(mode) or STAT.S_ISSOCK(mode) or\
            STAT.S_ISDOOR(mode) or STAT.S_ISPORT(mode) or\
            STAT.S_ISWHT(mode) else \
        None
    if typ is None:
        raise Exception('unknown type')
    return typ
 

In [3]:
# получить список корневых папок для отслеживания
def get_root_dirs():
    dirs = []
    for rd in os.listdir(path='/'):
        if rd in ['media','cdrom','mnt','proc','sys','dev','run']:
            continue
        mode = os.stat('/'+rd,follow_symlinks=False).st_mode
        if STAT.S_ISDIR(mode) and not STAT.S_ISLNK(mode):
            dirs.append(rd)
    return dirs


In [4]:
get_root_dirs()

['srv',
 'bin.usr-is-merged',
 'home',
 'tmp',
 'etc',
 'boot',
 'var',
 'sbin.usr-is-merged',
 'snap',
 'root',
 'lost+found',
 'usr',
 'opt',
 'lib.usr-is-merged']

In [5]:
def path2ids(path,cursor):
    ids = []
    cur_id = 0
    for name in path.split('/'):
        if name=='': continue
        n = cursor.execute('SELECT id FROM cur_dirs WHERE parent_id = ? AND name = ?',(cur_id,name)).fetchone()
        if n is None:
            return ids+[None]
            #raise Exception(f"can't find {name} in {cur_id}")
        cur_id = n[0]
        ids.append(cur_id)
    return ids
    

In [6]:
def id2path(fid,cursor):
    path = ''
    while fid!=0:
        n = cursor.execute('SELECT parent_id, name FROM cur_dirs WHERE id = ? ',(fid,)).fetchone()
        assert n is not None
        path = '/'+n[1]+path
        fid = n[0]
    return path

In [30]:
def is_modified(fid, cursor):
    n = cursor.execute('SELECT modified FROM cur_dirs WHERE id = ?',(fid,)).fetchone()
    if n is None: raise Exception(f"can't find fid {fid}")
    return n[0]==1

def set_modified(fid, cursor):
    if fid==0: return
    n = cursor.execute('SELECT parent_id, modified FROM cur_dirs WHERE id = ?',(fid,)).fetchone()
    if n is None: raise Exception(f"can't find fid {fid}")
    if n[1]==0:
        #print('set_modified', fid)
        cursor.execute('UPDATE cur_dirs SET modified = 1 WHERE id = ?',(fid,))
        set_modified(n[0], cursor)
    

In [8]:
if 0:
    CON = sqlite3.connect('files1.db')
    ROOT_DIRS = get_root_dirs()
if 1:
    CON = sqlite3.connect('files2.db')
    ROOT_DIRS = [os.getcwd()]

In [45]:
def ls(fid):
    print(('parent_id','name','id','type','modified'),
          ('id','type',
           'st_mode','t_ino','st_dev','st_nlink','st_uid','st_gid','st_size','st_atime','st_mtime','st_ctime','st_blocks','st_blksize',
            'data','owner'))
    print('----------------------')
    with CON:
        if type(fid)is str:
            fid = path2ids(fid,CON)[-1]
        n = CON.execute('SELECT * FROM cur_dirs WHERE parent_id = ?',(fid,)).fetchall()
        for t in n:
            print(t,CON.execute('SELECT * FROM cur_stat WHERE id = ?',(t[2],)).fetchone())
        

In [10]:
os.getcwd()

'/home/feelus/pyfiles/src'

In [11]:
def check_db_exist():
    with CON:
        tables = list(CON.execute("SELECT name FROM sqlite_master").fetchall())
        print(tables)
        return ('cur_dirs',) in tables

In [12]:
INIT_MODE = not check_db_exist()
INIT_MODE

[]


True

In [14]:
if not INIT_MODE:
    with CON:
        print(CON.execute('SELECT * FROM cur_dirs').fetchall()[:100])

In [15]:
def create_root(path,cursor):
    ids = path2ids(path,cursor)
    assert ids[-1] is None
    fid = 0 if len(ids)==1 else ids[-2]

    # рассчитываем, что src_path - обсолютный путь, не симлинк, не содержит // типа '/a//b/c'
    path = path.split('/')

    #print(ids,fid,path)
    for name in path[len(ids):-1]:
        cursor.execute('INSERT INTO cur_dirs (parent_id, name, modified, type) VALUES (?, ?, 2, ?)',(fid, name, MDIR))
        (fid,) = cursor.execute('SELECT id FROM cur_dirs WHERE parent_id =? AND name=?',(fid,name)).fetchone()
    cursor.execute('INSERT INTO cur_dirs (parent_id, name, modified, type) VALUES (?, ?, 0, ?)',(fid, path[-1], MDIR))
    (fid,) = cursor.execute('SELECT id FROM cur_dirs WHERE parent_id =? AND name=?',(fid,path[-1])).fetchone()
    return fid


In [16]:
if INIT_MODE:
    with CON:
        if CON.in_transaction:
            CON.execute('COMMIT')
        CON.execute('DROP TABLE IF EXISTS cur_dirs')
        CON.execute('''
        CREATE TABLE cur_dirs (
            parent_id INTEGER NOT NULL,           /* id папки, в которой лежит данный объект */
            name TEXT NOT NULL,                   /* имя объекта в папке */
            id INTEGER PRIMARY KEY AUTOINCREMENT, /* идентификатор объекта во всей БД */
            type INTEGER NOT NULL,                /* MFILE, MDIR, MLINK, MOTHER */
            modified INTEGER NOT NULL,            /* параметр обхода:
                0 - заходим при полном обходе
                1 - заходим приобходе модифицированных объектов
                2 - по таблице заходим всегда, но в ФС никогда не просматриваем (и даже stat не делаем) */
        UNIQUE(parent_id, name)
        )
        ''')
        create_root('/boot',CON)
    
        print(CON.execute('SELECT * FROM cur_dirs').fetchall())
    
        create_root('/home',CON)
        print(CON.execute('SELECT * FROM cur_dirs').fetchall())
        CON.execute('DROP TABLE cur_dirs')
        CON.execute('COMMIT')

[(0, 'boot', 1, 1, 0)]
[(0, 'boot', 1, 1, 0), (0, 'home', 2, 1, 0)]


In [17]:
def init_cur_dirs(root_dirs):
    with CON:
        CON.execute('''
        CREATE TABLE cur_dirs (
            parent_id INTEGER NOT NULL,           /* id папки, в которой лежит данный объект */
            name TEXT NOT NULL,                   /* имя объекта в папке */
            id INTEGER PRIMARY KEY AUTOINCREMENT, /* идентификатор объекта во всей БД */
            type INTEGER NOT NULL,                /* MFILE, MDIR, MLINK, MOTHER */
            modified INTEGER NOT NULL,            /* параметр обхода:
                0 - заходим при полном обходе
                1 - заходим приобходе модифицированных объектов
                2 - по таблице заходим всегда, но в ФС никогда не просматриваем (и даже stat не делаем) */
        UNIQUE(parent_id, name)
        )
        ''')
        CON.execute('CREATE INDEX id_cur_dirs ON cur_dirs (id)')
        CON.execute('CREATE INDEX parname_cur_dirs ON cur_dirs (parent_id, name)')

        for root_dir in tqdm(root_dirs):
            #print(root_dir)
            create_root(root_dir,CON)
            for root, dirs, files in os.walk('/'+root_dir):
                pathids = path2ids(root,CON)
                assert pathids[-1] is not None
                #print(root,pathids,dirs)
                CON.executemany('INSERT INTO cur_dirs (parent_id, name, modified, type) VALUES (?, ?, 0, ?)', [(pathids[-1], x, MDIR) for x in dirs])
                CON.executemany('INSERT INTO cur_dirs (parent_id, name, modified, type) VALUES (?, ?, 0, ?)', [(pathids[-1], x, MDIR) for x in files])

        

In [18]:
if INIT_MODE:
    with CON:
        CON.execute('DROP TABLE IF EXISTS cur_dirs; ')
    start = time()
    init_cur_dirs(ROOT_DIRS)
    print(time() - start)
# ~ 2min

100%|████████████████████████████████████████████| 1/1 [00:00<00:00, 139.39it/s]

0.4729197025299072





In [19]:
with CON:
    dirs = CON.execute('SELECT * FROM cur_dirs').fetchall()
    # Выводим результаты
    print(len(dirs))
    for file in dirs[0:1000]:
        print(file)
    

86
(0, 'home', 1, 1, 2)
(1, 'feelus', 2, 1, 2)
(2, 'pyfiles', 3, 1, 2)
(3, 'src', 4, 1, 0)
(4, '.ipynb_checkpoints', 5, 1, 0)
(4, '.git', 6, 1, 0)
(4, 'Untitled.ipynb', 7, 1, 0)
(4, 'files.db', 8, 1, 0)
(4, 'notify.py', 9, 1, 0)
(4, 'backup.sql', 10, 1, 0)
(4, '.gitignore', 11, 1, 0)
(4, 'pyfiles.py', 12, 1, 0)
(4, 'files2.db-journal', 13, 1, 0)
(4, 'files1.db', 14, 1, 0)
(4, 'test.db', 15, 1, 0)
(4, 'test', 16, 1, 0)
(4, 'files2.db', 17, 1, 0)
(5, 'Untitled-checkpoint.ipynb', 18, 1, 0)
(5, 'pyfiles-checkpoint.py', 19, 1, 0)
(6, 'refs', 20, 1, 0)
(6, 'objects', 21, 1, 0)
(6, 'info', 22, 1, 0)
(6, 'logs', 23, 1, 0)
(6, 'hooks', 24, 1, 0)
(6, 'branches', 25, 1, 0)
(6, 'COMMIT_EDITMSG', 26, 1, 0)
(6, 'index', 27, 1, 0)
(6, 'description', 28, 1, 0)
(6, 'config', 29, 1, 0)
(6, 'HEAD', 30, 1, 0)
(20, 'heads', 31, 1, 0)
(20, 'tags', 32, 1, 0)
(31, 'master', 33, 1, 0)
(21, 'pack', 34, 1, 0)
(21, '10', 35, 1, 0)
(21, '23', 36, 1, 0)
(21, 'fa', 37, 1, 0)
(21, '26', 38, 1, 0)
(21, '45', 39, 1, 0)

In [20]:
def list_modified():
    with CON:
        def my_walk(did,path):
            n = CON.execute('SELECT name,id,type,modified FROM cur_dirs WHERE parent_id = ? AND (modified = 1 OR modified = 2)',(did,)).fetchall()
            for name,fid,ftype,modified in n:
                if ftype==MDIR and modified !=0:
                    if modified==1:
                        print(path+'/'+name+'/')
                    my_walk(fid,path+'/'+name)
                else:
                    print(path+'/'+name)
        my_walk(0,'')
list_modified()

In [21]:
with CON:
    print(path2ids('/boot',CON))

[None]


In [22]:
with CON:
    print(id2path(3,CON))

/home/feelus/pyfiles


In [23]:
if INIT_MODE:
    with CON:
        if CON.in_transaction:
            CON.execute('COMMIT')
        CON.execute('DROP TABLE IF EXISTS cur_stat')
        CON.execute('''
        CREATE TABLE IF NOT EXISTS cur_stat  (
        id INTEGER PRIMARY KEY,
        type INTEGER NOT NULL,
        
        st_mode INTEGER, /* поля stat */
        st_ino  INTEGER,
        st_dev  INTEGER,
        st_nlink  INTEGER,
        st_uid  INTEGER,
        st_gid  INTEGER,
        st_size  INTEGER,
        st_atime  REAL,
        st_mtime  REAL,
        st_ctime REAL,
        st_blocks INTEGER,
        st_blksize INTEGER,
        
        data TEXT, /* для файлов - хэш, для папок - хэш = xor хэшей вложенных объектов, для симлинков - сама ссылка */
        owner INTEGER /*  */
        )
        ''')
        

In [24]:
with CON:
    # для файлов и папок data это hash, для симлинков - ссылка
    CON.execute('CREATE INDEX IF NOT EXISTS id_cur_stat ON cur_stat (id)')

In [25]:
# do stat первый раз
if INIT_MODE:
    with CON:
        # Выбираем всех пользователей
        ids = CON.execute('SELECT id FROM cur_dirs WHERE modified !=2').fetchall()
        cnt = 0
        for fid in tqdm(ids):
            fid = fid[0]
            path = None
            try:
                path = id2path(fid,CON)
                stat = os.stat(path,follow_symlinks=False)
                if not is_link(stat.st_mode) and not is_dir(stat.st_mode) and not is_file(stat.st_mode) and not is_other(stat.st_mode):
                    raise Exception('unknown type')
                    
                CON.execute('UPDATE cur_dirs SET type = ? WHERE id = ?',(simple_type(stat.st_mode),fid))
                CON.execute('''INSERT INTO cur_stat 
                (id,type,
                st_mode,st_ino,st_dev,st_nlink,st_uid,st_gid,st_size,
                st_atime,st_mtime,st_ctime,st_blocks,st_blksize) VALUES (?,?,?,?,?,?,?,?,?,?,?,?,?,?)''',
                              (fid,simple_type(stat.st_mode),
                               stat.st_mode,stat.st_ino,stat.st_dev,stat.st_nlink,stat.st_uid,stat.st_gid,stat.st_size,
                               stat.st_atime,stat.st_mtime,stat.st_ctime,stat.st_blocks,stat.st_blksize)
                              )
                cnt+=1
                if cnt%1000000==0:
                    CON.execute('COMMIT')
            except FileNotFoundError:
                #print(e)
                set_modified(fid, cursor)
            except Exception as e:
                print(fid,path,type(e),e)
    # 45 s
list_modified()

100%|█████████████████████████████████████████| 83/83 [00:00<00:00, 9397.16it/s]


In [26]:
# calc hashes of files
if INIT_MODE:
    with CON:
        cursor = CON.cursor()
        ids = cursor.execute('SELECT id FROM cur_stat WHERE type = ?',(MFILE,)).fetchall()
        cnt = 0
        for fid in tqdm(ids):
            fid = fid[0]
            path = None
            try:
                path = id2path(fid,cursor)
                hsh = hashlib.md5(open(path,'rb').read()).hexdigest()
                cursor.execute('UPDATE cur_stat SET data = ? WHERE id = ?',(hsh,fid))
                cnt+=1
                if cnt%1000000==0:
                    cursor.execute('COMMIT')
            except FileNotFoundError:
                set_modified(fid, cursor)
            except Exception as e:
                print(fid,path,type(e),e)
list_modified()

100%|███████████████████████████████████████████| 52/52 [00:07<00:00,  7.24it/s]


In [27]:
with CON:
    ids = CON.execute('UPDATE cur_stat SET data = "00000000000000000000000000000000" WHERE type = ?',(MOTHER,)).fetchall()

In [28]:
with CON:
    print(*CON.execute('PRAGMA integrity_check;').fetchone())
list_modified()

ok


In [29]:
# обновить симлинки, директории, сынтегрировать хеши
if INIT_MODE:
    with CON:
        cursor = CON.cursor()
        def my_walk(did,root):
            n = cursor.execute('SELECT name,id,type,modified FROM cur_dirs WHERE parent_id = ? ',(did,)).fetchall()
            if not root:
                try:
                    if len(n)!=len(os.listdir(id2path(did,cursor))):
                        set_modified(did, cursor)
                except FileNotFoundError:
                    set_modified(did, cursor)
            hsh = 0
            for name,fid,ftype,modified in n:
                if ftype==MFILE:
                    n = cursor.execute('SELECT data FROM cur_stat WHERE id = ?',(fid,)).fetchone()
                    if n is None: raise Exception(f"can't find fid {fid} in cur_stat_file")
                    lhsh = n[0]
                elif ftype==MLINK:
                    try:
                        lnk = os.readlink(id2path(fid,cursor))
                        lhsh = hashlib.md5(lnk.encode()).hexdigest()
                        cursor.execute('UPDATE cur_stat SET data = ? WHERE id = ?',(lnk,fid))
                    except FileNotFoundError:
                        set_modified(fid, cursor)
                        lhsh = None
                elif ftype==MDIR:
                    lhsh = my_walk(fid,modified==2)
                elif ftype==MOTHER:
                    lhsh = hex( 0 )[2:].zfill(32)
                else: 
                    assert False, (name,fid,ftype)
                    
                if lhsh is None:
                    hsh = None
                if hsh is not None:
                    hsh ^= int(lhsh, 16)
    
            if hsh is not None:
                hsh = hex( hsh )[2:].zfill(32)
                if not root:
                    cursor.execute('UPDATE cur_stat SET data = ? WHERE id = ?',(hsh,did))
            return hsh
        my_walk(0,True)     
list_modified()

set_modified 4
/home/feelus/pyfiles/src/


In [31]:
def update_stat(fid, stat, cursor):
    cursor.execute('''UPDATE cur_stat SET
        st_mode=?,st_ino=?,st_dev=?,st_nlink=?,st_uid=?,st_gid=?,st_size=?,
        st_atime=?,st_mtime=?,st_ctime=?,st_blocks=?,st_blksize=?''',
        (stat.st_mode,stat.st_ino,stat.st_dev,stat.st_nlink,stat.st_uid,stat.st_gid,stat.st_size,
        stat.st_atime,stat.st_mtime,stat.st_ctime,stat.st_blocks,stat.st_blksize)
    )
def get_stat(fid, cursor):
    (st_mode,st_ino,st_dev,st_nlink,st_uid,st_gid,st_size,
        st_atime,st_mtime,st_ctime,st_blocks,st_blksize) = \
    cursor.execute('''SELECT 
        st_mode,st_ino,st_dev,st_nlink,st_uid,st_gid,st_size,
        st_atime,st_mtime,st_ctime,st_blocks,st_blksize
        FROM cur_stat WHERE id = ?''',(fid,)
    ).fetchone()
    return make_dict(st_mode=st_mode,st_ino=st_ino,st_dev=st_dev,st_nlink=st_nlink,st_uid=st_uid,st_gid=st_gid,st_size=st_size,
                       st_atime=st_atime,st_mtime=st_mtime,st_ctime=st_ctime,st_blocks=st_blocks,st_blksize=st_blksize)

with CON:
    ost = get_stat(45,CON)
    print(ost)
    update_stat(45,os.stat('/home'),CON)
    print(get_stat(45,CON))
    update_stat(45,ost,CON)
    print(get_stat(45,CON))
    

{'st_mode': 16893, 'st_ino': 1346764, 'st_dev': 2052, 'st_nlink': 2, 'st_uid': 1000, 'st_gid': 1000, 'st_size': 4096, 'st_atime': 1737208359.3988495, 'st_mtime': 1737091280.9950309, 'st_ctime': 1737091280.9950309, 'st_blocks': 8, 'st_blksize': 4096}
{'st_mode': 16877, 'st_ino': 1048577, 'st_dev': 2052, 'st_nlink': 3, 'st_uid': 0, 'st_gid': 0, 'st_size': 4096, 'st_atime': 1737189243.211488, 'st_mtime': 1729599119.2648318, 'st_ctime': 1729599119.2648318, 'st_blocks': 8, 'st_blksize': 4096}
{'st_mode': 16893, 'st_ino': 1346764, 'st_dev': 2052, 'st_nlink': 2, 'st_uid': 1000, 'st_gid': 1000, 'st_size': 4096, 'st_atime': 1737208359.3988495, 'st_mtime': 1737091280.9950309, 'st_ctime': 1737091280.9950309, 'st_blocks': 8, 'st_blksize': 4096}


In [32]:
with CON:
    print(CON.execute('SELECT * FROM cur_dirs WHERE modified = 1').fetchall())

[(3, 'src', 4, 1, 1)]


In [33]:
list_modified()

/home/feelus/pyfiles/src/


In [34]:
with CON:
    print(CON.execute('SELECT * FROM cur_dirs WHERE modified = 2').fetchall())

[(0, 'home', 1, 1, 2), (1, 'feelus', 2, 1, 2), (2, 'pyfiles', 3, 1, 2)]


In [35]:
with CON:
    # todo описание, таймаут, логгирование изменений (diff) для особо выжных файлов

    if CON.in_transaction:
        CON.execute('COMMIT')
    CON.execute('''
    CREATE TABLE IF NOT EXISTS owners  (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        name TEXT NOT NULL, /* например система-код система-логи программа-код, программа-конфиг, программа-данные, человек-проект */
        save INTEGER NOT NULL, /* bool - сохранять ли данные об изменении этого объекта в hist */
        name1 TEXT, /* если у объекта несколько владельцев, то для каждой группы владельцев свой id, имена через запятую */
        name2 TEXT, /* а здесь имена каждого владельца по отдельности */
        name3 TEXT,
        name4 TEXT,
        name5 TEXT,
        UNIQUE(name)
    )
    ''')
    CON.execute('CREATE INDEX IF NOT EXISTS id_owners ON owners (id)')
    CON.execute('CREATE INDEX IF NOT EXISTS name_owners ON owners (name)')
    

In [40]:
def update_owner(name,save):
    with CON:
        CON.execute('''INSERT INTO owners (name, save) VALUES (?, ?) ON CONFLICT(name) DO UPDATE SET
            name = excluded.name,    save = excluded.save ''', (name,save))
def set_owner(path,name):
    # todo добавить флаг, чтобы если есть поддерево с уже установленным владельцем - не менять его
    with CON:
        cursor = CON.cursor()
        if name is not None:
            (oid,) = cursor.execute('SELECT id FROM owners WHERE name = ?',(name,)).fetchone()
        else:
            oid = None
        def my_walk(did):
            n = cursor.execute('SELECT name,id,type FROM cur_dirs WHERE parent_id = ? ',(did,)).fetchall()
            for name,fid,ftype in n:
                cursor.execute('UPDATE cur_stat SET owner = ? WHERE id = ?',(oid,fid))
                if ftype==MDIR:
                    my_walk(fid)
        fid = path2ids(path,cursor)[-1]
        cursor.execute('UPDATE cur_stat SET owner = ? WHERE id = ?',(oid,fid))
        (typ,) = cursor.execute('SELECT type FROM cur_dirs WHERE id = ?',(fid,)).fetchone()
        if typ==MDIR:
            my_walk(fid)


In [47]:
update_owner('name',False)
update_owner('git',True)

with CON:
    print(CON.execute('SELECT * FROM owners').fetchall())

[(1, 'name', 0, None, None, None, None, None), (3, 'git', 1, None, None, None, None, None)]


In [48]:
set_owner('/home/feelus/pyfiles/src/.git','git')

In [49]:
ls('/home/feelus/pyfiles/src/.git')

('parent_id', 'name', 'id', 'type', 'modified') ('id', 'type', 'st_mode', 't_ino', 'st_dev', 'st_nlink', 'st_uid', 'st_gid', 'st_size', 'st_atime', 'st_mtime', 'st_ctime', 'st_blocks', 'st_blksize', 'data', 'owner')
----------------------
(6, 'COMMIT_EDITMSG', 26, 0, 0) (26, 0, 16893, 1346764, 2052, 2, 1000, 1000, 4096, 1737208359.3988495, 1737091280.9950309, 1737091280.9950309, 8, 4096, '6683eecac67610f0bfad4a8ca9fdf982', 3)
(6, 'HEAD', 30, 0, 0) (30, 0, 16893, 1346764, 2052, 2, 1000, 1000, 4096, 1737208359.3988495, 1737091280.9950309, 1737091280.9950309, 8, 4096, '4cf2d64e44205fe628ddd534e1151b58', 3)
(6, 'branches', 25, 1, 0) (25, 1, 16893, 1346764, 2052, 2, 1000, 1000, 4096, 1737208359.3988495, 1737091280.9950309, 1737091280.9950309, 8, 4096, '00000000000000000000000000000000', 3)
(6, 'config', 29, 0, 0) (29, 0, 16893, 1346764, 2052, 2, 1000, 1000, 4096, 1737208359.3988495, 1737091280.9950309, 1737091280.9950309, 8, 4096, '5b603c2c0801a9ded3b79159fc38b404', 3)
(6, 'description', 28

In [50]:
ECREAT = 1 # в этом случае все старые записи == -1
EMODIF = 2
EMOVE = 3
EDEL = 4
with CON:
    cursor = CON.cursor()

    # на id может быть несколько записей
    # если объект создан, то поля старого stat = -1
    #cursor.execute('DROP TABLE hist_crmod')
    cursor.execute('''
    CREATE TABLE IF NOT EXISTS hist  (
    parent_id INTEGER NOT NULL, /* старая запись из cur_dirs */
    name TEXT NOT NULL, /* старая запись из cur_dirs */
    id INTEGER NOT NULL, /* на id может быть несколько записей */
    type INTEGER NOT NULL, 
    event_type INTEGER NOT NULL, /* ECREAT, EMODIF, EMOVE, EDEL */
    
    st_mode INTEGER, /* старая запись из cur_stat */
    st_ino  INTEGER,
    st_dev  INTEGER,
    st_nlink  INTEGER,
    st_uid  INTEGER,
    st_gid  INTEGER,
    st_size  INTEGER,
    st_atime  REAL,
    st_mtime  REAL,
    st_ctime REAL,
    st_blocks INTEGER,
    st_blksize INTEGER,
    data TEXT, /* старая запись из cur_stat */
    owner INTEGER, /* старая запись из cur_stat */

    time REAL NOT NULL, /* время события */
    static_found INTEGER NOT NULL /* 0 - обнаружено watchdog-ом, 1 - статитсеский обход дерева каталогов */
    )
    ''')
    cursor.execute('CREATE INDEX IF NOT EXISTS id_hist ON hist (id)')
    cursor.execute('CREATE INDEX IF NOT EXISTS time_hist ON hist (time)')

def add_event(fid, typ, etyp, static_found, cursor):
    if etyp==ECREAT:
        cursor.execute('''INSERT INTO hist (parent_id, name, id, type, event_type, 
            st_mode,st_ino,st_dev,st_nlink,st_uid,st_gid,st_size,
            st_atime,st_mtime,st_ctime,st_blocks,st_blksize,data,owner,
            time,static_found) VALUES (-1,-1,?,?,?,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,?,?)''',
                       (fid,typ,etyp,time(),static_found))
    else:
        # просто чать данных копируем а чать заполняем вручную
        if typ is not None:
            (otyp,) = cursor.execute('SELECT type FROM cur_dirs WHERE id = ?',(fid,)).fetchone()
            assert typ == otyp , (typ, otyp)
        cursor.execute('''INSERT INTO hist (parent_id, name, id, type, event_type, 
            st_mode,st_ino,st_dev,st_nlink,st_uid,st_gid,st_size,
            st_atime,st_mtime,st_ctime,st_blocks,st_blksize,data,owner,
            time,static_found) 
            SELECT t1.parent_id, t1.name, ?, ?, ?, 
            t2.st_mode,t2.st_ino,t2.st_dev,t2.st_nlink,t2.st_uid,t2.st_gid,t2.st_size,
            t2.st_atime,t2.st_mtime,t2.st_ctime,t2.st_blocks,t2.st_blksize,t2.data,t2.owner,
            ?,?
            FROM cur_dirs AS t1
            JOIN cur_stat AS t2
            ON 1=1
            WHERE t1.id = ? AND t2.id = ?
            ''',
                       (fid,typ,etyp,time(),static_found,fid,fid)
        )
            # INSERT INTO table_target (col1, col2, col3, col4, col5)
            # SELECT t1.col1, t1.col2, t2.col3, t2.col4, ?
            # FROM table1 AS t1
            # JOIN table2 AS t2
            # ON 1=1
            # WHERE t1.id = ? AND t2.id = ?;        
        

In [51]:
with CON:
    add_event(2, MOTHER, ECREAT, False, CON)
    add_event(5, MDIR, EDEL, True, CON)
    print(CON.execute('SELECT * FROM hist').fetchall())
    CON.execute('DELETE FROM hist')
    print(CON.execute('SELECT * FROM hist').fetchall())
    

[(-1, '-1', 2, 3, 1, -1, -1, -1, -1, -1, -1, -1, -1.0, -1.0, -1.0, -1, -1, '-1', -1, 1737258047.4243934, 0), (4, '.ipynb_checkpoints', 5, 1, 4, 16893, 1346764, 2052, 2, 1000, 1000, 4096, 1737208359.3988495, 1737091280.9950309, 1737091280.9950309, 8, 4096, '3589b2ff9683a4f362625f173839c744', None, 1737258047.4260247, 1)]
[]


In [52]:
# не взаимодействуем с ФС, кроме create_parents

# todo написать тесты отдельно от цикла
def owner_save(fid,cursor):
    (owner,) = cursor.execute('SELECT owner FROM cur_stat WHERE id = ?',(fid,)).fetchone()
    if owner is not None:
        (save,) = cursor.execute('SELECT save FROM owners WHERE id = ?',(owner,)).fetchone()
    else:
        save = True
    return (owner,save)


# исхоодый объект известен, целевой объект известен, работаем с БД
def modified1(fid, stat, is_directory, cursor):
    already = is_modified(fid, cursor)
    if not already: set_modified(fid, cursor)

    (owner,save) = owner_save(fid,cursor)
    if save:
        # cохранить старый stat. с датой. 
        # для файла - если дата больше 10 сек
        # для папки - если изменился её stat (st_atime, st_mtime не учитываем)
        ostat = cursor.execute('''SELECT 
            id, type,
            st_mode,st_ino,st_dev,st_nlink,st_uid,st_gid,st_size,
            st_atime,st_mtime,st_ctime,st_blocks,st_blksize,
            data,time
        FROM hist WHERE id = ? ORDER BY time DESC LIMIT 1;''',(fid,)).fetchone()
        if ostat is not None: # раньше этот файл уже обновлялся
            (ofid,otype,
                           ost_mode,ost_ino,ost_dev,ost_nlink,ost_uid,ost_gid,ost_size,
                           ost_atime,ost_mtime,ost_ctime,ost_blocks,ost_blksize, 
                odata, otime ) = ostat
            if simple_type(stat.st_mode)==MDIR:
                save = (stat.st_mode,stat.st_ino,stat.st_dev,stat.st_nlink,stat.st_uid,stat.st_gid,stat.st_size,
                       stat.st_ctime,stat.st_blocks,stat.st_blksize) != \
                (ost_mode,ost_ino,ost_dev,ost_nlink,ost_uid,ost_gid,ost_size,
                           ost_ctime,ost_blocks,ost_blksize,)
            else:
                save = abs(otime - time())>10
        else:
            save = True
        if save:
            add_event(fid, simple_type(stat.st_mode), EMODIF, False, cursor)
    
    # обновить stat в cur
    update_stat(fid,stat,cursor)
    
def created2(parent_id, name, stat, static_find, save, owner, cursor):
    set_modified(parent_id, cursor)
    cursor.execute('INSERT INTO cur_dirs (parent_id, name, modified, type) VALUES (?, ?, 1, ?)',(parent_id, name, simple_type(stat.st_mode)))
    (fid,) = cursor.execute('SELECT id FROM cur_dirs WHERE parent_id =? AND name=?',(parent_id,name)).fetchone()
    if save:
        add_event(fid, simple_type(stat.st_mode), ECREAT, static_find, cursor)
    # обновить stat в cur
    cursor.execute('INSERT INTO cur_stat (id,type,owner) VALUES (?,?,?)',(fid,simple_type(stat.st_mode),owner))
    update_stat(fid,stat,cursor)
    return fid
    
def deleted1(fid,cursor):
    (owner,save) = owner_save(fid,cursor)

    def my_walk(did):
        n = cursor.execute('SELECT name,id,type FROM cur_dirs WHERE parent_id = ? ',(did,)).fetchall()
        for name,fid,ftype in n:
            if ftype==MDIR:
                my_walk(fid)
            if save:
                add_event(fid, None, EDEL, True, cursor)
            cursor.execute('DELETE FROM cur_stat WHERE id = ?',(fid,))
            cursor.execute('DELETE FROM cur_dirs WHERE id = ?',(fid,))
            
    my_walk(fid)
    if save:
        add_event(fid, None, EDEL, False, cursor)
    cursor.execute('DELETE FROM cur_stat WHERE id = ?',(fid,))
    cursor.execute('DELETE FROM cur_dirs WHERE id = ?',(fid,))

def create_parents(path,cursor,ids=None):
    if ids is None:
        ids = path2ids(path,cursor)
        
    # рассчитываем, что src_path - обсолютный путь, не симлинк, не содержит // типа '/a//b/c'
    path = path.split('/')

    fid = ids[-2]
    (owner,save) = owner_save(fid,cursor)

    parent_path = '/'.join(path[:len(ids)])
    for name in path[len(ids):-1]:
        parent_path+= ('/'+name)
        lstat = os.stat(parent_path, follow_symlinks=False) # FileNotFoundError будет пойман в области watchdog-а
        assert simple_type(lstat.st_mode)==MDIR, simple_type(lstat.st_mode)
        fid = created2(fid, name, lstat, True, save, owner, cursor)

    return fid, save, owner, path[-1]

def created1(ids, src_path, stat, is_directory, owner, cursor):
    (fid, save, owner, name) = create_parents(src_path,cursor,ids)
    created2(fid, name, stat, False, save, owner, cursor)

def moved1(fid, dest_path, cursor):
    (parent_id, _, _, name) = create_parents(dest_path,cursor)
    (_,save) = owner_save(fid,cursor)
    if save:
        add_event(fid, None, EMOVE, False, cursor)
    cursor.execute('UPDATE cur_dirs SET parent_id = ?, name = ? WHERE id = ?',(parent_id, name, fid))



In [53]:
def modified(src_path, stat, is_directory, is_synthetic, cursor):
    if is_synthetic:
        print('synthetic modified',src_path, is_directory)
        return
    ids = path2ids(src_path,cursor)
    if ids[-1] is None:
        print('do modified as created',src_path, time())
        return created1(ids, src_path, is_directory)
    return modified1(ids[-1], stat, is_directory)

def created(src_path, stat, is_directory, is_synthetic, cursor):
    if is_synthetic:
        print('synthetic created',src_path, is_directory)
        return
    ids = path2ids(src_path,cursor)
    if ids[-1] is not None:
        # если было удалено, но это не было зафиксировано, а потом создалось - считаем, что просто изменилось
        print('do created as modified',src_path, time())
        return modified1(ids[-1], src_path, is_directory)
    return created1(ids, src_path, stat, is_directory)

def deleted(src_path, is_directory, s_synthetic, cursor):
    if is_synthetic:
        print('synthetic deleted',src_path, is_directory)
        return
    ids = path2ids(src_path,cursor)
    if ids[-1] is not None:
        print('deleted unknown object:',src_path)
        return
    deleted1(ids[-1],cursor)

def moved(src_path, dest_path, stat, is_directory, s_synthetic, cursor):
    if is_synthetic:
        print('synthetic moved',src_path, dest_path, is_directory)
        return
    ids = path2ids(src_path,cursor)
    if ids[-1] is not None:
        print('do moved as created',src_path, dest_path, time())
        return created1(ids, dest_path, is_directory)
    moved1(ids[-1],dest_path, cursor)


In [27]:
# взаимодействуем с ФС
from watchdog.events import FileSystemEvent, FileSystemEventHandler
from watchdog.observers import Observer
import threading
lock = threading.Lock()
from time import time, sleep

# todo список полностью игнорируемых путей
if 1:
    def modified(src_path, is_directory, is_synthetic, cursor):
        print('modified',src_path)
    
    def created(src_path, is_directory, is_synthetic, cursor):
        print('created',src_path)
    
    def deleted(src_path, is_directory, s_synthetic, cursor):
        print('deleted',src_path)
    
    def moved(src_path, dest_path, is_directory, s_synthetic, cursor):
        print('moved',src_path)
    
class MyEventHandler(FileSystemEventHandler):
    def update(self):
        if time()-self.last_update>60:
            if self.connection.in_transaction:
                self.cursor.execute('COMMIT')
                print('COMMIT',self.last_update)
            self.last_update = time()
    def on_any_event(self, event: FileSystemEvent) -> None:
        if event.event_type=='closed_no_write':
            pass
        elif event.event_type=='opened':
            pass
        elif event.event_type=='modified' or event.event_type=='closed':
            try:
                stat = os.stat(event.src_path,follow_symlinks=False)
                with lock:
                    modified(event.src_path, stat, event.is_directory, event.is_synthetic, self.cursor)
                    self.update()
            except FileNotFoundError as e:
                print('error in modified event:', e, event.src_path, event.is_directory, event.is_synthetic)
        elif event.event_type=='created':
            try:
                stat = os.stat(event.src_path,follow_symlinks=False)
                with lock:
                    created(event.src_path, stat, event.is_directory, event.is_synthetic, self.cursor)
                    self.update()
            except FileNotFoundError as e:
                print('error in created event:', e, event.src_path, event.is_directory, event.is_synthetic)
        elif event.event_type=='deleted':
            with lock:
                deleted(event.src_path, event.is_directory, event.is_synthetic, self.cursor)
                self.update()
        elif event.event_type=='moved':
            try:
                stat = os.stat(event.dest_path,follow_symlinks=False)
            except FileNotFoundError:
                stat = make_dict(st_mode=None,st_ino=None,st_dev=None,st_nlink=None,st_uid=None,st_gid=None,st_size=None,
                       st_atime=None,st_mtime=None,st_ctime=None,st_blocks=None,st_blksize=None)
            with lock:
                moved(event.src_path, event.dest_path, stat, event.is_directory, event.is_synthetic, self.cursor)
                self.update()
        else:
            raise Exception(event)
            
class MyObserver(Observer):
    def __init__(self, event_handler):
        super().__init__()
        self.event_handler = event_handler

    def run(self):
        self.on_observer_start()  # Вызываем функцию при старте и передаем переменную в обработчик
        try:
            super().run()  # Запускаем стандартный обработчик
        finally:
            self.on_observer_stop()  # Вызываем функцию при завершении

    def on_observer_start(self):
        print("Observer запущен. ", threading.current_thread().name)

        self.event_handler.connection = sqlite3.connect(FILES_DB)
        self.event_handler.cursor = self.event_handler.connection.cursor()    
        self.event_handler.cursor.execute("PRAGMA journal_mode=WAL;")

        self.event_handler.last_update = time()

    def on_observer_stop(self):
        self.event_handler.connection.commit()
        print("Observer остановлен. ", threading.current_thread().name)

if __name__ == "__main__":
    def observe(root_dirs):
        event_handler = MyEventHandler()  # Создаем обработчик с временным значением shared_data
        observer = MyObserver(event_handler)

        for dr in root_dirs:
            observer.schedule(event_handler, '/'+dr, recursive=True)
        observer.start()
        return observer

    observer = observe(get_root_dirs())

    print("Все запущены...", threading.current_thread().name)

    try:
        while True:
            x = input()
            if x == 'q':
                break
            print(x)
    finally:
        observer.stop()  # Останавливаем Observer
        observer.join()  # Ждем завершения потока


Observer запущен. Все запущены... MainThread
 Thread-182
modified /root/.ipython/profile_default/history.sqlite-journal
modified /root/.ipython/profile_default/history.sqlite
modified /root/.ipython/profile_default/history.sqlite-journal
modified /root/.ipython/profile_default
deleted /root/.ipython/profile_default/history.sqlite-journal
modified /root/.ipython/profile_default
modified /home/feelus/snap/firefox/common/.cache/mozilla/firefox/2yebzj4a.default/cache2/entries/F7B4C542C6E1E26E0E409039C4214EF6DA3BE75F
modified /home/feelus/snap/firefox/common/.cache/mozilla/firefox/2yebzj4a.default/cache2/entries/F7B4C542C6E1E26E0E409039C4214EF6DA3BE75F
modified /home/feelus/snap/firefox/common/.cache/mozilla/firefox/2yebzj4a.default/cache2/entries
modified /home/feelus/snap/firefox/common/.mozilla/firefox/2yebzj4a.default/storage-sync-v2.sqlite-wal
modified /home/feelus/snap/firefox/common/.mozilla/firefox/2yebzj4a.default/storage/default/moz-extension+++9af6ae16-950a-4ac8-b634-8f433c84e0ae

 q


Observer остановлен.  Thread-182


In [20]:
f = open('test','w')

In [21]:
f.close()

In [22]:
connection = sqlite3.connect('test.db')


In [23]:
connection.close()

In [24]:
with sqlite3.connect('test.db') as connection:
    pass

In [22]:
os.stat('.')

os.stat_result(st_mode=16893, st_ino=1318666, st_dev=2052, st_nlink=4, st_uid=1000, st_gid=1000, st_size=4096, st_atime=1737211130, st_mtime=1737211130, st_ctime=1737211130)

In [23]:
os.stat('/bin',follow_symlinks=False)

os.stat_result(st_mode=41471, st_ino=12, st_dev=2052, st_nlink=1, st_uid=0, st_gid=0, st_size=7, st_atime=1737177301, st_mtime=1713791283, st_ctime=1729561328)

In [24]:
def access_mode(st_mode):
    mode = STAT.S_IMODE(st_mode)
    assert mode < 2**9
    s = ''
    for i in range(6,-1,-3):
        s+= 'r' if mode & 2**(i+2) else '-'
        s+= 'w' if mode & 2**(i+1) else '-'
        s+= 'x' if mode & 2**(i+0) else '-'
    return s

In [25]:
access_mode(16877)

'rwxr-xr-x'

In [26]:
os.readlink('/bin')

'usr/bin'

In [27]:
hashstr = hashlib.md5(open('files.db','rb').read()).hexdigest()
hashstr

'1691fc18f126cb116505fa62c87ba7ae'

In [28]:
hex( int(hashstr, 16) ^ int(hashstr, 16) )[2:].zfill(32)

'00000000000000000000000000000000'

In [29]:
len(os.listdir('/home'))

2

In [30]:
bin(STAT.S_IFLNK) # 1010_0000_0000_0000

'0b1010000000000000'

In [31]:
bin(STAT.S_IFDIR) # 0100_0000_0000_0000

'0b100000000000000'

In [32]:
bin(STAT.S_IFREG) # 1000_0000_0000_0000

'0b1000000000000000'

In [33]:
print(bin(STAT.S_IFDIR))
print(bin(STAT.S_IFCHR))
print(bin(STAT.S_IFBLK))
print(bin(STAT.S_IFREG))
print(bin(STAT.S_IFIFO))
print(bin(STAT.S_IFLNK))
print(bin(STAT.S_IFSOCK))
print(bin(STAT.S_IFDOOR))

0b100000000000000
0b10000000000000
0b110000000000000
0b1000000000000000
0b1000000000000
0b1010000000000000
0b1100000000000000
0b0
