In [303]:
import os
from collections import defaultdict
import hashlib


    
def get_files_with_same_size(root_folder: str) -> dict[int, list[str]]:
    """Group files under ``root_folder`` by byte size, keeping only shared sizes.

    The tree is traversed recursively with ``os.walk``. Equal size is a cheap
    pre-filter for duplicate detection: files of different sizes cannot have
    identical contents, so only the returned groups need to be hashed.

    Args:
        root_folder: Directory whose tree is scanned.

    Returns:
        Mapping of file size in bytes to the list of paths having that size.
        Sizes that occur only once are omitted. An empty dict is returned when
        nothing shares a size (or ``root_folder`` yields no files).
    """
    paths_by_size: dict[int, list[str]] = defaultdict(list)

    for dirpath, _, filenames in os.walk(root_folder):
        for filename in filenames:
            filepath = os.path.join(dirpath, filename)
            try:
                filesize = os.path.getsize(filepath)
            except OSError:
                # Dangling symlinks, files deleted while walking, or entries we
                # are not allowed to stat must not abort the whole scan.
                continue
            paths_by_size[filesize].append(filepath)

    return {size: paths for size, paths in paths_by_size.items() if len(paths) > 1}

def calculate_hash(filepath: str) -> str:
    """Return the SHA-256 hex digest of the contents of ``filepath``.

    The file is opened in binary mode and consumed in 8192-byte pieces, so the
    whole file never has to be held in memory at once.
    """
    digest = hashlib.sha256()
    with open(filepath, "rb") as stream:
        # iter(callable, sentinel) keeps reading until read() returns b"" (EOF).
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()
            
            
def get_files_with_same_hash(filepaths: list[str]) -> dict[str, list[str]]:
    """Group ``filepaths`` by content hash and keep only hashes shared by several files.

    Each path is hashed with ``calculate_hash``. The result maps a hex digest to
    the paths that produced it; digests seen only once are left out.
    """
    paths_by_digest: dict[str, list[str]] = {}
    for path in filepaths:
        paths_by_digest.setdefault(calculate_hash(path), []).append(path)

    duplicates: dict[str, list[str]] = {}
    for digest, group in paths_by_digest.items():
        if len(group) > 1:
            duplicates[digest] = group
    return duplicates
    

    


In [304]:
# Root of the directory tree that is scanned for duplicate files.
MUSIC_ROOT_FOLDER = "/Users/lukas.kotatko/TESTING_FILESYSTEM"

# Two-stage duplicate search: same-size groups are found first, then each group
# is hashed and the hash groups with more than one file are printed per size.
same_size_groups = get_files_with_same_size(MUSIC_ROOT_FOLDER)
for size, files in same_size_groups.items():
    print(size, get_files_with_same_hash(files))
    
    

4225949 {'2a38fcfa030ce7358b5b964d9aaa3df67bdffad14e706d9f4685c2496594a549': ['/Users/lukas.kotatko/TESTING_FILESYSTEM/12  Fjernlys Trunkene Flut.mp3', '/Users/lukas.kotatko/TESTING_FILESYSTEM/12  Fjernlys Trunkene Flut 2.mp3']}
3852339 {'e041a5c3696234e44b8cbaaa45a8a0648543ed9ffca8e017de49b3c326afd2f5': ['/Users/lukas.kotatko/TESTING_FILESYSTEM/13  Predominance Four Symbols.mp3', '/Users/lukas.kotatko/TESTING_FILESYSTEM/13  Predominance Four Symbols 2.mp3']}


In [291]:
# NOTE(review): HashPathsPair is not defined in any cell visible here — presumably
# it came from a since-removed cell, so this would fail on a fresh kernel. TODO confirm.
# Scratch check that the list passed as the second argument is exposed as `.paths`.
pair = HashPathsPair("qewqewqe/we",["wqe","qq"])
pair.paths

['wqe', 'qq']

In [280]:
# Ad-hoc check: hash a single file directly. The recorded run of this cell was
# interrupted (KeyboardInterrupt), so no digest is shown below.
calculate_hash("/Users/lukas.kotatko/TESTING_FILESYSTEM/12  Fjernlys Trunkene Flut.mp3")

KeyboardInterrupt: 

In [13]:
from ffcuesplitter.cuesplitter import FFCueSplitter

# Experiment: derive per-track split commands for one album from its CUE sheet.
# input_flac = "/Users/lukas.kotatko/TESTING_FILESYSTEM/Fragmentary/CD1/Herbst9 CD1.flac"
cue = "/Users/lukas.kotatko/TESTING_FILESYSTEM/Fragmentary/CD1/Herbst9 CD1.cue"
output = "/Users/lukas.kotatko/TESTING_FILESYSTEM/Fragmentary/CD1/"

# Create an instance of FFCueSplitter using keyword arguments
# NOTE(review): dry=True presumably means nothing is actually written — confirm in ffcuesplitter docs.
splitter = FFCueSplitter(filename=cue, outputdir=output, outputformat="flac", dry=True)

tracks = splitter.audiotracks  # Get all track data
print(tracks)

# NOTE(review): audiotracks is read before open_cuefile() is called; the printed
# output shows it is already populated at that point — confirm whether this call is still needed.
splitter.open_cuefile()

# Last expression of the cell: displayed as a dict whose 'recipes' entry holds
# one ffmpeg command string per track (see the output below).
splitter.commandargs(tracks)

[{'FILE': 'Herbst9 CD1.flac', 'ALBUM': 'Herbst9 CD1', 'PERFORMER': '', 'DATE': '2015', 'GENRE': 'Dark Ambient', 'DISCID': '630E0208', 'COMMENT': 'ExactAudioCopy v1.0b6', 'TITLE': 'Трек01', 'TRACK_NUM': '1', 'INDEX 01': '00:00:00', 'START': 0, 'END': 19070604, 'DURATION': 432.44}, {'FILE': 'Herbst9 CD1.flac', 'ALBUM': 'Herbst9 CD1', 'PERFORMER': '', 'DATE': '2015', 'GENRE': 'Dark Ambient', 'DISCID': '630E0208', 'COMMENT': 'ExactAudioCopy v1.0b6', 'TITLE': 'Трек02', 'TRACK_NUM': '2', 'INDEX 01': '07:12:33', 'START': 19070604, 'END': 47432196, 'DURATION': 643.12}, {'FILE': 'Herbst9 CD1.flac', 'ALBUM': 'Herbst9 CD1', 'PERFORMER': '', 'DATE': '2015', 'GENRE': 'Dark Ambient', 'DISCID': '630E0208', 'COMMENT': 'ExactAudioCopy v1.0b6', 'TITLE': 'Трек03', 'TRACK_NUM': '3', 'INDEX 01': '17:55:42', 'START': 47432196, 'END': 66859128, 'DURATION': 440.52}, {'FILE': 'Herbst9 CD1.flac', 'ALBUM': 'Herbst9 CD1', 'PERFORMER': '', 'DATE': '2015', 'GENRE': 'Dark Ambient', 'DISCID': '630E0208', 'COMMENT': '

{'recipes': [('"ffmpeg"  -loglevel info  -i "/Users/lukas.kotatko/TESTING_FILESYSTEM/Fragmentary/CD1/Herbst9 CD1.flac" -ss 0.0 -to 432.44 -metadata ARTIST="" -metadata ALBUM="Herbst9 CD1" -metadata TITLE="Трек01" -metadata TRACK="1/8" -metadata DISCNUMBER="" -metadata GENRE="Dark Ambient" -metadata DATE="2015" -metadata COMMENT="ExactAudioCopy v1.0b6" -metadata DISCID="630E0208" -c:a flac -ar 44100  -y "./01 - Трек01.flac"',
   {'duration': 432.44, 'titletrack': '01 - Трек01.flac'}),
  ('"ffmpeg"  -loglevel info  -i "/Users/lukas.kotatko/TESTING_FILESYSTEM/Fragmentary/CD1/Herbst9 CD1.flac" -ss 432.44 -to 1075.56 -metadata ARTIST="" -metadata ALBUM="Herbst9 CD1" -metadata TITLE="Трек02" -metadata TRACK="2/8" -metadata DISCNUMBER="" -metadata GENRE="Dark Ambient" -metadata DATE="2015" -metadata COMMENT="ExactAudioCopy v1.0b6" -metadata DISCID="630E0208" -c:a flac -ar 44100  -y "./02 - Трек02.flac"',
   {'duration': 643.12, 'titletrack': '02 - Трек02.flac'}),
  ('"ffmpeg"  -loglevel info 

In [15]:
from mutagen.flac import FLAC

# Inspect the tags stored in the un-split album FLAC; pprint() is the cell's
# last expression, so its returned string is what the notebook displays.
audio = FLAC("/Users/lukas.kotatko/TESTING_FILESYSTEM/Fragmentary/CD1/Herbst9 CD1.flac")
audio.pprint()

'FLAC, 3586.93 seconds, 44100 Hz (audio/flac)\nTITLE=Трек02\nALBUM=Herbst9 CD1\nDATE=2015\nTRACKNUMBER=02\nGENRE=Dark Ambient\nLYRICS=Warkatu I\nDISCNUMBER=1\nTOTALDISCS=1\nTOTALTRACKS=8'

In [17]:
import shutil

# Move the album directory from the first test tree into the second one.
# This changes the filesystem, so the cell is not idempotent: after a successful
# run `src` no longer exists. The displayed value is what shutil.move returned.
src = "/Users/lukas.kotatko/TESTING_FILESYSTEM/H/Herbst9/Fragmentary/"
dst = "/Users/lukas.kotatko/TESTING_FILESYSTEM2/H/Herbst9/Fragmentary/"
shutil.move(src, dst)

'/Users/lukas.kotatko/TESTING_FILESYSTEM2/H/Herbst9/Fragmentary/'