In [1]:
import os
import hashlib

In [16]:
def get_all_files(folder):
    file_map = {}
    for root, _, files in os.walk(folder):
        for file in files:
            rel_path = os.path.relpath(os.path.join(root, file), folder)
            file_map[rel_path] = os.path.join(root, file)
    return file_map

def compute_hash(filepath):
    hasher = hashlib.sha256()
    with open(filepath, 'rb') as f:
        for chunk in iter(lambda: f.read(8192), b''):
            hasher.update(chunk)
    return hasher.hexdigest()

def compare_folders(folder1, folder2):
    files1 = get_all_files(folder1)
    files2 = get_all_files(folder2)

    only_in_1 = set(files1.keys()) - set(files2.keys())
    only_in_2 = set(files2.keys()) - set(files1.keys())
    common_files = set(files1.keys()) & set(files2.keys())

    print("🟡 Files only in", folder1, ":", only_in_1)
    print("🟡 Files only in", folder2, ":", only_in_2)

    differing_files = []
    for file in common_files:
        hash1 = compute_hash(files1[file])
        hash2 = compute_hash(files2[file])
        if hash1 != hash2:
            differing_files.append(file)

    print("🔴 Files with different content:", differing_files)
    print("✅ Identical files:", common_files - set(differing_files))


def compare_files(f1, f2):
    with open(f1, "r") as f1, open(f2, "r") as f2:
        if f1.read() == f2.read():
            print("✅ Files are exactly the same")
        else:
            print("❌ Files are different")



### Step1: PDScreen_TapHab_August15_2022/N2

In [3]:
f1 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/N2'
f2 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/N2'

compare_folders(f1, f2)

🟡 Files only in /Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/N2 : set()
🟡 Files only in /Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/N2 : set()
🔴 Files with different content: []
✅ Identical files: {'20220815_103433/N2_10x2_f72h20C_600s31x10s10s_C0811ac.01923.txt', '20220815_102652/N2_10x2_f96h20C_600s31x10s10s_A0811aa.01594.txt', '20220815_121502/N2_10x2_f72h20C_600s31x10s10s_B0811ae.00643.txt', '20220815_103433/N2_10x2_f72h20C_600s31x10s10s_C0811ac.01708.txt', '20220815_121502/N2_10x2_f72h20C_600s31x10s10s_B0811ae.00014.txt', '20220815_103433/N2_10x2_f72h20C_600s31x10s10s_C0811ac.01270.txt', '20220815_103433/N2_10x2_f72h20C_600s31x10s10s_C0811ac.01948.txt', '20220815_121502/N2_10x2_f72h20C_600s31x10s10s_B0811ae.00845.txt', '20220815_101538/N2_10x2_f72h20C_600s31x10s10s_B0811ab.00514.txt', '20220815_103433/N2_10x2_f72h20C_600s31x10s10s_C0811ac.01677.txt', '20220815_102652/N2_10x2_f96h20C_600s31x10s10s_A0811aa.0

### Step1: PDScreen_TapHab_August15_2022/hipr-1_tm10120

In [6]:
f1 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/hipr-1_tm10120'
f2 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/hipr-1_tm10120'

compare_folders(f1, f2)

🟡 Files only in /Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/hipr-1_tm10120 : set()
🟡 Files only in /Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/hipr-1_tm10120 : {'.DS_Store'}
🔴 Files with different content: []
✅ Identical files: {'20220815_111005/FX31344_10x2_f72h20C_600s31x10s10s_B0811cc.00303.txt', '20220815_113906_removed/FX31344_10x2_f72h20C_600s31x10s10s_C0811cd.00246.txt', '20220815_093805/FX31344_10x2_f72h20C_600s31x10s10s_C0811ca.03578.txt', '20220815_111005/FX31344_10x2_f72h20C_600s31x10s10s_B0811cc.00336.txt', '20220815_111005/FX31344_10x2_f72h20C_600s31x10s10s_B0811cc.00630.txt', '20220815_093805/FX31344_10x2_f72h20C_600s31x10s10s_C0811ca.00018.txt', '20220815_112320/FX31344_10x2_f96h20C_600s31x10s10s_A0811cb.00004.txt', '20220815_093805/FX31344_10x2_f72h20C_600s31x10s10s_C0811ca.03333.txt', '20220815_111005/FX31344_10x2_f72h20C_600s31x10s10s_B0811cc.00614.txt', '20220815_113906_removed/FX31344_10x2_

### Step1: PDScreen_TapHab_August15_2022/hipr-1_ok1081

In [9]:
f1 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/hipr-1_ok1081'
f2 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/hipr-1_ok1081'

compare_folders(f1, f2)

🟡 Files only in /Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/hipr-1_ok1081 : set()
🟡 Files only in /Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/hipr-1_ok1081 : set()
🔴 Files with different content: []
✅ Identical files: {'20220815_100526/RB1102_10x2_f72h20C_600s31x10s10s_C0811bb.01802.txt', '20220815_100526/RB1102_10x2_f72h20C_600s31x10s10s_C0811bb.02691.txt', '20220815_114931/RB1102_10x2_f72h20C_600s31x10s10s_B0811bd.00046.txt', '20220815_100526/RB1102_10x2_f72h20C_600s31x10s10s_C0811bb.00068.txt', '20220815_120440/RB1102_10x2_f72h20C_600s31x10s10s_C0811be.00126.txt', '20220815_114931/RB1102_10x2_f72h20C_600s31x10s10s_B0811bd.00011.txt', '20220815_100526/RB1102_10x2_f72h20C_600s31x10s10s_C0811bb.02277.txt', '20220815_100526/RB1102_10x2_f72h20C_600s31x10s10s_C0811bb.00018.txt', '20220815_100526/RB1102_10x2_f72h20C_600s31x10s10s_C0811bb.00047.txt', '20220815_100526/RB1102_10x2_f72h20C_600s31x10s10s_C0811bb.02662.

### Step2: PD_Screen_tap_output.csv

In [17]:
f1 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/PD_Screen_tap_output.csv'
f2 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/PD_Screen_tap_output.csv'



compare_files(f1, f2)

✅ Files are exactly the same


### Step3: PD_Screen_baseline_output.csv

In [18]:
f1 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022/PD_Screen_baseline_output.csv'
f2 = '/Users/gurmehak/Documents/RankinLab/Test_Datasets/PDScreen_TapHab_August15_2022_copy/PD_Screen_baseline_output.csv'



compare_files(f1, f2)

✅ Files are exactly the same
