In [1]:
import pandas as pd

# Raw data as string
data_str = """
# Data/Bapun
<5>NOTICE: Google drive root 'Data/Bapun': Size may be underestimated due to 2 objects with unknown size
Total objects: 5.615k (5615)
Total size: 20.939 TiB (23022337704558 Byte)
Total objects with unknown size: 2 (2)

# Data/Hiro:
Total objects: 8.036k (8036)
Total size: 4.991 TiB (5487246060762 Byte)

# Data/Jahangir
<5>NOTICE: Google drive root 'Data/Jahangir': Size may be underestimated due to 37 objects with unknown size
Total objects: 2.720k (2720)
Total size: 15.024 TiB (16519203892508 Byte)
Total objects with unknown size: 37 (37)

# Data/KDIBA
Total objects: 10.002k (10002)
Total size: 351.423 GiB (377337175347 Byte)

# Data/Kourosh
Total objects: 84 (84)
Total size: 312.210 GiB (335233248677 Byte)

# Data/Laurel
Total objects: 32.095k (32095)
Total size: 6.628 TiB (7287121141962 Byte)

# Data/Nat
<5>NOTICE: Google drive root 'Data/Nat': Size may be underestimated due to 49 objects with unknown size
Total objects: 88.774k (88774)
Total size: 7.444 TiB (8185217690135 Byte)
Total objects with unknown size: 49 (49)

# Data/Output
Total objects: 696 (696)
Total size: 212.664 MiB (222994516 Byte)

# Data/Rachel
Total objects: 2.061k (2061)
Total size: 1.102 TiB (1211611387062 Byte)

# Data/Utku
Total objects: 2.698k (2698)
Total size: 9.468 TiB (10409887413858 Byte)
"""

# Split the data into sections for each name
sections = [section.strip() for section in data_str.split("#") if section.strip()]

# Extract the data for each section
data = []
for section in sections:
    lines = section.split('\n')
    name = lines[0].replace(":", "").strip()
    
    # Extract total objects
    total_objects_line = [line for line in lines if "Total objects:" in line][0]
    total_objects = int(total_objects_line.split('(')[1].split(')')[0])
    
    # Extract total size and convert to GiB
    total_size_line = [line for line in lines if "Total size:" in line][0]
    size_value = float(total_size_line.split(' ')[2])
    size_unit = total_size_line.split(' ')[3]
    if size_unit == "TiB":
        size_value *= 1024  # Convert TiB to GiB
    elif size_unit == "MiB":
        size_value /= 1024  # Convert MiB to GiB
    
    # Extract notice if exists
    notice_line = [line for line in lines if "NOTICE:" in line]
    notice = notice_line[0] if notice_line else ""
    
    data.append([name, total_objects, size_value, notice])

# Convert data to DataFrame
df = pd.DataFrame(data, columns=['Name', 'Total objects', 'Total Size (GiB)', 'Notes'])

df


Unnamed: 0,Name,Total objects,Total Size (GiB),Notes
0,Data/Bapun,5615,21441.536,<5>NOTICE: Google drive root 'Data/Bapun': Siz...
1,Data/Hiro,8036,5110.784,
2,Data/Jahangir,2720,15384.576,<5>NOTICE: Google drive root 'Data/Jahangir': ...
3,Data/KDIBA,10002,351.423,
4,Data/Kourosh,84,312.21,
5,Data/Laurel,32095,6787.072,
6,Data/Nat,88774,7622.656,<5>NOTICE: Google drive root 'Data/Nat': Size ...
7,Data/Output,696,0.20768,
8,Data/Rachel,2061,1128.448,
9,Data/Utku,2698,9695.232,


In [2]:
# Extracting 'Total objects with unknown size' information
for section in sections:
    lines = section.split('\n')
    name = lines[0].replace(":", "").strip()
    
    # Find the row with the given name
    row_idx = df[df['Name'] == name].index[0]
    
    # Extract total objects with unknown size
    unsizable_objects_line = [line for line in lines if "Total objects with unknown size:" in line]
    if unsizable_objects_line:
        unsizable_objects = int(unsizable_objects_line[0].split('(')[1].split(')')[0])
    else:
        unsizable_objects = 0
    
    df.at[row_idx, 'unsizable_objects'] = unsizable_objects

df


Unnamed: 0,Name,Total objects,Total Size (GiB),Notes,unsizable_objects
0,Data/Bapun,5615,21441.536,<5>NOTICE: Google drive root 'Data/Bapun': Siz...,2.0
1,Data/Hiro,8036,5110.784,,0.0
2,Data/Jahangir,2720,15384.576,<5>NOTICE: Google drive root 'Data/Jahangir': ...,37.0
3,Data/KDIBA,10002,351.423,,0.0
4,Data/Kourosh,84,312.21,,0.0
5,Data/Laurel,32095,6787.072,,0.0
6,Data/Nat,88774,7622.656,<5>NOTICE: Google drive root 'Data/Nat': Size ...,49.0
7,Data/Output,696,0.20768,,0.0
8,Data/Rachel,2061,1128.448,,0.0
9,Data/Utku,2698,9695.232,,0.0


In [6]:
total_combined_size_GB = df['Total Size (GiB)'].sum()
total_combined_size_GB

67834.1446796875

In [26]:
from phoglobushelpers.data_planning_helpers import get_mounted_disks_info, DiskInfo

# Usage
df = get_mounted_disks_info()
display(df)

Unnamed: 0,device,mount_point,fstype,total,used,free,percent
0,/dev/sdc3,/,btrfs,1998.694908,1033.371820,962.794840,51.8
1,/dev/sdc3,/var/lib/snapd/snap,btrfs,1998.694908,1033.371820,962.794840,51.8
2,/dev/sdc2,/boot,ext4,1.020703,0.299221,0.651018,31.5
3,/dev/sdc3,/home,btrfs,1998.694908,1033.371820,962.794840,51.8
4,/dev/loop0,/var/lib/snapd/snap/bare/5,squashfs,0.000131,0.000131,0.000000,100.0
...,...,...,...,...,...,...,...
65,/dev/loop22,/var/lib/snapd/snap/saber/93,squashfs,0.089129,0.089129,0.000000,100.0
66,/dev/loop24,/var/lib/snapd/snap/snapd/19993,squashfs,0.042861,0.042861,0.000000,100.0
67,/dev/loop24,/var/lib/snapd/snap/snapd/19993,squashfs,0.042861,0.042861,0.000000,100.0
68,/dev/sdb1,/media/MAX,ext4,15873.631351,11036.024578,4037.545071,73.2


In [27]:
included_mounts = ['/media/MAX', '/run/media/halechr/HUUUGE', '/media/HugePort']
df = df[np.isin(df['mount_point'], included_mounts)]
df

Unnamed: 0,device,mount_point,fstype,total,used,free,percent
30,/dev/sda2,/media/HugePort,exfat,20000.538624,19271.786693,728.751931,96.4
68,/dev/sdb1,/media/MAX,ext4,15873.631351,11036.024578,4037.545071,73.2
69,/dev/sdd1,/run/media/halechr/HUUUGE,btrfs,20000.580567,5730.646807,14268.464181,28.7
