In [None]:
# Mount Google Drive at /content/drive (Colab-only helper); the dataset paths
# used later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd

def parse_info_txt(file_path, start_line=1):
    """Parse a MIAS Info.txt file and return its records as a DataFrame.

    Only lines whose first whitespace-separated token starts with "mdb" are
    treated as records; blank lines and any other text are ignored.

    Args:
        file_path: Path to the Info.txt file.
        start_line: 1-based number of the first line to read. Values below 1
            behave like 1 (the whole file is read).

    Returns:
        pandas.DataFrame with one row per parsed record and the columns
        ID, Breast Density, Abnormality Type, Severity, X, Y, Radius.
        Missing text fields are "" and missing numeric fields are None/NaN.
        The columns are present even when no record could be parsed, so
        column lookups on the result never fail.

    Notes:
        Records whose fields cannot be parsed (for example non-numeric
        coordinates) are skipped and reported on stdout.
    """
    columns = [
        "ID",
        "Breast Density",
        "Abnormality Type",
        "Severity",
        "X",
        "Y",
        "Radius",
    ]
    data = []

    with open(file_path, 'r') as f:
        # Stream the file instead of slicing readlines(): a start_line of 0 or
        # less would otherwise become a negative slice index and silently drop
        # almost every line.
        for line_number, line in enumerate(f, start=1):
            if line_number < start_line:
                continue

            parts = line.split()

            # Skip invalid lines
            if not parts or not parts[0].startswith("mdb"):
                continue

            try:
                # Parse common fields
                entry = {
                    "ID": parts[0],
                    "Breast Density": parts[1],
                    "Abnormality Type": parts[2] if len(parts) > 2 else "",
                    "Severity": parts[3] if len(parts) > 3 else "",
                    "X": float(parts[4]) if len(parts) > 4 else None,
                    "Y": float(parts[5]) if len(parts) > 5 else None,
                    "Radius": float(parts[6]) if len(parts) > 6 else None,
                }
            except (ValueError, IndexError) as e:
                # ValueError: non-numeric coordinate; IndexError: ID with no
                # further fields. Anything else is a real bug and should surface.
                print(f"Skipping malformed line: {line.strip()} -> {e}")
            else:
                data.append(entry)

    # Passing the column list keeps the schema stable for an empty result.
    return pd.DataFrame(data, columns=columns)

In [None]:
# Location of the MIAS annotation file on the mounted Drive.
info_file = "/content/drive/MyDrive/miniMIAS_Dataset/Info.txt"  # Change path if needed

# Parse the annotation file into a DataFrame; lines that cannot be parsed are
# reported on stdout by parse_info_txt and left out of the result.
df_info = parse_info_txt(info_file)

Skipping malformed line: mdb216 D CALC M *NOTE 3* -> could not convert string to float: '*NOTE'
Skipping malformed line: mdb233 G CALC M *NOTE 3* -> could not convert string to float: '*NOTE'
Skipping malformed line: mdb245 F CALC M *NOTE 3* -> could not convert string to float: '*NOTE'


In [None]:
# Drop abnormal rows (Severity B or M) that are missing X, Y or Radius, since
# no bounding box can be derived for them. All other rows are kept.
# .copy() makes df_filtered an independent frame, so adding a column to it
# later does not raise pandas' SettingWithCopyWarning.
df_filtered = df_info[~(
    df_info["Severity"].isin(["B", "M"]) &
    (df_info["X"].isna() | df_info["Y"].isna() | df_info["Radius"].isna())
)].copy()

In [None]:
import pandas as pd



# Pixel dimensions of a MIAS image (1024 wide by 1024 high); used to
# normalise bounding-box coordinates.
IMAGE_WIDTH, IMAGE_HEIGHT = 1024, 1024


In [None]:

# Function to generate YOLO bbox
def create_yolo_bbox(row, image_width=None, image_height=None):
    """Convert one annotation row into a normalised YOLO bounding box.

    Args:
        row: Mapping or pandas Series with the keys 'Severity', 'X', 'Y'
            and 'Radius'.
        image_width: Image width in pixels. Defaults to the module-level
            IMAGE_WIDTH when omitted.
        image_height: Image height in pixels. Defaults to the module-level
            IMAGE_HEIGHT when omitted.

    Returns:
        A tuple (class_id, x_center, y_center, width, height) where class_id
        is 0 for Severity 'B' and 1 for Severity 'M' and the other values are
        fractions of the image size. Returns None when Severity is neither
        'B' nor 'M', or when X, Y or Radius is missing.
    """
    severity = row['Severity']
    if severity not in ['B', 'M']:
        return None  # skip if not abnormal

    # extract tumor info
    x = row['X']
    y = row['Y']
    r = row['Radius']

    # Without a complete location there is no box to emit; returning None
    # avoids producing a tuple full of NaN values.
    if pd.isna(x) or pd.isna(y) or pd.isna(r):
        return None

    # Resolve the image size at call time so the notebook-level constants are
    # only required when the caller does not pass explicit dimensions.
    if image_width is None:
        image_width = IMAGE_WIDTH
    if image_height is None:
        image_height = IMAGE_HEIGHT

    # class id
    cls = 0 if severity == 'B' else 1

    # flip Y (since dataset origin = bottom-left, YOLO = top-left)
    y_flipped = image_height - y

    # bounding box width and height in pixels: the square enclosing the circle
    w = 2 * r
    h = 2 * r

    # normalize
    x_center = x / image_width
    y_center = y_flipped / image_height
    w_norm = w / image_width
    h_norm = h / image_height

    return (cls, x_center, y_center, w_norm, h_norm)



In [None]:
# Compute a YOLO box for every row; create_yolo_bbox returns None for rows
# whose Severity is not B or M. assign() returns a new frame, which avoids the
# SettingWithCopyWarning raised by column assignment on a filtered slice.
df_filtered = df_filtered.assign(
    yolo_bbox=df_filtered.apply(create_yolo_bbox, axis=1)
)

# Print every row (rows without a box included), omitting the index column
print(df_filtered.to_string(index=False))

    ID Breast Density Abnormality Type Severity     X     Y  Radius                                                 yolo_bbox
mdb001              G             CIRC        B 535.0 425.0   197.0 (0, 0.5224609375, 0.5849609375, 0.384765625, 0.384765625)
mdb002              G             CIRC        B 522.0 280.0    69.0     (0, 0.509765625, 0.7265625, 0.134765625, 0.134765625)
mdb003              D             NORM            NaN   NaN     NaN                                                      None
mdb004              D             NORM            NaN   NaN     NaN                                                      None
mdb005              F             CIRC        B 477.0 133.0    30.0   (0, 0.4658203125, 0.8701171875, 0.05859375, 0.05859375)
mdb005              F             CIRC        B 500.0 168.0    26.0        (0, 0.48828125, 0.8359375, 0.05078125, 0.05078125)
mdb006              F             NORM            NaN   NaN     NaN                                                   

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['yolo_bbox'] = df_filtered.apply(create_yolo_bbox, axis=1)


In [None]:
# Write the annotated table (including the yolo_bbox column) to Drive as CSV,
# leaving out the DataFrame index.
output_csv = "/content/drive/MyDrive/miniMIAS_Dataset/mias_info_with_yolo.csv"
df_filtered.to_csv(output_csv, index=False)

print("✅ New CSV with YOLO bounding boxes saved: mias_info_with_yolo.csv")

✅ New CSV with YOLO bounding boxes saved: mias_info_with_yolo.csv
