In [12]:
import numpy as np
import pandas as pd
import os
import sys
from io import StringIO
import glob
import csv
from io import StringIO
from pathlib import PureWindowsPath
import re
from typing import List, Tuple
from image_size import get_image_size #adding image_size module from github (https://github.com/scardine/image_size)
from matplotlib import image 
import matplotlib.pyplot as plt


def get_pixel_scale_factor(image_width, image_height, real_w, real_h):
    """
    Detect pixel scale factor - the scale of the x, y units CPCe used to
    express the point locations.

    This is normally 15 units per pixel, but
    that only holds when CPCe runs in 96 DPI. Earlier versions of CPCe
    (such as CPCe 3.5) did not enforce 96 DPI, so for example, it is
    possible to run in 120 DPI and get a scale of 12 units per pixel.

    We can figure out the scale factor by reading the .cpc file's image
    resolution values. These values are in CPCe's scale, and we know the
    resolution in pixels, so we can solve for the scale factor.
    """
    try:
        cpce_scale_width = int(image_width)
        cpce_scale_height = int(image_height)
    except ValueError:
        print("The image width and height on line 1 must be integers.")

    x_scale = cpce_scale_width / real_w
    y_scale = cpce_scale_height / real_h
    if (not x_scale.is_integer()
            or not y_scale.is_integer()
            or x_scale != y_scale):
        print("Could not establish an integer scale factor from line 1.")
    return x_scale

def cpc_to_RC(cpc_filepath):
    cpc_filename=os.path.basename(cpc_filepath)

    # # # Each line of a .cpc file is like a CSV row.
    # # #
    # # # But different lines have different kinds of values, so we'll go
    # # # through groups of data among lines.

    # # # # Line 1: environment info and image dimensions
    code_filepath, image_filepath, \
        image_width, image_height, \
        display_width, display_height = pd.read_csv(cpc_filepath, nrows=1)
    code_filepath

    # Lines 2-5: annotation area bounds
    # CPCe saves these numbers anywhere from 0 to 4 decimal places.
    # We'll store these numbers as strings, since 1) storing exact
    # float values takes a bit more care compared to ints, and
    # 2) CoralNet doesn't have any reason to read/manipulate
    # these numeric values later on.

    #specify rows to import
    specific_rows = [1,2,3,4]

    annotation_area=pd.read_csv(cpc_filepath, skiprows = lambda x: x not in specific_rows, header=None,names=['x','y'])
    annotation_area.index=['bottom_left','bottom_right','top_right','top_left']

    # Line 6: number of points
    token = pd.read_csv(cpc_filepath, skiprows = lambda x: x not in [5], header=None)

    try:
        num_points = int(token.iloc[0])
        if num_points <= 0:
            raise ValueError
    except ValueError:
        print(f'Line 6 is suppose to have'
            f'the number of points, but this line is not'
            f' a positive integer: {toke.iloc[0]}')

    # Next num_points lines: point positions
    points = []
    points_end=num_points+6
    point_rows = range(6,points_end,1)
    points=pd.read_csv(cpc_filepath, skiprows = lambda x: x not in point_rows, header=None,names=['x','y'])
    # points=points.to_dict('series')

    # Next num_points lines: point ID/Notes data.
    # We're taking advantage of the fact that the previous section
    # and this section are both in point-number order. As long as we
    # maintain that order, we assign labels to the correct points.
    labels_start=points_end
    labels_end=points_end+num_points
    labels_rows = range(labels_start,labels_end,1)
    labels=pd.read_csv(cpc_filepath, skiprows = lambda x: x not in labels_rows, header=None,names=['number_label','id','notes'], usecols=[0,1,3])

    # # Next 28 lines: header fields, one per line.
    # # These lines may or may not be present. (Seems to be all or
    # # nothing, but we won't enforce that here.)

    headers=pd.read_csv(cpc_filepath, skiprows = lambda x: x not in range(labels_end, labels_end+27))

    Point_Image_Path=os.path.join(os.path.dirname(cpc_filepath), os.path.basename(image_filepath))
    # os.path.join(image_folder,os.path.basename(image_filepath))
    point_df=pd.concat([points,labels], axis=1)
    point_df=point_df.assign(Point_Image_Path=Point_Image_Path)

    keeps={
        'Point_Image_Path':'Point Image Path',
        'x':'Point X Coord',
        'y':'Point Y Coord',
        'id':'Point Annotation'
    }

    point_df=point_df[keeps.keys()].rename(keeps,axis=1)
    point_df=pd.DataFrame(point_df)

    # Convert CPC coordinates to Pixel
    try:
        width, height = get_image_size.get_image_size(Point_Image_Path)
    except get_image_size.UnknownImageFormat:
        width, height = -1, -1

    psf=get_pixel_scale_factor(image_width, image_height,width,height)
    ## CPCe units -> pixels conversion.
    point_df['Point Y Coord'] = point_df['Point Y Coord'].transform(lambda x: int(x/psf))
    point_df['Point X Coord'] = point_df['Point X Coord'].transform(lambda x: int(x/psf))

    if any(point_df['Point X Coord'])> width:
        print("Point X ccoordinates outside of the image.")

    if any(point_df['Point Y Coord'])> height:
        print("Point Y coordinates outside of the image.")  

    return(point_df)


In [15]:

image_folder=r"C:\Users\mgonzale\OneDrive - Australian Institute of Marine Science\Documents - ReefCloud\ReefCloud Support\WWF Palawan\WWF_Cagayancillo"
cpc_files=glob.glob(os.path.join(image_folder,"**","*.cpc"), recursive=True)
# cpc_files
dfs=[]
## We'll create three dataframes for an example
for i in range(len(cpc_files)):
    dfs.append(cpc_to_RC(cpc_filepath=cpc_files[i]))

data=pd.concat(dfs)
# dfs=pd.DataFrame.from_dict([dfs])

# dfs
data.to_csv("points.csv", sep=',', index=False)

  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points =

FileNotFoundError: [WinError 2] The system cannot find the file specified: 'C:\\Users\\mgonzale\\OneDrive - Australian Institute of Marine Science\\Documents - ReefCloud\\ReefCloud Support\\WWF Palawan\\WWF_Cagayancillo\\7_Dondonay 1_C50\\GOPR1045 (2).JPG'

In [None]:
image_folder="test_data"
cpc_files=glob.glob(os.path.join(image_folder,"*.cpc"))


df=cpc_to_RC(image_folder, cpc_files[5])

image.imread(df['Point Image Path'][0])

def check_annotations(df, n):
    img = image.imread(df['Point Image Path'][0])

    plt.imshow(img)
    # plt.plot(640, 570, "og", markersize=10)  # og:shorthand for green circle
    plt.scatter(df['Point X Coord'], df['Point Y Coord'], marker="x", color="red", s=200)
    for i, txt in enumerate(df['Point Annotation']):
        plt.annotate(txt, (df['Point X Coord'][i], df['Point Y Coord'][i]),color='white' )
    # plt.show()
    plt.savefig(f"im{str(n)}.jpg")
    plt.close()
    return(None)

for i in range(len(cpc_files)):
    check_annotations(cpc_to_RC(image_folder, cpc_files[i]), i)

  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
  num_points = int(token.iloc[0])
