In [None]:
import csv

def preprocess_csv(input_file, preprocessed_file):
    """Sort a raw detections CSV by frame number and repair mangled size categories.

    The first line of ``input_file`` is treated as the header and copied
    unchanged. Every following non-blank row is sorted by the integer value
    of its first column (the frame number) and written to
    ``preprocessed_file``.

    Args:
        input_file: Path of the CSV to read.
        preprocessed_file: Path of the CSV to write (overwritten if present).

    Raises:
        StopIteration: If ``input_file`` is completely empty (no header line).
        ValueError: If a row's first column is not an integer.
    """
    # newline='' lets the csv module do its own newline handling, as the
    # csv documentation requires for files passed to csv.reader.
    with open(input_file, 'r', newline='') as infile:
        reader = csv.reader(infile)
        header = next(reader)
        # csv.reader yields [] for blank lines; such rows have no frame
        # number and would crash the sort key below, so drop them here.
        rows = [row for row in reader if row]

    rows.sort(key=lambda row: int(row[0]))

    with open(preprocessed_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(header)

        for row in rows:
            # The size category '3-6' sometimes arrives as '03-Jun'
            # (presumably a spreadsheet auto-converted it to a date - TODO
            # confirm); restore the intended label. Short rows are passed
            # through untouched instead of raising IndexError.
            if len(row) > 6 and row[6] == '03-Jun':
                row[6] = '3-6'

            writer.writerow(row)

In [None]:
import csv

def process_csv(input_file, output_file, park, cam, date, fps=25):
    """Merge per-frame group detections into stable groups and write them to CSV.

    Input rows are read positionally: column 0 frame number, 1 detection
    time, 2 center x, 3 center y, 4 radius, 6 size category, 7 segment
    (column 5, the raw size, is not used by the grouping). A detection is
    merged into the first still-active group that has the same size category
    and segment and whose last known center lies within
    ``max(1.5 * group radius, 150)`` of the detection; otherwise it starts a
    new group. A group stops accepting detections once more than 30 seconds
    worth of frames have elapsed since its last detection.

    Only groups with at least 4 detections that lasted at least 10 seconds
    (last detection frame minus first detection frame, divided by ``fps``)
    are written.

    Note: matching is greedy (first matching group wins), and two detections
    in the same frame may both be merged into the same group.

    Args:
        input_file: Path of the preprocessed detections CSV (with header row).
        output_file: Path of the stable-groups CSV to write.
        park: Value written to the 'Park' column of every output row.
        cam: Value written to the 'Camera' column of every output row.
        date: Value written to the 'Date' column of every output row.
        fps: Frames per second of the source video; converts frame counts
            to seconds. Must be positive.

    Raises:
        ValueError: If ``fps`` is not positive or a numeric column cannot
            be parsed.
    """
    if fps <= 0:
        raise ValueError("fps must be positive")

    window_seconds = 30        # a group unseen for longer than this is retired
    min_duration_seconds = 10  # minimum lifetime of a reported group
    min_detections = 4         # minimum number of detections of a reported group
    radius_factor = 1.5        # match radius as a multiple of the group radius
    min_match_distance = 150   # lower bound of the match radius

    # 750 frames at the default 25 fps.
    window = window_seconds * fps

    class Group:
        """Running state of one tracked group."""

        def __init__(self, first_frame, time, size_category, segment, radius, center_x, center_y):
            self.first_frame = first_frame
            self.last_frame = first_frame  # frame of the most recent detection
            self.time = time               # detection time of the first detection
            self.size_category = size_category
            self.segment = segment
            self.radius = radius
            self.center_x = center_x
            self.center_y = center_y
            self.detections = 1
            self.active = True

    # Read the detections and bucket them by frame number.
    frame_groups = {}
    with open(input_file, 'r', newline='') as infile:
        reader = csv.reader(infile)
        next(reader, None)  # skip the header row (tolerates an empty file)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; nothing to parse.
                continue
            frame = int(row[0])
            detection = (
                row[1],         # time
                float(row[2]),  # center_x
                float(row[3]),  # center_y
                float(row[4]),  # radius
                row[6],         # size_category
                row[7],         # segment
            )
            frame_groups.setdefault(frame, []).append(detection)

    groups = []

    for frame in sorted(frame_groups):
        # Retire groups that have gone unseen for longer than the window.
        # Staleness is measured in real frame numbers, so gaps in the data
        # (frames without any detection) are accounted for.
        for group in groups:
            if group.active and frame - group.last_frame > window:
                group.active = False

        for time, center_x, center_y, radius, size_category, segment in frame_groups[frame]:
            for group in groups:
                if not group.active:
                    continue
                if group.size_category != size_category or group.segment != segment:
                    continue
                distance = ((group.center_x - center_x) ** 2 + (group.center_y - center_y) ** 2) ** 0.5
                if distance <= max(radius_factor * group.radius, min_match_distance):
                    # Same group seen again: move it to the new position.
                    group.center_x = center_x
                    group.center_y = center_y
                    group.radius = radius
                    group.detections += 1
                    group.last_frame = frame
                    break
            else:
                # No active group matched: this detection starts a new one.
                groups.append(Group(frame, time, size_category, segment, radius, center_x, center_y))

    # Filter and write stable groups to the output CSV.
    with open(output_file, 'w', newline='') as outfile:
        writer = csv.writer(outfile)
        writer.writerow(['First Frame', 'Detection Time', 'Segment', 'Center X', 'Center Y', 'Radius', 'Size Category', 'Duration', 'Park', 'Camera', 'Date'])

        for group in groups:
            if group.detections < min_detections:
                continue
            # Lifetime of the group: time between its first and last detection.
            duration = (group.last_frame - group.first_frame) / fps
            if duration >= min_duration_seconds:
                writer.writerow([group.first_frame, group.time, group.segment, group.center_x, group.center_y, group.radius, group.size_category, duration, park, cam, date])







In [None]:
# Run configuration: which park / camera / date to process.
park = "Remez"
cam = "1"
date = "2304"

# Raw detections file in the Colab working directory.
input_file = f'/content/raw_groups_remez_{date}_{cam}.csv'

# Outputs for the combined (all time slots) run live under
# <project>/<park>/<date>/combined/cam<cam>/ on Drive.
# Per-time-slot runs previously used <project>/<park>/<date>/<time>/cam<cam>/
# with file names ending in _<date>_<time>_<cam>.csv.
preprocessed_file = f'/content/drive/MyDrive/GroupSizeProject/{park}/{date}/combined/cam{cam}/raw_groups_remez_{date}_{cam}_combined.csv'
output_file = f'/content/drive/MyDrive/GroupSizeProject/{park}/{date}/combined/cam{cam}/stable_groups_remez_{date}_{cam}_combined_new.csv'
summary_file = f'/content/drive/MyDrive/GroupSizeProject/{park}/{date}/combined/cam{cam}/size_category_counts_remez_{date}_{cam}_combined.csv'

# Sort and repair the raw file first, then extract the stable groups from it.
preprocess_csv(input_file, preprocessed_file)
process_csv(preprocessed_file, output_file, park, cam, date)

In [None]:
# Mount Google Drive at /content/drive (Colab only). Every path under
# /content/drive/MyDrive used in this notebook requires this mount, so this
# cell has to run before any cell that reads or writes those paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import pandas as pd

In [None]:
# Which recording to combine: one park / date / camera, three time slots.
date = '2304'
park = 'Katsenelson'
time1 = '0812'
time2 = '1216'
time3 = '1620'
cam = '2'

# Per-slot stable-group files, one per time slot.
file1 = f'/content/drive/MyDrive/GroupSizeProject/{park}/{date}/{time1}/cam{cam}/stable_groups_remez_{date}_{time1}_{cam}.csv'
file2 = f'/content/drive/MyDrive/GroupSizeProject/{park}/{date}/{time2}/cam{cam}/stable_groups_remez_{date}_{time2}_{cam}.csv'
file3 = f'/content/drive/MyDrive/GroupSizeProject/{park}/{date}/{time3}/cam{cam}/stable_groups_remez_{date}_{time3}_{cam}.csv'

# Destination of the concatenated result.
output_file = f'/content/drive/MyDrive/GroupSizeProject/{park}/{date}/combined/cam{cam}/stable_groups_remez_{date}_{cam}_combined.csv'

# Function to read and clean a CSV file
def read_and_clean_csv(file_path, skip_first_row=False):
    """Read a CSV into a DataFrame and strip whitespace from every string cell.

    Args:
        file_path: Path of the CSV file to read.
        skip_first_row: When True, the first physical line of the file is
            skipped before parsing (``skiprows=1``). Note that pandas then
            uses the *next* line as the header, so only set this for files
            that carry an extra line above the real header row.

    Returns:
        A new DataFrame in which every ``str`` value has leading/trailing
        whitespace removed; non-string values are left unchanged. Column
        names are not modified.
    """
    def _strip_if_str(value):
        return value.strip() if isinstance(value, str) else value

    if skip_first_row:
        df = pd.read_csv(file_path, skiprows=1)
    else:
        df = pd.read_csv(file_path)

    # DataFrame.applymap was deprecated in pandas 2.1 in favour of
    # DataFrame.map; use the new name when it exists and fall back to the
    # old one on earlier pandas versions.
    if hasattr(pd.DataFrame, 'map'):
        return df.map(_strip_if_str)
    return df.applymap(_strip_if_str)

# Read and clean the CSV files.
# Every file is read with its header row intact. Passing skip_first_row=True
# makes pandas drop the header line and promote the first data row to column
# names, which loses that row and misaligns the columns in the concat below.
# (Assumes each per-slot file starts with its own header row, like file1 -
# TODO confirm for files produced outside this notebook.)
df1 = read_and_clean_csv(file1)
df2 = read_and_clean_csv(file2)
df3 = read_and_clean_csv(file3)

# Combine the dataframes; pd.concat aligns them by column name, so the
# per-file headers are not duplicated into the data.
combined_df = pd.concat([df1, df2, df3], ignore_index=True)

# Remove any rows with all NaN values
cleaned_combined_df = combined_df.dropna(how='all')

# Save the cleaned combined dataframe to a new CSV file
cleaned_combined_df.to_csv(output_file, index=False)