In [4]:
import os
import pandas as pd
import numpy as np

# Root folder of the preprocessed data. The CSV files are not stored in it
# directly but one level down, inside its subfolders.
preprocessed_folder = 'preprocessed_data'

# Collect every CSV as a path relative to preprocessed_folder
# ("<subfolder>/<file>.csv"); anything that is not a directory is skipped.
files = []
for subfolder in os.listdir(preprocessed_folder):
    subfolder_path = os.path.join(preprocessed_folder, subfolder)
    if not os.path.isdir(subfolder_path):
        continue
    for csv_name in os.listdir(subfolder_path):
        if csv_name.endswith('.csv'):
            files.append(os.path.join(subfolder, csv_name))
print(f"Found {len(files)} files in {preprocessed_folder}\n")

# Print summary statistics of the 'Speed' column, one file at a time.
for file in sorted(files):
    file_path = os.path.join(preprocessed_folder, file)
    df = pd.read_csv(file_path)

    print(f"File: {file}")

    # Files without a 'Speed' column only get a warning.
    if 'Speed' not in df.columns:
        print("  Warning: 'Speed' column not found")
        print()
        continue

    speed_values = df['Speed']

    # "Count" is the number of rows in the file, whereas the pandas
    # reductions below skip NaN values by default.
    print(f"  Count: {len(df)}")

    # (label, zero-argument callable) pairs, evaluated lazily so every
    # statistic is computed right before it is printed.
    speed_stats = (
        ("Mean Speed", speed_values.mean),
        ("Median Speed", speed_values.median),
        ("Std Dev", speed_values.std),
        ("Min Speed", speed_values.min),
        ("Max Speed", speed_values.max),
        ("25th Percentile", lambda: speed_values.quantile(0.25)),
        ("75th Percentile", lambda: speed_values.quantile(0.75)),
    )
    for stat_label, stat_fn in speed_stats:
        print(f"  {stat_label}: {stat_fn():.2f}")
    print()

Found 104 files in preprocessed_data

File: TERBINAFINE+\20240827_piworm09_2.csv
  Count: 21125
  Mean Speed: 1.47
  Median Speed: 0.51
  Std Dev: 2.26
  Min Speed: 0.00
  Max Speed: 10.00
  25th Percentile: 0.12
  75th Percentile: 1.75

File: TERBINAFINE+\20240827_piworm09_5.csv
  Count: 42746
  Mean Speed: 1.22
  Median Speed: 0.54
  Std Dev: 1.94
  Min Speed: 0.00
  Max Speed: 10.00
  25th Percentile: 0.19
  75th Percentile: 1.32

File: TERBINAFINE+\20240827_piworm11_1.csv
  Count: 46237
  Mean Speed: 2.58
  Median Speed: 0.70
  Std Dev: 3.58
  Min Speed: 0.00
  Max Speed: 10.00
  25th Percentile: 0.22
  75th Percentile: 3.24

File: TERBINAFINE+\20240827_piworm11_2.csv
  Count: 50005
  Mean Speed: 2.05
  Median Speed: 0.81
  Std Dev: 2.86
  Min Speed: 0.00
  Max Speed: 10.00
  25th Percentile: 0.30
  75th Percentile: 2.27

File: TERBINAFINE+\20240827_piworm11_3.csv
  Count: 53210
  Mean Speed: 1.71
  Median Speed: 0.63
  Std Dev: 2.66
  Min Speed: 0.00
  Max Speed: 10.00
  25th Perc

In [7]:
# all_speeds is built in a cell outside this excerpt; it is expected to be a
# collection of pandas objects that pd.concat can stack (TODO confirm).
if not all_speeds:
    print("No speed data found.")
else:
    # Pool everything, then derive the quartiles, the inter-quartile range
    # and the upper outlier fence (Q3 + 1.5 * IQR).
    combined_speeds = pd.concat(all_speeds)
    q1, q3 = (combined_speeds.quantile(level) for level in (0.25, 0.75))
    iqr = q3 - q1
    upper_fence = q3 + 1.5 * iqr

    fence_report = (
        ("Q1 (25th)", q1),
        ("Q3 (75th)", q3),
        ("IQR", iqr),
        ("Upper Fence (Q3 + 1.5*IQR)", upper_fence),
    )
    for fence_label, fence_value in fence_report:
        print(f"{fence_label}: {fence_value:.4f}")

Q1 (25th): 0.1847
Q3 (75th): 1.6234
IQR: 1.4387
Upper Fence (Q3 + 1.5*IQR): 3.7814
