In [11]:
using CSV, DataFrames

In [12]:
# Directory containing this file/notebook; every project path is resolved from here.
const ROOT_PATH = @__DIR__
# Folder holding the recorded data sets. Built with `joinpath` so the platform's
# path separator is used instead of a hard-coded "/".
const DATA_PATH = joinpath(ROOT_PATH, "data")

"/home/justin/Documents/Spring2024/NumericalAnalysis/NA-FinalProject/data"

In [34]:
"""
    get_datafiles(foldername::AbstractString)

Return the paths of all entries inside the folder `foldername`.

`foldername` is the name of a folder, or a relative path to a subfolder, located
under `DATA_PATH`. Each returned path already has the folder prepended
(`readdir(...; join=true)`), so it can be opened directly. Every directory entry is
returned, not only CSV files.

`foldername` should be a relative path: `joinpath` discards `DATA_PATH` when it is
given an absolute path.

# Throws
`readdir` raises an error if the resulting directory does not exist.

# Examples
```julia
data_left_sideOf_center_dr = get_datafiles("Center/Left")
# --> ["<DATA_PATH>/Center/Left/Location-Justin-04-04", etc]
```
"""
function get_datafiles(foldername::AbstractString)
    return readdir(joinpath(DATA_PATH, foldername); join=true)
end


get_datafiles

In [35]:
"""
    get_folder_dataframes(foldername::String)

Read every file in the folder `foldername` as CSV and return the resulting data
frames.

`foldername` is the name of a folder, or the path to a subfolder, under the data
directory; the file list comes from `get_datafiles(foldername)`, and the result has
one `DataFrame` per listed file, in the same order.

The result is a `Vector{Any}` (not `Vector{DataFrame}`) so that it can be passed to
functions in this file whose signatures require `Vector{Any}`.

# Examples
```julia
sweetwater_dataframes = get_folder_dataframes("Sweetwater")
```
"""
function get_folder_dataframes(foldername::String)
    # Hand the path itself to CSV.jl rather than an `open`ed stream: a stream opened
    # here and never closed would leak one file handle per data file.
    return Any[CSV.File(path) |> DataFrame for path in get_datafiles(foldername)]
end

get_folder_dataframes

In [86]:
"""
    filter_matching_rows(dfs::AbstractVector, column_name::AbstractString; round_to::Integer=6)

Keep, in every data frame of `dfs`, only the rows whose value in `column_name`
(rounded to `round_to` digits) also occurs in that column of *every* other data
frame.

Matching is done on values, not on row positions: a row survives when its rounded
value belongs to the intersection of the rounded values of all data frames. Lower
`round_to` to make the comparison more tolerant.

# Arguments
- `dfs`: vector of data frames (any element type, e.g. `Vector{Any}` or
  `Vector{DataFrame}`).
- `column_name`: name of the numeric column to compare.

# Keywords
- `round_to`: number of digits passed to `round(...; digits)`. Defaults to `6`.

# Returns
A vector with one filtered data frame per input, in the same order. The inputs are
not modified and the stored values are not rounded.

# Throws
- `ErrorException` if `dfs` is empty.
- `ErrorException` if `column_name` is missing from any data frame.

# Examples
```julia
filtered_dfs = filter_matching_rows(dfs, "latitude"; round_to=5)
```
"""
function filter_matching_rows(
    dfs::AbstractVector, column_name::AbstractString; round_to::Integer=6
)
    # Ensure there is at least one data frame
    if isempty(dfs)
        error("The input vector must contain at least one data frame.")
    end

    # Ensure the column exists in all data frames
    if !all(df -> column_name in names(df), dfs)
        error("The specified column must exist in all data frames.")
    end

    rounded(x) = round(x; digits=round_to)

    # Rounded values that are present in every data frame
    matching_values = mapreduce(df -> Set(rounded.(df[!, column_name])), intersect, dfs)

    # Column-selector form of `filter`: only `column_name` is handed to the predicate
    # instead of materializing a whole row object for each row.
    keep(x) = rounded(x) in matching_values
    return [filter(column_name => keep, df) for df in dfs]
end


filter_matching_rows (generic function with 2 methods)

In [100]:
"""
    get_filtered_points(dfs::AbstractVector, column_name::AbstractString; round_to::Integer=6)

Trim the data frames in `dfs` to their common points and return only the
coordinates needed for displaying them.

The data frames (typically produced by `get_folder_dataframes`) are first passed
through `filter_matching_rows`, which compares them on `column_name` after rounding
to `round_to` digits. In each filtered data frame the `column_name` column is then
rounded to `round_to` digits, and only the columns `latitude`, `longitude` and
`altitude` are kept. Columns other than `column_name` are not rounded.

# Keywords
- `round_to`: number of digits used for both the comparison and the rounding of
  `column_name`. Defaults to `6`.

# Returns
A `Vector{Any}` holding one three-column data frame per input, in the same order.

# Throws
Whatever `filter_matching_rows` throws (e.g. for an empty `dfs` or a missing
column); `select` fails if one of the three coordinate columns is absent.

# Examples
```julia
filtered_points = get_filtered_points(folder_dataframes, "latitude"; round_to=3)
```
"""
function get_filtered_points(
    dfs::AbstractVector, column_name::AbstractString; round_to::Integer=6
)
    # Normalize the arguments so the call succeeds even if `filter_matching_rows`
    # only accepts a `Vector{Any}` and a `String`.
    filtered_dfs = filter_matching_rows(
        collect(Any, dfs), String(column_name); round_to=round_to
    )

    filtered_points = Any[]
    for df in filtered_dfs
        # Round the comparison column in place on the filtered data frame.
        column = df[!, column_name]
        column .= round.(column; digits=round_to)
        push!(filtered_points, select(df, ["latitude", "longitude", "altitude"]))
    end

    return filtered_points
end

get_filtered_points (generic function with 1 method)

In [101]:
# Load every file of the "Sweetwater" data folder as a data frame
# (the trailing `;` suppresses the cell output).
base_dfs = get_folder_dataframes("Sweetwater");

In [106]:
# Reduce the data frames to the points whose latitude, rounded to 6 digits, occurs
# in all of them; only latitude/longitude/altitude columns are returned.
a = get_filtered_points(base_dfs, "latitude"; round_to=6)

2-element Vector{Any}:
 [1m171×3 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m[1m altitude [0m
     │[90m Float64  [0m[90m Float64   [0m[90m Float64  [0m
─────┼───────────────────────────────
   1 │  29.6447   -82.3487   7.59701
   2 │  29.6447   -82.3487   8.0551
   3 │  29.6447   -82.3487   7.69075
   4 │  29.6446   -82.3487   7.42226
   5 │  29.6446   -82.3487   7.24892
   6 │  29.6446   -82.3487   7.24687
   7 │  29.6446   -82.3487   7.32275
   8 │  29.6446   -82.3487   5.6
  ⋮  │    ⋮          ⋮         ⋮
 165 │  29.6419   -82.3482  -2.2
 166 │  29.6419   -82.3482  -2.2
 167 │  29.6418   -82.3482  -2.2
 168 │  29.6418   -82.3482  -2.2
 169 │  29.6418   -82.3482  -2.2
 170 │  29.6418   -82.3482  -1.2
 171 │  29.6418   -82.3482  -3.4
[36m                     156 rows omitted[0m
 [1m173×3 DataFrame[0m
[1m Row [0m│[1m latitude [0m[1m longitude [0m[1m altitude [0m
     │[90m Float64  [0m[90m Float64   [0m[90m Float64  [0m
─────┼───────────