From 5bd351f97546cdae71c9e6189e24a165d1d44e84 Mon Sep 17 00:00:00 2001 From: laurent Date: Wed, 26 Jun 2019 21:11:28 +0100 Subject: [PATCH] Add some map functions. --- src/dataframe/array_intf.ml | 1 + src/dataframe/column.ml | 7 +++++++ src/dataframe/column.mli | 2 ++ src/dataframe/df.ml | 25 +++++++++++++++++++++++++ src/dataframe/df.mli | 9 +++++++++ src/tests/df_tests.ml | 20 +++++++++++++++++++- 6 files changed, 63 insertions(+), 1 deletion(-) diff --git a/src/dataframe/array_intf.ml b/src/dataframe/array_intf.ml index 489f26c..bd0437f 100644 --- a/src/dataframe/array_intf.ml +++ b/src/dataframe/array_intf.ml @@ -15,6 +15,7 @@ end (* Arrays could be based on different storage types, e.g. array, bigarray, ... *) +(* TODO: add [init] ? *) module type S = sig type t diff --git a/src/dataframe/column.ml b/src/dataframe/column.ml index 93d4973..297d277 100644 --- a/src/dataframe/column.ml +++ b/src/dataframe/column.ml @@ -110,6 +110,12 @@ let select (type a b) (t : (a, b) t) ~indexes = Array.iteri indexes ~f:(fun i index -> M.set data i (M.get t.data index)); { mod_ = t.mod_; data }) +let map : type a b c d.
(a, b) t -> (c, d) Array_intf.t -> f:(a -> c) -> (c, d) t = + fun t (module M) ~f -> + Array.init (length t) ~f:(fun i -> get t i |> f) + |> M.of_array + |> of_data (module M) + let fold (type a b) (t : (a, b) t) ~init ~f = let (module M) = t.mod_ in let acc = ref init in @@ -144,3 +150,4 @@ let packed_elt_name (P t) = elt_name t let packed_to_string ?max_rows ?filter (P t) = to_string ?max_rows ?filter t let packed_get_string (P t) i = get_string t i let packed_select (P t) ~indexes = P (select t ~indexes) + diff --git a/src/dataframe/column.mli b/src/dataframe/column.mli index 168320f..3eacd26 100644 --- a/src/dataframe/column.mli +++ b/src/dataframe/column.mli @@ -93,3 +93,5 @@ val packed_elt_name : packed -> string val packed_to_string : ?max_rows:int -> ?filter:Bool_array.t -> packed -> string val packed_get_string : packed -> int -> string val packed_select : packed -> indexes:int array -> packed + +val map : ('a, 'b) t -> ('c, 'd) Array_intf.t -> f:('a -> 'c) -> ('c, 'd) t diff --git a/src/dataframe/df.ml b/src/dataframe/df.ml index 8c53d50..17c2784 100644 --- a/src/dataframe/df.ml +++ b/src/dataframe/df.ml @@ -269,6 +269,31 @@ let add_column_exn t ~name column = add_column t ~name column |> Or_error.ok_exn let map_and_add_column t ~name mod_ f = add_column t ~name (map t mod_ f) let map_and_add_column_exn t ~name mod_ f = add_column_exn t ~name (map t mod_ f) +let map_one: type a b c d.
+ _ t + -> name:string + -> src:(c, d) Array_intf.t + -> dst:(a, b) Array_intf.t + -> f:(c -> a) + -> (a, b) Column.t + = fun t ~name ~src ~dst ~f -> + let (P column) = get_column_exn t name in + let (module M) = Column.mod_ column in + let (module M') = src in + let (module M_dst) = dst in + match Type_equal.Id.same_witness M.type_id M'.type_id with + | Some T -> + Array.init (Column.length column) ~f:(fun i -> Column.get column i |> f) + |> M_dst.of_array + |> Column.of_data dst + | None -> + Printf.failwithf + "type mismatch for column %s (expected %s got %s)" + name + M'.Elt.name + M.Elt.name + () + let sort (type a) (t : a t) f ~compare = let indexes = let f = Staged.unstage (f (P t)) in diff --git a/src/dataframe/df.mli b/src/dataframe/df.mli index d500c6c..4fabfcc 100644 --- a/src/dataframe/df.mli +++ b/src/dataframe/df.mli @@ -133,6 +133,15 @@ val filter : _ t -> bool R.t -> [ `filtered ] t *) val map : _ t -> ('a, 'b) Array_intf.t -> 'a R.t -> ('a, 'b) Column.t +(** Similar to [map] but using only column [name]; fails if its type is not [src]. *) +val map_one + : _ t + -> name:string + -> src:('c, _) Array_intf.t + -> dst:('a, 'b) Array_intf.t + -> f:('c -> 'a) + -> ('a, 'b) Column.t + (** [map_and_column ?only_filtered t ~name f] returns a dataframe similar to [t] but also adding a column [name] which values are obtained by applying [f] to each row in [t]. diff --git a/src/tests/df_tests.ml b/src/tests/df_tests.ml index 167acc3..d1d2a06 100644 --- a/src/tests/df_tests.ml +++ b/src/tests/df_tests.ml @@ -267,5 +267,23 @@ let%expect_test _ = 1. 1. 8. 8. 2. 2. - |}] + |}]; + let column = + Df.map_one df + ~name:col_e1 + ~src:Native_array.float + ~dst:Native_array.string + ~f:(Printf.sprintf "%.2f") + in + Column.to_string column |> Stdio.printf "%s\n%!"; + [%expect {| + 0 2.00 + 1 7.00 + 2 1.00 + 3 8.00 + 4 2.00 + 5 8.00 + 6 1.00 + 7 8.00 + 8 2.00 |}] )