Skip to content

Commit

Permalink
Add some map functions.
Browse files Browse the repository at this point in the history
  • Loading branch information
LaurentMazare committed Jun 26, 2019
1 parent 258e927 commit 5bd351f
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 1 deletion.
1 change: 1 addition & 0 deletions src/dataframe/array_intf.ml
Expand Up @@ -15,6 +15,7 @@ end
(* Arrays could be based on different storage types, e.g.
array, bigarray, ...
*)
(* TODO: add [init] ? *)
module type S = sig
type t

Expand Down
7 changes: 7 additions & 0 deletions src/dataframe/column.ml
Expand Up @@ -110,6 +110,12 @@ let select (type a b) (t : (a, b) t) ~indexes =
Array.iteri indexes ~f:(fun i index -> M.set data i (M.get t.data index));
{ mod_ = t.mod_; data })

(* [map t (module M) ~f] applies [f] to each of the [length t] elements of
   [t], read with [get], and stores the results in a new column backed by
   the array implementation [M]. *)
let map : type a b c d. (a, b) t -> (c, d) Array_intf.t -> f:(a -> c) -> (c, d) t =
 fun t (module M) ~f ->
  let mapped = Array.init (length t) ~f:(fun index -> f (get t index)) in
  M.of_array mapped |> of_data (module M)

let fold (type a b) (t : (a, b) t) ~init ~f =
let (module M) = t.mod_ in
let acc = ref init in
Expand Down Expand Up @@ -144,3 +150,4 @@ let packed_elt_name (P t) = elt_name t
(* Unpacks the column held by [P] and renders it with [to_string],
   forwarding both optional arguments unchanged. *)
let packed_to_string ?max_rows ?filter packed =
  let (P column) = packed in
  to_string ?max_rows ?filter column
(* Unpacks the column held by [P] and returns [get_string] of it at
   index [i]. *)
let packed_get_string packed i =
  let (P column) = packed in
  get_string column i
(* Unpacks the column held by [P], applies [select ~indexes] to it and
   wraps the resulting column back into [P]. *)
let packed_select packed ~indexes =
  let (P column) = packed in
  P (select column ~indexes)

2 changes: 2 additions & 0 deletions src/dataframe/column.mli
Expand Up @@ -93,3 +93,5 @@ val packed_elt_name : packed -> string
(** [packed_to_string ?max_rows ?filter p] is [to_string ?max_rows ?filter]
    applied to the column held by [p]. *)
val packed_to_string : ?max_rows:int -> ?filter:Bool_array.t -> packed -> string

(** [packed_get_string p i] is [get_string] applied to the column held by
    [p] at index [i]. *)
val packed_get_string : packed -> int -> string

(** [packed_select p ~indexes] applies [select ~indexes] to the column held
    by [p] and returns the result as a packed column. *)
val packed_select : packed -> indexes:int array -> packed

(** [map t mod_ ~f] returns a new column, stored with the array
    implementation [mod_], built by applying [f] to each of the [length t]
    elements of [t] in index order. *)
val map : ('a, 'b) t -> ('c, 'd) Array_intf.t -> f:('a -> 'c) -> ('c, 'd) t
25 changes: 25 additions & 0 deletions src/dataframe/df.ml
Expand Up @@ -269,6 +269,31 @@ let add_column_exn t ~name column = add_column t ~name column |> Or_error.ok_exn
(* Computes the column [map t mod_ f] and hands it to [add_column] under
   [name]; the result is whatever [add_column] returns. *)
let map_and_add_column t ~name mod_ f =
  let column = map t mod_ f in
  add_column t ~name column
(* Same as [map_and_add_column] but the computed column is added with
   [add_column_exn]. *)
let map_and_add_column_exn t ~name mod_ f =
  let column = map t mod_ f in
  add_column_exn t ~name column

(* [map_one t ~name ~src ~dst ~f] applies [f] to every element of the column
   [name] of [t] and returns the results as a new column stored with the
   array implementation [dst].

   [src] is the array implementation the caller believes the column uses for
   its elements. Its [type_id] is compared with the one of the column's own
   module; when they differ the function fails through [Printf.failwithf].
   In that message "expected" is filled with the element name of the
   column's module and "got" with the element name of [src].

   The column is looked up with [get_column_exn]; presumably this raises
   when [name] is not a column of [t] (its definition is not shown here). *)
let map_one : type a b c d.
    _ t
    -> name:string
    -> src:(c, d) Array_intf.t
    -> dst:(a, b) Array_intf.t
    -> f:(c -> a)
    -> (a, b) Column.t
  =
 fun t ~name ~src ~dst ~f ->
  let (P column) = get_column_exn t name in
  let (module M) = Column.mod_ column in
  let (module M') = src in
  match Type_equal.Id.same_witness M.type_id M'.type_id with
  | Some T ->
    (* The witness proves the column's elements have type [c], so the
       element-wise mapping is delegated to [Column.map] instead of being
       duplicated here. *)
    Column.map column dst ~f
  | None ->
    Printf.failwithf
      "type mismatch for column %s (expected %s got %s)"
      name
      M.Elt.name
      M'.Elt.name
      ()

let sort (type a) (t : a t) f ~compare =
let indexes =
let f = Staged.unstage (f (P t)) in
Expand Down
9 changes: 9 additions & 0 deletions src/dataframe/df.mli
Expand Up @@ -133,6 +133,15 @@ val filter : _ t -> bool R.t -> [ `filtered ] t
*)
val map : _ t -> ('a, 'b) Array_intf.t -> 'a R.t -> ('a, 'b) Column.t

(** [map_one t ~name ~src ~dst ~f] is similar to [map] but uses a single
    column only: [f] is applied to each element of the column [name] and the
    results are returned as a new column stored with [dst].

    [src] describes the element type the column [name] is expected to hold.
    The call fails with a "type mismatch" error when the type of the column
    does not match [src]. *)
val map_one
: _ t
-> name:string
-> src:('c, _) Array_intf.t
-> dst:('a, 'b) Array_intf.t
-> f:('c -> 'a)
-> ('a, 'b) Column.t

(** [map_and_column ?only_filtered t ~name f] returns a dataframe similar
to [t] but also adding a column [name] whose values are obtained by
applying [f] to each row in [t].
Expand Down
20 changes: 19 additions & 1 deletion src/tests/df_tests.ml
Expand Up @@ -267,5 +267,23 @@ let%expect_test _ =
1. 1.
8. 8.
2. 2.
|}]
|}];
let column =
Df.map_one df
~name:col_e1
~src:Native_array.float
~dst:Native_array.string
~f:(Printf.sprintf "%.2f")
in
Column.to_string column |> Stdio.printf "%s\n%!";
[%expect {|
0 2.00
1 7.00
2 1.00
3 8.00
4 2.00
5 8.00
6 1.00
7 8.00
8 2.00 |}]
)

0 comments on commit 5bd351f

Please sign in to comment.