Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/BaselineOfDataFrame/BaselineOfDataFrame.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,17 @@ BaselineOfDataFrame >> baseline: spec [
spec
baseline: 'NeoCSV'
with: [ spec repository: 'github://svenvc/NeoCSV/repository' ].
spec
baseline: 'NeoJSON'
with: [ spec repository: 'github://svenvc/NeoJSON/repository' ].

"Packages"
spec
package: 'DataFrame';
package: 'DataFrame-Tests' with: [ spec requires: #('DataFrame') ];
package: 'DataFrame-Type' with: [ spec requires: #('DataFrame') ];
package: 'DataFrame-Type-Tests' with: [ spec requires: #('DataFrame-Type') ];
package: 'DataFrame-IO' with: [ spec requires: #('DataFrame' 'DataFrame-Type' 'NeoCSV') ];
package: 'DataFrame-IO' with: [ spec requires: #('DataFrame' 'DataFrame-Type' 'NeoCSV' 'NeoJSON') ];
package: 'DataFrame-IO-Tests' with: [ spec requires: #('DataFrame-IO') ]].

spec
Expand Down
148 changes: 148 additions & 0 deletions src/DataFrame-IO-Tests/DataFrameJsonReaderTest.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
"
Test case for reading JSON into a DataFrame, through DataFrameJsonReader and through the DataFrame class-side readFromJson: helpers.

Instance variables, all assigned in setUp:
- directory: folder in an in-memory file system that receives the JSON fixture files
- df: 4x4 expected data frame with neither row nor column names assigned
- dfWithColNames: copy of df with column names col1..col4
- dfWithRowNames: copy of df with row names row1..row4
- dfWithRowColNames: copy of df with both row and column names
"
Class {
#name : #DataFrameJsonReaderTest,
#superclass : #TestCase,
#instVars : [
'directory',
'df',
'dfWithColNames',
'dfWithRowNames',
'dfWithRowColNames'
],
#category : #'DataFrame-IO-Tests'
}

{ #category : #running }
DataFrameJsonReaderTest >> createFileNamed: name withContents: aString [
	"Create (or open for writing) the file called name inside the test directory and write aString into it.
	name    - file name relative to the directory instance variable
	aString - text to store in the file"

	"writeStreamDo: closes the stream in an ensure block, so the stream is released even when writing signals an error"
	(directory / name) writeStreamDo: [ :stream | stream nextPutAll: aString ]
]

{ #category : #running }
DataFrameJsonReaderTest >> setUp [
	"Prepare an in-memory directory holding one JSON file per fixture string, then build the expected data frames the tests compare against."

	super setUp.
	directory := FileSystem memory workingDirectory / 'testDataForJson'.
	directory createDirectory.

	"File name -> fixture contents"
	{
		'column.json' -> TestJsonStrings columnJsonString.
		'index.json' -> TestJsonStrings indexJsonString.
		'nonNull1.json' -> TestJsonStrings nonNullJsonString1.
		'records.json' -> TestJsonStrings recordsJsonString.
		'split.json' -> TestJsonStrings splitJsonString.
		'values.json' -> TestJsonStrings valuesJsonString
	} do: [ :fixture |
		self createFileNamed: fixture key withContents: fixture value ].

	"Base frame without any names assigned"
	df := DataFrame withRows: #(
		(1 2 nil nil)
		(nil 2 3 nil)
		(nil nil nil 5)
		(1 nil 2 nil)).

	"Named variants are independent deep copies of the base frame"
	dfWithColNames := df deepCopy
		columnNames: #('col1' 'col2' 'col3' 'col4');
		yourself.

	dfWithRowNames := df deepCopy
		rowNames: #('row1' 'row2' 'row3' 'row4');
		yourself.

	dfWithRowColNames := df deepCopy
		columnNames: #('col1' 'col2' 'col3' 'col4');
		rowNames: #('row1' 'row2' 'row3' 'row4');
		yourself
]

{ #category : #running }
DataFrameJsonReaderTest >> sortByRowColNames: inputDf [
	"Answer a new data frame with the contents of inputDf, its columns and its rows both arranged by sorted name. inputDf itself is only read."

	| columnOrder rowOrder columnsDone result |
	columnOrder := inputDf columnNames sorted.
	rowOrder := inputDf rowNames sorted.

	"First pass: keep the incoming row order, append columns in sorted order"
	columnsDone := DataFrame withRowNames: inputDf rowNames.
	columnOrder do: [ :columnName |
		columnsDone
			addColumn: (inputDf column: columnName) asArray
			named: columnName ].

	"Second pass: start from the sorted columns, append rows in sorted order"
	result := DataFrame withColumnNames: columnsDone columnNames.
	rowOrder do: [ :rowName |
		result
			addRow: (columnsDone row: rowName) asArray
			named: rowName ].

	^ result
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFrom [
	"A default reader applied to nonNull1.json should give, once sorted by names, the fixture that has column names only."

	| jsonFile sorted |
	jsonFile := directory / 'nonNull1.json'.
	sorted := self sortByRowColNames: (DataFrameJsonReader new readFrom: jsonFile).
	self assert: sorted equals: dfWithColNames
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromColumns [
	"A default reader applied to column.json should give, once sorted by names, the fixture that has both row and column names."

	| jsonFile sorted |
	jsonFile := directory / 'column.json'.
	sorted := self sortByRowColNames: (DataFrameJsonReader new readFrom: jsonFile).
	self assert: sorted equals: dfWithRowColNames
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromIndex [
	"A default reader applied to index.json is expected to give the transposed version of the row-and-column-named fixture."

	| jsonFile sorted flipped |
	jsonFile := directory / 'index.json'.
	sorted := self sortByRowColNames: (DataFrameJsonReader new readFrom: jsonFile).
	flipped := dfWithRowColNames transposed.
	self assert: sorted equals: flipped
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromJson [
	"The class-side DataFrame readFromJson: applied to nonNull1.json should give, once sorted by names, the fixture that has column names only."

	| jsonFile sorted |
	jsonFile := directory / 'nonNull1.json'.
	sorted := self sortByRowColNames: (DataFrame readFromJson: jsonFile).
	self assert: sorted equals: dfWithColNames
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromJsonOrient [
	"The class-side DataFrame readFromJson:orient: applied to split.json with orient 'split' should give, once sorted by names, the fixture that has both row and column names."

	| jsonFile sorted |
	jsonFile := directory / 'split.json'.
	sorted := self sortByRowColNames: (DataFrame readFromJson: jsonFile orient: 'split').
	self assert: sorted equals: dfWithRowColNames
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromRecords [
	"A default reader applied to records.json should give, once sorted by names, the fixture that has column names only."

	| jsonFile sorted |
	jsonFile := directory / 'records.json'.
	sorted := self sortByRowColNames: (DataFrameJsonReader new readFrom: jsonFile).
	self assert: sorted equals: dfWithColNames
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromSplit [
	"A default reader applied to split.json should give, once sorted by names, the fixture that has both row and column names."

	| jsonFile sorted |
	jsonFile := directory / 'split.json'.
	sorted := self sortByRowColNames: (DataFrameJsonReader new readFrom: jsonFile).
	self assert: sorted equals: dfWithRowColNames
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromString [
	"Here readFrom: receives the JSON text itself rather than a file reference; the sorted result should equal the fixture that has both row and column names."

	| jsonText sorted |
	jsonText := TestJsonStrings nonNullJsonString2.
	sorted := self sortByRowColNames: (DataFrameJsonReader new readFrom: jsonText).
	self assert: sorted equals: dfWithRowColNames
]

{ #category : #tests }
DataFrameJsonReaderTest >> testReadFromValues [
	"A default reader applied to values.json should give, once sorted by names, the base fixture that has no names assigned."

	| jsonFile sorted |
	jsonFile := directory / 'values.json'.
	sorted := self sortByRowColNames: (DataFrameJsonReader new readFrom: jsonFile).
	self assert: sorted equals: df
]
75 changes: 75 additions & 0 deletions src/DataFrame-IO-Tests/DataFrameJsonWriterTest.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"
Test case for writing a DataFrame as JSON, through DataFrameJsonWriter and through the DataFrame writeToJson: helpers.

Instance variables, both assigned in setUp:
- directory: folder in an in-memory file system that receives the written files
- df: 4x4 data frame with row names row1..row4 and column names col1..col4
"
Class {
#name : #DataFrameJsonWriterTest,
#superclass : #TestCase,
#instVars : [
'directory',
'df'
],
#category : #'DataFrame-IO-Tests'
}

{ #category : #running }
DataFrameJsonWriterTest >> readFile: aFileReference [
	"Answer the complete contents of aFileReference as read from its read stream.
	aFileReference - reference to an existing file"

	"readStreamDo: closes the stream in an ensure block, so the stream is released even when reading signals an error"
	^ aFileReference readStreamDo: [ :stream | stream upToEnd ]
]

{ #category : #running }
DataFrameJsonWriterTest >> setUp [
	"Prepare an in-memory output directory and the fully named data frame that every test writes out."

	| cells |
	super setUp.
	directory := FileSystem memory workingDirectory / 'testDataForJson'.
	directory createDirectory.

	cells := #(
		(1 2 nil nil)
		(nil 2 3 nil)
		(nil nil nil 5)
		(1 nil 2 nil)).

	df := DataFrame
		withRows: cells
		rowNames: #('row1' 'row2' 'row3' 'row4')
		columnNames: #('col1' 'col2' 'col3' 'col4')
]

{ #category : #tests }
DataFrameJsonWriterTest >> testWriteAsString [
	"writeAsString: with a default writer should answer the records fixture with all whitespace removed."

	"The stray period that followed the temporaries declaration (an empty statement) has been removed"
	| actual expected |
	actual := DataFrameJsonWriter new writeAsString: df.

	"Concatenate the non-whitespace runs of the fixture to drop its line breaks"
	expected := '' join: (TestJsonStrings recordsJsonString
		regex: '[^\s]+' matchesCollect: [ :x | x ]).
	self assert: actual equals: expected
]

{ #category : #tests }
DataFrameJsonWriterTest >> testWriteTo [
	"write:to: with a default writer should produce a file whose lines equal those of the records fixture with all whitespace removed."

	| target written chunks compactFixture |
	target := directory / 'output.json'.
	DataFrameJsonWriter new write: df to: target.
	written := self readFile: target.

	"Concatenate the non-whitespace runs of the fixture to drop its line breaks"
	chunks := TestJsonStrings recordsJsonString
		regex: '[^\s]+'
		matchesCollect: [ :each | each ].
	compactFixture := '' join: chunks.

	self assert: written lines equals: compactFixture lines
]

{ #category : #tests }
DataFrameJsonWriterTest >> testWriteToJson [
	"The DataFrame writeToJson: helper should produce a file whose lines equal those of the records fixture with all whitespace removed."

	| target written chunks compactFixture |
	target := directory / 'output.json'.
	df writeToJson: target.
	written := self readFile: target.

	"Concatenate the non-whitespace runs of the fixture to drop its line breaks"
	chunks := TestJsonStrings recordsJsonString
		regex: '[^\s]+'
		matchesCollect: [ :each | each ].
	compactFixture := '' join: chunks.

	self assert: written lines equals: compactFixture lines
]

{ #category : #tests }
DataFrameJsonWriterTest >> testWriteToJsonOrient [
	"The DataFrame writeToJson:orient: helper with orient 'values' should produce a file whose lines equal those of the values fixture with all whitespace removed."

	| target written chunks compactFixture |
	target := directory / 'output.json'.
	df writeToJson: target orient: 'values'.
	written := self readFile: target.

	"Concatenate the non-whitespace runs of the fixture to drop its line breaks"
	chunks := TestJsonStrings valuesJsonString
		regex: '[^\s]+'
		matchesCollect: [ :each | each ].
	compactFixture := '' join: chunks.

	self assert: written lines equals: compactFixture lines
]
60 changes: 60 additions & 0 deletions src/DataFrame-IO-Tests/TestJsonStrings.class.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
"
Holder of JSON fixture strings, exposed as class-side methods, used by the JSON reader and writer tests of the DataFrame-IO-Tests package.
"
Class {
#name : #TestJsonStrings,
#superclass : #Object,
#category : #'DataFrame-IO-Tests'
}

{ #category : #running }
TestJsonStrings class >> columnJsonString [
"Answer a JSON object keyed by column name (col1..col4); each column maps the row names row1..row4 to a float value or null."
^ '{"col1":{"row1":1.0,"row2":null,"row3":null,"row4":1.0},
"col2":{"row1":2.0,"row2":2.0,"row3":null,"row4":null},
"col3":{"row1":null,"row2":3.0,"row3":null,"row4":2.0},
"col4":{"row1":null,"row2":null,"row3":5.0,"row4":null}}'
]

{ #category : #running }
TestJsonStrings class >> indexJsonString [
"Answer a JSON object keyed by row name (row1..row4); each row maps the column names col1..col4 to a float value or null."
^ '{"row1":{"col1":1.0,"col2":2.0,"col3":null,"col4":null},
"row2":{"col1":null,"col2":2.0,"col3":3.0,"col4":null},
"row3":{"col1":null,"col2":null,"col3":null,"col4":5.0},
"row4":{"col1":1.0,"col2":null,"col3":2.0,"col4":null}}'
]

{ #category : #running }
TestJsonStrings class >> nonNullJsonString1 [
"Answer a JSON array of four objects, one per row, where each object lists only some of the keys col1..col4 and contains no explicit null."
^ '[{"col1":1, "col2":2},
{"col3":3, "col2":2},
{"col4":5},
{"col1":1, "col3":2}]'
]

{ #category : #running }
TestJsonStrings class >> nonNullJsonString2 [
"Answer a JSON object keyed by column name (col1..col4) where each column lists only some of the rows row1..row4 and contains no explicit null."
^ '{
"col1": {"row1":1, "row4":1},
"col2": {"row1":2, "row2":2},
"col3": {"row2":3, "row4":2},
"col4": {"row3":5}
}'
]

{ #category : #running }
TestJsonStrings class >> recordsJsonString [
"Answer a JSON array of four objects, one per row; every object has all of the keys col1..col4 with an integer value or null. No row names are included."
^ '[{"col1":1,"col2":2,"col3":null,"col4":null},
{"col1":null,"col2":2,"col3":3,"col4":null},
{"col1":null,"col2":null,"col3":null,"col4":5},
{"col1":1,"col2":null,"col3":2,"col4":null}]'
]

{ #category : #running }
TestJsonStrings class >> splitJsonString [
"Answer a JSON object with three keys: columns (the names col1..col4), index (the names row1..row4) and data (four arrays of four float-or-null values each)."
^ '{"columns":["col1","col2","col3","col4"],
"index":["row1","row2","row3","row4"],
"data":[[1.0,2.0,null,null],[null,2.0,3.0,null],[null,null,null,5.0],[1.0,null,2.0,null]]}'
]

{ #category : #running }
TestJsonStrings class >> valuesJsonString [
"Answer a JSON array of four arrays, each holding four integer-or-null values; neither row names nor column names are included."
^ '[[1,2,null,null],[null,2,3,null],
[null,null,null,5],[1,null,2,null]]'
]
47 changes: 47 additions & 0 deletions src/DataFrame-IO/DataFrame.extension.st
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,21 @@ DataFrame class >> readFromCsvWithRowNames: aFileReference separator: aSeparator
^ self readFrom: aFileReference using: reader
]

{ #category : #'*DataFrame-IO' }
DataFrame class >> readFromJson: aFileReference [
	"Answer the result of reading aFileReference through readFrom:using: with a freshly created DataFrameJsonReader left in its default configuration."

	^ self readFrom: aFileReference using: DataFrameJsonReader new
]

{ #category : #'*DataFrame-IO' }
DataFrame class >> readFromJson: aFileReference orient: orient [
	"Answer the result of reading aFileReference through readFrom:using: with a freshly created DataFrameJsonReader whose orient has been set to the given value."

	| jsonReader |
	jsonReader := DataFrameJsonReader new
		orient: orient;
		yourself.
	^ self readFrom: aFileReference using: jsonReader
]

{ #category : #'*DataFrame-IO' }
DataFrame >> writeTo: aLocation using: aDataFrameWriter [
"Write data frame to a given location using a given DataFrameWriter. Location can be a file reference, a database connection, or something else (depending on the implementation of the writer)"
Expand All @@ -69,3 +84,35 @@ DataFrame >> writeToCsv: aFileReference withSeparator: aSeparator [
writer separator: aSeparator.
self writeTo: aFileReference using: writer.
]

{ #category : #'*DataFrame-IO' }
DataFrame >> writeToJson: aFileReference [
	"Write the receiver to aFileReference through writeTo:using: with a freshly created DataFrameJsonWriter left in its default configuration."

	self writeTo: aFileReference using: DataFrameJsonWriter new
]

{ #category : #'*DataFrame-IO' }
DataFrame >> writeToJson: aFileReference orient: orient [
	"Write the receiver to aFileReference through writeTo:using: with a freshly created DataFrameJsonWriter whose orient has been set to the given value."

	| jsonWriter |
	jsonWriter := DataFrameJsonWriter new
		orient: orient;
		yourself.
	self writeTo: aFileReference using: jsonWriter
]

{ #category : #'*DataFrame-IO' }
DataFrame >> writeToPrettyJson: aFileReference [
	"Write the receiver to aFileReference through writeTo:using: with a freshly created DataFrameJsonWriter whose pretty flag has been set to true."

	| jsonWriter |
	jsonWriter := DataFrameJsonWriter new
		pretty: true;
		yourself.
	self writeTo: aFileReference using: jsonWriter
]

{ #category : #'*DataFrame-IO' }
DataFrame >> writeToPrettyJson: aFileReference orient: orient [
	"Write the receiver to aFileReference through writeTo:using: with a freshly created DataFrameJsonWriter whose orient has been set to the given value and whose pretty flag has been set to true."

	| jsonWriter |
	jsonWriter := DataFrameJsonWriter new
		orient: orient;
		pretty: true;
		yourself.
	self writeTo: aFileReference using: jsonWriter
]
Loading