Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions src/DataFrame-IO-Tests/DataFrameCsvReaderTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ Class {
'commaCsvFile',
'tabCsvFile',
'emptyCsvFile',
'expectedDataFrame'
'expectedDataFrame',
'dataFrameWithoutRowNames',
'dataFrameWithRowNames'
],
#category : #'DataFrame-IO-Tests'
}
Expand All @@ -32,30 +34,55 @@ DataFrameCsvReaderTest >> setUp [
self createFile: tabCsvFile withContents: TestCsvStrings tabCsvString.
self createFile: emptyCsvFile withContents: TestCsvStrings emptyCsvString.

expectedDataFrame := DataFrame withRows: #(
dataFrameWithRowNames := DataFrame withRows: #(
(2.4 true 'rain')
(0.5 true 'rain')
(-1.2 true 'snow')
(-2.3 false '-')
(3.2 true 'rain')).

expectedDataFrame columnNames: #(temperature precipitation type).
expectedDataFrame rowNames: (#('01:10' '01:30' '01:50' '02:10' '02:30')
dataFrameWithRowNames columnNames: #(temperature precipitation type).
dataFrameWithRowNames rowNames: (#('01:10' '01:30' '01:50' '02:10' '02:30')
collect: #asTime).

dataFrameWithoutRowNames := DataFrame withRows: {
{ '01:10' asTime . 2.4 . true . 'rain' } .
{ '01:30' asTime . 0.5 . true . 'rain' } .
{ '01:50' asTime . -1.2 . true . 'snow' } .
{ '02:10' asTime . -2.3 . false . '-' } .
{ '02:30' asTime . 3.2 . true . 'rain' }}.

dataFrameWithoutRowNames columnNames: #(nil temperature precipitation type).
]

{ #category : #tests }
DataFrameCsvReaderTest >> testReadCsv [
	"Reading commaCsvFile through DataFrame class >> readFromCsv: must answer a data frame equal to dataFrameWithoutRowNames."

	self
		assert: (DataFrame readFromCsv: commaCsvFile)
		equals: dataFrameWithoutRowNames
]

{ #category : #tests }
DataFrameCsvReaderTest >> testReadCsvWithRowNames [
| actualDataFrame |
actualDataFrame := DataFrame readFromCsvWithRowNames: commaCsvFile.
self assert: actualDataFrame equals: expectedDataFrame.
self assert: actualDataFrame equals: dataFrameWithRowNames.

]

{ #category : #tests }
DataFrameCsvReaderTest >> testReadCsvWithSeparatorTab [
DataFrameCsvReaderTest >> testReadCsvWithRowNamesWithSeparatorTab [
| actualDataFrame |
actualDataFrame := DataFrame readFromCsvWithRowNames: tabCsvFile separator: Character tab.
self assert: actualDataFrame equals: expectedDataFrame.
self assert: actualDataFrame equals: dataFrameWithRowNames.

]

{ #category : #tests }
DataFrameCsvReaderTest >> testReadCsvWithSeparatorTab [
	"Reading tabCsvFile with a tab character as separator must answer a data frame equal to dataFrameWithoutRowNames."

	self
		assert: (DataFrame readFromCsv: tabCsvFile withSeparator: Character tab)
		equals: dataFrameWithoutRowNames
]
11 changes: 11 additions & 0 deletions src/DataFrame-IO-Tests/DataFrameCsvWriterTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,17 @@ DataFrameCsvWriterTest >> testWriteToCsv [
self assert: actual lines equals: expected lines.
]

{ #category : #tests }
DataFrameCsvWriterTest >> testWriteToCsvLineEndLf [
	"Write dataFrame with a writer configured for the #lf line end convention, then check both the written rows and the line terminator that was actually used."

	| writer actual expected |
	writer := DataFrameCsvWriter new.
	writer lineEndConvention: #lf.
	dataFrame writeTo: commaQuoteCsvFile using: writer.
	actual := self readFile: commaQuoteCsvFile.
	expected := String lf join: TestCsvStrings commaQuoteCsvString lines.

	"#lines splits on any line end convention, so this comparison only covers the row contents."
	self assert: actual lines equals: expected lines.

	"Check the terminator itself. NOTE(review): assumes readFile: answers the raw file contents without line end conversion - confirm."
	self assert: (actual includes: Character lf).
	self deny: (actual includes: Character cr)
]

{ #category : #tests }
DataFrameCsvWriterTest >> testWriteToCsvWithSeparatorTab [
| actual expected |
Expand Down
8 changes: 4 additions & 4 deletions src/DataFrame-IO/DataFrame.extension.st
Original file line number Diff line number Diff line change
Expand Up @@ -10,23 +10,23 @@ DataFrame class >> readFrom: aLocation using: aDataFrameReader [
DataFrame class >> readFromCsv: aFileReference [
| reader |
reader := DataFrameCsvReader new.
^ reader readFrom: aFileReference.
^ self readFrom: aFileReference using: reader.
]

{ #category : #'*DataFrame-IO' }
DataFrame class >> readFromCsv: aFileReference withSeparator: aSeparator [
| reader |
reader := DataFrameCsvReader new.
reader separator: aSeparator.
^ reader readFrom: aFileReference.
^ self readFrom: aFileReference using: reader
]

{ #category : #'*DataFrame-IO' }
DataFrame class >> readFromCsvWithRowNames: aFileReference [
| reader |
reader := DataFrameCsvReader new.
reader includeRowNames: true.
^ reader readFrom: aFileReference.
^ self readFrom: aFileReference using: reader
]

{ #category : #'*DataFrame-IO' }
Expand All @@ -35,7 +35,7 @@ DataFrame class >> readFromCsvWithRowNames: aFileReference separator: aSeparator
reader := DataFrameCsvReader new.
reader includeRowNames: true.
reader separator: aSeparator.
^ reader readFrom: aFileReference.
^ self readFrom: aFileReference using: reader
]

{ #category : #'*DataFrame-IO' }
Expand Down
13 changes: 13 additions & 0 deletions src/DataFrame-Pharo6/OrderedDictionary.extension.st
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Extension { #name : #OrderedDictionary }

{ #category : #'*DataFrame-Pharo6' }
OrderedDictionary class >> newFromKeys: keys andValues: values [
	"Answer a new instance in which each element of keys is associated with the element of values at the same position. Both arguments should have the same length."

	"(self newFromKeys: #(#x #y) andValues: #(3 6)) >>> (self new at: #x put: 3; at: #y put: 6 ;yourself)"

	| result |
	result := self new.
	keys
		with: values
		do: [ :eachKey :eachValue | result at: eachKey put: eachValue ].
	^ result
]
54 changes: 19 additions & 35 deletions src/DataFrame-Tests/DataFrameAggrGroupTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -25,24 +25,25 @@ DataFrameAggrGroupTest >> testAggregateAverage [

| expected actual |

expected := #(18.3433 20.79) asDataSeries.
expected keys: #(Male Female).
expected := DataSeries
withKeys: #(Male Female)
values: #(18.3433 20.79)
name: #total_bill.

actual := (df group: #total_bill by: #sex) average.

self assert: actual closeTo: expected.
]

{ #category : #initialization }
DataFrameAggrGroupTest >> testAggregateMax [

| expected actual |

expected := #(23.68 24.59) asDataSeries.
expected keys: #(Male Female).
expected := DataSeries
withKeys: #(Male Female)
values: #(23.68 24.59)
name: #total_bill.

actual := (df group: #total_bill by: #sex) max.

self assert: actual equals: expected.
]

Expand All @@ -51,11 +52,12 @@ DataFrameAggrGroupTest >> testAggregateMin [

| expected actual |

expected := #(10.34 16.99) asDataSeries.
expected keys: #(Male Female).
expected := DataSeries
withKeys: #(Male Female)
values: #(10.34 16.99)
name: #total_bill.

actual := (df group: #total_bill by: #sex) min.

self assert: actual equals: expected.
]

Expand All @@ -78,37 +80,19 @@ DataFrameAggrGroupTest >> testDataFrameGroupedPrintOn [
self assert: actual equals: expected.
]

{ #category : #initialization }
DataFrameAggrGroupTest >> testDataSeriesGroupedPrintOn [
	"Printing the result of grouping #total_bill by #sex is expected to give a header line followed by one summary line per group."

	| expected actual |
	expected := String streamContents: [ :out |
		out
			nextPutAll: 'a DataSeriesGrouped'; cr;
			nextPutAll: 'Male: a DataSeries [3 items]'; cr;
			nextPutAll: 'Female: a DataSeries [2 items]' ].

	actual := String streamContents: [ :out |
		(df group: #total_bill by: #sex) printOn: out ].

	self assert: actual equals: expected
]

{ #category : #tests }
DataFrameAggrGroupTest >> testGroupSeriesBySeries [

| femaleGroup maleGroup expectedSeries actualSeries |
| femaleGroup maleGroup expected actual |

femaleGroup := #(16.99 24.59) asDataSeries.
maleGroup := #(10.34 21.01 23.68) asDataSeries.

expectedSeries := { maleGroup . femaleGroup } asDataSeries.
expectedSeries keys: #(Male Female).
expected := DataSeries
withKeys: #(Male Female)
values: { maleGroup . femaleGroup }
name: #total_bill.

actualSeries := (df group: #total_bill by: #sex) groups.

self assert: actualSeries equals: expectedSeries.
actual := (df group: #total_bill by: #sex) groups.
self assert: actual equals: expected.
]
58 changes: 0 additions & 58 deletions src/DataFrame-Tests/DataFrameHeadTailTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -104,64 +104,6 @@ DataFrameHeadTailTest >> testDataFrameTailN [
self assert: actual equals: expected.
]

{ #category : #tests }
DataFrameHeadTailTest >> testDataSeriesHead [
	"series head is expected to answer the leading values, carrying the name of series and keys running from 1 to defaultHeadTailSize."

	| actual expected |
	expected := #(5.1 4.9 4.7 7 6.4) asDataSeries.
	expected
		name: series name;
		keys: (1 to: series defaultHeadTailSize).

	actual := series head.
	self assert: actual equals: expected
]

{ #category : #tests }
DataFrameHeadTailTest >> testDataSeriesHeadN [
	"series head: 3 is expected to answer the first three values, carrying the name of series and the keys 1 to 3."

	| actual expected |
	expected := #(5.1 4.9 4.7) asDataSeries.
	expected
		name: series name;
		keys: (1 to: 3).

	actual := series head: 3.
	self assert: actual equals: expected
]

{ #category : #tests }
DataFrameHeadTailTest >> testDataSeriesTail [
	"series tail is expected to answer the trailing values, carrying the name of series and the keys of the last defaultHeadTailSize positions."

	| actual expected |
	expected := #(6.4 6.9 6.3 5.8 7.1) asDataSeries.
	expected
		name: series name;
		keys: (series size - series defaultHeadTailSize + 1 to: series size).

	actual := series tail.
	self assert: actual equals: expected
]

{ #category : #tests }
DataFrameHeadTailTest >> testDataSeriesTailN [
	"series tail: 3 is expected to answer the last three values, carrying the name of series and the keys of the last three positions."

	| actual expected |
	expected := #(6.3 5.8 7.1) asDataSeries.
	expected
		name: series name;
		keys: (series size - 3 + 1 to: series size).

	actual := series tail: 3.
	self assert: actual equals: expected
]

{ #category : #tests }
DataFrameHeadTailTest >> testDefaultHeadTailSize [

Expand Down
2 changes: 1 addition & 1 deletion src/DataFrame-Tests/DataFrameQueriesTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ DataFrameQueriesTest >> testSelect [

actual := df
select: [ :row |
(row atKey: #a) = 'x' and: (row atKey: #b) < 30 ].
(row at: #a) = 'x' and: (row at: #b) < 30 ].

expected := DataFrame withRows: #(
(x 10 0.25 0.1)
Expand Down
18 changes: 9 additions & 9 deletions src/DataFrame-Tests/DataFrameStatsTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ Class {
DataFrameStatsTest >> setUp [

df := DataFrame withRows: #(
(5.1 3.5 1.4 0.2)
(7.1 3.5 1.4 0.2)
(4.9 3 1.4 0.2)
(4.7 3.2 1.3 0.2)
(7 3.2 4.7 1.4)
Expand All @@ -29,7 +29,7 @@ DataFrameStatsTest >> testAverage [

| expected actual |

expected := { 6.022222 . 3.133333 . 3.911111 . 1.277778 } asDataSeries.
expected := { 6.244444 . 3.133333 . 3.911111 . 1.277778 } asDataSeries.
expected name: #average.
expected keys: df columnNames.

Expand All @@ -42,7 +42,7 @@ DataFrameStatsTest >> testFirstQuartile [

| expected actual |

expected := { 5.1 . 3 . 1.4 . 0.2 } asDataSeries.
expected := { 5.8 . 3 . 1.4 . 0.2 } asDataSeries.
expected name: #firstQuartile.
expected keys: df columnNames.

Expand All @@ -55,7 +55,7 @@ DataFrameStatsTest >> testInterquartileRange [

| expected actual |

expected := { 1.8 . 0.2 . 3.7 . 1.7 } asDataSeries.
expected := { 1.2 . 0.2 . 3.7 . 1.7 } asDataSeries.
expected name: #interquartileRange.
expected keys: df columnNames.

Expand All @@ -81,7 +81,7 @@ DataFrameStatsTest >> testMedian [

| expected actual |

expected := { 6.3 . 3.2 . 4.7 . 1.5 } asDataSeries.
expected := { 6.4 . 3.2 . 4.7 . 1.5 } asDataSeries.
expected name: #median.
expected keys: df columnNames.

Expand All @@ -107,7 +107,7 @@ DataFrameStatsTest >> testMode [

| expected actual |

expected := { 4.7 . 3.2 . 1.4 . 0.2 } asDataSeries.
expected := { 7.1 . 3.2 . 1.4 . 0.2 } asDataSeries.
expected name: #mode.
expected keys: df columnNames.

Expand All @@ -133,7 +133,7 @@ DataFrameStatsTest >> testStdev [

| expected actual |

expected := { 0.936453 . 0.223607 . 1.971956 . 0.877180 } asDataSeries.
expected := { 0.927512 . 0.223607 . 1.971956 . 0.877180 } asDataSeries.

expected name: #stdev.
expected keys: df columnNames.
Expand All @@ -147,7 +147,7 @@ DataFrameStatsTest >> testThirdQuartile [

| expected actual |

expected := { 6.9 . 3.2 . 5.1 . 1.9 } asDataSeries.
expected := { 7 . 3.2 . 5.1 . 1.9 } asDataSeries.
expected name: #thirdQuartile.
expected keys: df columnNames.

Expand All @@ -160,7 +160,7 @@ DataFrameStatsTest >> testVariance [

| expected actual |

expected := { 0.876944 . 0.050000 . 3.888611 . 0.769444 } asDataSeries.
expected := { 0.860278 . 0.050000 . 3.888611 . 0.769444 } asDataSeries.
expected name: #variance.
expected keys: df columnNames.

Expand Down
Loading