diff --git a/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st b/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st index 31ba9c01..ce0c3462 100644 --- a/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st +++ b/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st @@ -25,24 +25,25 @@ DataFrameAggrGroupTest >> testAggregateAverage [ | expected actual | - expected := #(18.3433 20.79) asDataSeries. - expected keys: #(Male Female). + expected := DataSeries + withKeys: #(Male Female) + values: #(18.3433 20.79) + name: #total_bill. actual := (df group: #total_bill by: #sex) average. - self assert: actual closeTo: expected. ] { #category : #initialization } DataFrameAggrGroupTest >> testAggregateMax [ - | expected actual | - expected := #(23.68 24.59) asDataSeries. - expected keys: #(Male Female). + expected := DataSeries + withKeys: #(Male Female) + values: #(23.68 24.59) + name: #total_bill. actual := (df group: #total_bill by: #sex) max. - self assert: actual equals: expected. ] @@ -51,11 +52,12 @@ DataFrameAggrGroupTest >> testAggregateMin [ | expected actual | - expected := #(10.34 16.99) asDataSeries. - expected keys: #(Male Female). + expected := DataSeries + withKeys: #(Male Female) + values: #(10.34 16.99) + name: #total_bill. actual := (df group: #total_bill by: #sex) min. - self assert: actual equals: expected. ] @@ -78,37 +80,19 @@ DataFrameAggrGroupTest >> testDataFrameGroupedPrintOn [ self assert: actual equals: expected. ] -{ #category : #initialization } -DataFrameAggrGroupTest >> testDataSeriesGroupedPrintOn [ - - | expected actual | - - expected := String new writeStream. - expected - nextPutAll: 'a DataSeriesGrouped'; cr; - nextPutAll: ('Male: a DataSeries [3 items]'); cr; - nextPutAll: ('Female: a DataSeries [2 items]'). - expected := expected contents. - - actual := String new writeStream. - (df group: #total_bill by: #sex) printOn: actual. - actual := actual contents. - - self assert: actual equals: expected. 
-] - { #category : #tests } DataFrameAggrGroupTest >> testGroupSeriesBySeries [ - | femaleGroup maleGroup expectedSeries actualSeries | + | femaleGroup maleGroup expected actual | femaleGroup := #(16.99 24.59) asDataSeries. maleGroup := #(10.34 21.01 23.68) asDataSeries. - expectedSeries := { maleGroup . femaleGroup } asDataSeries. - expectedSeries keys: #(Male Female). + expected := DataSeries + withKeys: #(Male Female) + values: { maleGroup . femaleGroup } + name: #total_bill. - actualSeries := (df group: #total_bill by: #sex) groups. - -self assert: actualSeries equals: expectedSeries. + actual := (df group: #total_bill by: #sex) groups. + self assert: actual equals: expected. ] diff --git a/src/DataFrame-Tests/DataFrameHeadTailTest.class.st b/src/DataFrame-Tests/DataFrameHeadTailTest.class.st index 52c0cb8d..ae6d4e5c 100644 --- a/src/DataFrame-Tests/DataFrameHeadTailTest.class.st +++ b/src/DataFrame-Tests/DataFrameHeadTailTest.class.st @@ -104,64 +104,6 @@ DataFrameHeadTailTest >> testDataFrameTailN [ self assert: actual equals: expected. ] -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesHead [ - - | actual expected | - - expected := #(5.1 4.9 4.7 7 6.4) asDataSeries. - expected name: series name. - expected keys: (1 to: series defaultHeadTailSize). - - actual := series head. - - self assert: actual equals: expected. -] - -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesHeadN [ - - | actual expected | - - expected := #(5.1 4.9 4.7) asDataSeries. - expected name: series name. - expected keys: (1 to: 3). - - actual := series head: 3. - - self assert: actual equals: expected. -] - -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesTail [ - - | actual expected | - - expected := #(6.4 6.9 6.3 5.8 7.1) asDataSeries. - expected name: series name. - expected keys: - (series size - series defaultHeadTailSize + 1 to: series size). - - actual := series tail. - - self assert: actual equals: expected. 
-] - -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesTailN [ - - | actual expected | - - expected := #(6.3 5.8 7.1) asDataSeries. - expected name: series name. - expected keys: - (series size - 3 + 1 to: series size). - - actual := series tail: 3. - - self assert: actual equals: expected. -] - { #category : #tests } DataFrameHeadTailTest >> testDefaultHeadTailSize [ diff --git a/src/DataFrame-Tests/DataFrameQueriesTest.class.st b/src/DataFrame-Tests/DataFrameQueriesTest.class.st index 9ee0842d..b888d8ed 100644 --- a/src/DataFrame-Tests/DataFrameQueriesTest.class.st +++ b/src/DataFrame-Tests/DataFrameQueriesTest.class.st @@ -44,7 +44,7 @@ DataFrameQueriesTest >> testSelect [ actual := df select: [ :row | - (row atKey: #a) = 'x' and: (row atKey: #b) < 30 ]. + (row at: #a) = 'x' and: (row at: #b) < 30 ]. expected := DataFrame withRows: #( (x 10 0.25 0.1) diff --git a/src/DataFrame-Tests/DataFrameStatsTest.class.st b/src/DataFrame-Tests/DataFrameStatsTest.class.st index 1ec16d60..82ceb5b3 100644 --- a/src/DataFrame-Tests/DataFrameStatsTest.class.st +++ b/src/DataFrame-Tests/DataFrameStatsTest.class.st @@ -107,7 +107,7 @@ DataFrameStatsTest >> testMode [ | expected actual | - expected := { 4.7 . 3.2 . 1.4 . 0.2 } asDataSeries. + expected := { 6.3 . 3.2 . 1.4 . 0.2 } asDataSeries. expected name: #mode. expected keys: df columnNames. diff --git a/src/DataFrame-Tests/DataFrameTest.class.st b/src/DataFrame-Tests/DataFrameTest.class.st index 9de2c5c5..f79ae1a9 100644 --- a/src/DataFrame-Tests/DataFrameTest.class.st +++ b/src/DataFrame-Tests/DataFrameTest.class.st @@ -388,9 +388,9 @@ DataFrameTest >> testCollect [ expectedResult columnNames: #(City Population). actualResult := df collect: [ :row | - row atKey: #City put: (row atKey: #City) asUppercase. - row atKey: #Population put: (row atKey: #Population) asInteger. - row removeAtKey: #BeenThere. + row at: #City put: (row at: #City) asUppercase. + row at: #Population put: (row at: #Population) asInteger. 
+ row removeAt: #BeenThere. row ]. self assert: actualResult equals: expectedResult. diff --git a/src/DataFrame-Tests/DataSeriesInternalTest.class.st b/src/DataFrame-Tests/DataSeriesInternalTest.class.st deleted file mode 100644 index cc9beca0..00000000 --- a/src/DataFrame-Tests/DataSeriesInternalTest.class.st +++ /dev/null @@ -1,139 +0,0 @@ -Class { - #name : #DataSeriesInternalTest, - #superclass : #TestCase, - #instVars : [ - 'series' - ], - #category : #'DataFrame-Tests' -} - -{ #category : #initialization } -DataSeriesInternalTest >> setUp [ - - series := DataSeriesInternal - withValues: (10 to: 100 by: 10) asArray. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAdd [ - - | expected | - - expected := DataSeriesInternal withValues: - #(10 20 30 40 50 60 70 80 90 100 -1). - - series add: -1. - - self assert: series equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAt [ - - self assert: (series at: 2) equals: 20. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAtIfAbsent [ - - self assert: (series at: 2) equals: 20. - - self assert: (series at: 100 ifAbsent: [ 'Executing a block' ]) - equals: 'Executing a block'. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAtPut [ - - | expected | - - series at: 6 put: -2. - expected := DataSeriesInternal withValues: - #(10 20 30 40 50 -2 70 80 90 100). - - self assert: series equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testCollect [ - - | expected actual | - - expected := DataSeriesInternal withValues: (1 to: 10) asArray. - actual := series collect: [ :each | each / 10 ]. - - self assert: actual equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testCollectWithIndex [ - - | expected actual | - - expected := DataSeriesInternal withValues: ((1 to: 10) collect: [ :k | k * 2 ]). - actual := series collectWithIndex: [ :each :i | - each / 10 + i ]. 
- - self assert: actual equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testDo [ - - | sum | - sum := 0. - - series do: [ :each | - sum := sum + each ]. - - self assert: sum equals: 550. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testDoWithIndex [ - - | sum | - sum := 0. - - series doWithIndex: [ :each :i | - sum := sum + (each / i) ]. - - self assert: sum equals: 100. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testPrintOn [ - - | population expected actual | - population := DataSeriesInternal withValues: #(1.609 2.789 8.788). - - expected := '#(1.609 2.789 8.788)'. - - actual := String new writeStream. - population printOn: actual. - actual := actual contents. - - self assert: actual equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testRemoveAt [ - - | expected | - - expected := DataSeriesInternal withValues: - #(10 30 40 50 60 70 80 90 100). - - series removeAt: 2. - - self assert: series equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testVarSizeInstanceCreation [ - - | seriesInternal | - - seriesInternal := DataSeriesInternal new: 10. - self assert: seriesInternal size equals: 10. -] diff --git a/src/DataFrame-Tests/DataSeriesMathTest.class.st b/src/DataFrame-Tests/DataSeriesMathTest.class.st deleted file mode 100644 index 0426ea78..00000000 --- a/src/DataFrame-Tests/DataSeriesMathTest.class.st +++ /dev/null @@ -1,89 +0,0 @@ -Class { - #name : #DataSeriesMathTest, - #superclass : #TestCase, - #category : #'DataFrame-Tests' -} - -{ #category : #tests } -DataSeriesMathTest >> testAddElementwise [ - - | a b c | - - a := #(1 2 3) asDataSeries. - b := #(3 4 5) asDataSeries. - c := #(4 6 8) asDataSeries. - - self assert: a + b equals: c. -] - -{ #category : #tests } -DataSeriesMathTest >> testCos [ - - | a b pi | - - pi := Float pi. - a := { 0 . pi . pi/2 . pi/4 . pi/3 } asDataSeries. - b := { 1.0 . -1.0 . 0.0 . 
1/2 sqrt . 0.5 } asDataSeries. - - self assert: a cos closeTo: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testDivideByScalar [ - - | a b | - - a := #(1 2 3) asDataSeries. - b := #(0.5 1 1.5) asDataSeries. - - self assert: a / 2 equals: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testMultiplyScalar [ - - | a b | - - a := #(1 2 3) asDataSeries. - b := #(2 4 6) asDataSeries. - - self assert: 2 * a equals: b. - self assert: a * 2 equals: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testPowerScalar [ - - | a b | - - a := #(1 2 3) asDataSeries. - b := #(1 4 9) asDataSeries. - - self assert: a ** 2 equals: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testSin [ - - | a b pi | - - pi := Float pi. - a := { 0 . pi . pi/2 . pi/4 . pi/6 } asDataSeries. - b := { 0.0 . 0.0 . 1.0 . 1/2 sqrt . 0.5 } asDataSeries. - - self assert: a sin closeTo: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testSubtractElementwise [ - - | a b c d | - - a := #(1 2 3) asDataSeries. - b := #(3 6 8) asDataSeries. - c := #(-2 -4 -5) asDataSeries. - d := #(2 4 5) asDataSeries. - - self assert: a - b equals: c. - self assert: b - a equals: d. -] diff --git a/src/DataFrame-Tests/DataSeriesSortableTest.class.st b/src/DataFrame-Tests/DataSeriesSortableTest.class.st deleted file mode 100644 index 9b3f62d6..00000000 --- a/src/DataFrame-Tests/DataSeriesSortableTest.class.st +++ /dev/null @@ -1,107 +0,0 @@ -Class { - #name : #DataSeriesSortableTest, - #superclass : #TestCase, - #instVars : [ - 'series' - ], - #category : #'DataFrame-Tests' -} - -{ #category : #tests } -DataSeriesSortableTest >> setUp [ - - series := #(3 2 4 5 1 3 2 5 5 2 1) asDataSeries. - series keys: #(a b c d e f g h i j k). - series name: #TestSeries. -] - -{ #category : #tests } -DataSeriesSortableTest >> testIsSorted [ - - | sorted notSorted | - - sorted := #(1 2 3 5 5) asDataSeries. - notSorted := #(3 2 5 1 5) asDataSeries. - - self assert: sorted isSorted. 
- self assert: notSorted isSorted not. -] - -{ #category : #tests } -DataSeriesSortableTest >> testIsSortedBy [ - - | sortBlock sorted notSorted1 notSorted2 | - - sortBlock := [ :a :b | a > b ]. - - sorted := #(5 4 3 2 1) asDataSeries. - notSorted1 := #(5 5 3 2 1) asDataSeries. - notSorted2 := #(3 2 5 1 5) asDataSeries. - - self assert: (sorted isSortedBy: sortBlock). - self assert: (notSorted1 isSortedBy: sortBlock) not. - self assert: (notSorted2 isSortedBy: sortBlock) not. -] - -{ #category : #tests } -DataSeriesSortableTest >> testSort [ -"Should sort this series into ascending order using the '<=' operator. Keys should be reordered together with elements" - - | expected | - - series sort. - - expected := #(1 1 2 2 2 3 3 4 5 5 5) asDataSeries. - expected keys: #(e k b g j a f c d h i). - expected name: series name. - - self assert: series equals: expected. -] - -{ #category : #tests } -DataSeriesSortableTest >> testSortBy [ -"Should sort this series using the given sortBlock. Keys should be reordered together with elements" - - | expected | - - series sort: [ :a :b | a >= b ]. - - expected := #(5 5 5 4 3 3 2 2 2 1 1) asDataSeries. - expected keys: #(d h i c a f b g j e k). - expected name: series name. - - self assert: series equals: expected. -] - -{ #category : #tests } -DataSeriesSortableTest >> testSorted [ -"Should return a new series which contains the same elements as self but its elements are sorted in ascending order using the #'<=' operator. Keys should be reordered together with elements. The initial series should not be affected" - - | actual expected | - - actual := series sorted. - - expected := #(1 1 2 2 2 3 3 4 5 5 5) asDataSeries. - expected keys: #(e k b g j a f c d h i). - expected name: series name. - - self assert: actual equals: expected. - self assert: series isSorted not. 
-] - -{ #category : #tests } -DataSeriesSortableTest >> testSortedBy [ -"Should return a new series which contains the same elements as self but its elements are sorted using the given sortBlock. Keys should be reordered together with elements. The initial series should not be affected" - - | sortBlock actual expected | - - sortBlock := [ :a :b | a >= b ]. - actual := series sorted: sortBlock. - - expected := #(5 5 5 4 3 3 2 2 2 1 1) asDataSeries. - expected keys: #(d h i c a f b g j e k). - expected name: series name. - - self assert: actual equals: expected. - self assert: (series isSortedBy: sortBlock) not. -] diff --git a/src/DataFrame-Tests/DataSeriesStatsTest.class.st b/src/DataFrame-Tests/DataSeriesStatsTest.class.st deleted file mode 100644 index 71c0d43d..00000000 --- a/src/DataFrame-Tests/DataSeriesStatsTest.class.st +++ /dev/null @@ -1,113 +0,0 @@ -Class { - #name : #DataSeriesStatsTest, - #superclass : #TestCase, - #instVars : [ - 'series' - ], - #category : #'DataFrame-Tests' -} - -{ #category : #initialization } -DataSeriesStatsTest >> setUp [ - - series := #(3 7 6 20 8 9 8 10 15 13 16) asDataSeries. -] - -{ #category : #tests } -DataSeriesStatsTest >> testAverage [ - - self assert: series average equals: (115/11). -] - -{ #category : #tests } -DataSeriesStatsTest >> testFirstQuartile [ - - self assert: series firstQuartile equals: 7. -] - -{ #category : #tests } -DataSeriesStatsTest >> testInterquartileRange [ - - self assert: series interquartileRange equals: 8. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMax [ - - self assert: series max equals: 20. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMedian [ - - self assert: series median equals: 9. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMin [ - - self assert: series min equals: 3. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMode [ - - self assert: series mode equals: 8. 
-] - -{ #category : #tests } -DataSeriesStatsTest >> testQuantile [ - - self assert: (series quantile: 0) equals: 3. - self assert: (series quantile: 10) equals: 6. - self assert: (series quantile: 25) equals: 7. - self assert: (series quantile: 50) equals: 9. - self assert: (series quantile: 75) equals: 15. - self assert: (series quantile: 100) equals: 20. -] - -{ #category : #tests } -DataSeriesStatsTest >> testQuartile [ - - self assert: (series quartile: 0) equals: 3. - self assert: (series quartile: 1) equals: 7. - self assert: (series quartile: 2) equals: 9. - self assert: (series quartile: 3) equals: 15. - self assert: (series quartile: 4) equals: 20. -] - -{ #category : #tests } -DataSeriesStatsTest >> testRange [ - - self assert: series range equals: 17. -] - -{ #category : #tests } -DataSeriesStatsTest >> testStdev [ - - self assert: series stdev closeTo: 5.00727. -] - -{ #category : #tests } -DataSeriesStatsTest >> testSummary [ - - | expected actual | - - expected := { 3.0 . 7.0 . 9.0 . (115 / 11) asFloat . 15.0 . 20.0 } asDataSeries. - expected keys: #(Min '1st Qu.' Median Average '3rd Qu.' Max). - actual := series summary collect: #asFloat. - - self assert: actual equals: expected. -] - -{ #category : #tests } -DataSeriesStatsTest >> testThirdQuartile [ - - self assert: series thirdQuartile equals: 15. -] - -{ #category : #tests } -DataSeriesStatsTest >> testVariance [ - - self assert: series variance closeTo: 25.07273. -] diff --git a/src/DataFrame-Tests/DataSeriesTest.class.st b/src/DataFrame-Tests/DataSeriesTest.class.st index b5370038..af11bfaf 100644 --- a/src/DataFrame-Tests/DataSeriesTest.class.st +++ b/src/DataFrame-Tests/DataSeriesTest.class.st @@ -10,25 +10,100 @@ Class { { #category : #initialization } DataSeriesTest >> setUp [ + keyArray := #(a b c d e f g h i j k). + + series := DataSeries + withKeys: keyArray + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: 'ExampleSeries'. +] - series := (10 to: 100 by: 10) asDataSeries. 
- keyArray := #(a b c d e f g h i j). +{ #category : #tests } +DataSeriesTest >> testAddArrayToSeries [ + | series array actual expected | + + series := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + array := #(3 4 5). - series keys: keyArray. - series name: 'ExampleSeries'. + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: #X. + actual := series + array. + + self assert: actual equals: expected. ] { #category : #tests } -DataSeriesTest >> testAddAtKey [ +DataSeriesTest >> testAddScalarToSeries [ + | series scalar actual expected | + + series := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + scalar := 10. + + expected := DataSeries withKeys: #(a b c) values: #(11 12 13) name: #X. + actual := series + scalar. + + self assert: actual equals: expected. +] - | expected | +{ #category : #tests } +DataSeriesTest >> testAddSeriesToArray [ + | array series actual expected | - series add: -2 atKey: #X. - expected := #(10 20 30 40 50 60 70 80 90 100 -2) asDataSeries. - expected keys: (keyArray copyWith: #X). - expected name: series name. + array := #(1 2 3). + series := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. - self assert: series equals: expected. + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: #X. + actual := array + series. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToScalar [ + | scalar series actual expected | + + scalar := 10. + series := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. + + expected := DataSeries withKeys: #(a b c) values: #(13 14 15) name: #X. + actual := scalar + series. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToSeriesDifferentKeys [ + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(x y z) values: #(1 2 3) name: #X. + secondSeries := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. 
+ + self should: [ firstSeries + secondSeries ] raise: Error. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToSeriesSameKeysAndName [ + | firstSeries secondSeries actual expected | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + secondSeries := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. + + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: #X. + actual := firstSeries + secondSeries. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToSeriesSameKeysDifferentName [ + | firstSeries secondSeries actual expected | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + secondSeries := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #Y. + + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: '(no name)'. + actual := firstSeries + secondSeries. + + self assert: actual equals: expected. ] { #category : #tests } @@ -36,9 +111,9 @@ DataSeriesTest >> testAsDictionary [ | expected actual | expected := { - 'a' -> 10 . 'b' -> 20 . 'c' -> 30 . 'd' -> 40 . - 'e' -> 50 . 'f' -> 60 . 'g' -> 70 . 'h' -> 80 . - 'i' -> 90 . 'j' -> 100 } asDictionary. + 'a' -> 3 . 'b' -> 7 . 'c' -> 6 . 'd' -> 20 . + 'e' -> 8 . 'f' -> 9 . 'g' -> 8 . 'h' -> 10 . + 'i' -> 15 . 'j' -> 13 . 'k' -> 16 } asDictionary. actual := series asDictionary. self assert: actual equals: expected @@ -47,57 +122,118 @@ DataSeriesTest >> testAsDictionary [ { #category : #tests } DataSeriesTest >> testAt [ - self assert: (series at: 2) equals: 20. + self assert: (series at: #b) equals: 7. ] { #category : #tests } -DataSeriesTest >> testAtIfAbsent [ +DataSeriesTest >> testAtIndex [ - self assert: (series at: 2) equals: 20. - - self assert: (series at: 100 ifAbsent: [ 'Executing a block' ]) + self assert: (series atIndex: 2) equals: 7. 
+] + +{ #category : #tests } +DataSeriesTest >> testAtIndexIfAbsent [ + + self assert: (series atIndex: 2 ifAbsent: [ 'Executing a block' ]) equals: 7. + self assert: (series atIndex: 100 ifAbsent: [ 'Executing a block' ]) equals: 'Executing a block'. ] { #category : #tests } -DataSeriesTest >> testAtKeyPut [ +DataSeriesTest >> testAtIndexPut [ | expected | - series atKey: #f put: -2. - expected := #(10 20 30 40 50 -2 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + series atIndex: 6 put: -2. + + expected := DataSeries + withKeys: keyArray + values: #(3 7 6 20 8 -2 8 10 15 13 16) + name: series name. self assert: series equals: expected. ] { #category : #tests } -DataSeriesTest >> testAtKeyPutNewElement [ +DataSeriesTest >> testAtPut [ | expected | - series atKey: #X put: -2. - expected := #(10 20 30 40 50 60 70 80 90 100 -2) asDataSeries. - expected keys: (keyArray copyWith: #X). - expected name: series name. + series at: #f put: -2. + + expected := DataSeries + withKeys: keyArray + values: #(3 7 6 20 8 -2 8 10 15 13 16) + name: series name. self assert: series equals: expected. ] { #category : #tests } -DataSeriesTest >> testAtPut [ +DataSeriesTest >> testAtPutNewElement [ | expected | - series at: 6 put: -2. - expected := #(10 20 30 40 50 -2 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + series at: #X put: -2. + + expected := DataSeries + withKeys: (keyArray copyWith: #X) + values: #(3 7 6 20 8 9 8 10 15 13 16 -2) + name: series name. self assert: series equals: expected. ] +{ #category : #tests } +DataSeriesTest >> testAverage [ + + self assert: series average equals: (115/11). +] + +{ #category : #tests } +DataSeriesTest >> testCloseTo [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.10000001 2) name: 'A'. + + self assert: firstSeries closeTo: secondSeries. 
+] + +{ #category : #tests } +DataSeriesTest >> testCloseToDifferentKeys [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(x y z) values: #(1 0.10000001 2) name: 'A'. + + self deny: (firstSeries closeTo: secondSeries). +] + +{ #category : #tests } +DataSeriesTest >> testCloseToDifferentNames [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.10000001 2) name: 'B'. + + self deny: (firstSeries closeTo: secondSeries). +] + +{ #category : #tests } +DataSeriesTest >> testCloseToDifferentValues [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 3.10000001 2) name: 'A'. + + self deny: (firstSeries closeTo: secondSeries). +] + { #category : #tests } DataSeriesTest >> testCollect [ @@ -106,73 +242,101 @@ DataSeriesTest >> testCollect [ actual := series collect: [ :each | each / 10 ]. - expected := (1 to: 10) asDataSeries. - expected keys: keyArray. - expected name: series name. + expected := DataSeries + withKeys: keyArray + values: { 3/10 . 7/10 . 3/5 . 2 . 4/5 . 9/10 . 4/5 . 1 . 3/2 . 13/10 . 8/5 } + name: 'ExampleSeries'. self assert: actual equals: expected. ] { #category : #tests } -DataSeriesTest >> testCopy [ +DataSeriesTest >> testCopyCanBeChanged [ - | seriesCopy expectedOriginal expectedCopy | + | original seriesCopy expectedCopyAfterChange | - expectedOriginal := #(10 20 30 40 50 60 70 80 90 100) asDataSeries. - expectedOriginal keys: keyArray. - expectedOriginal name: series name. + original := DataSeries + withKeys: #(a b c d) + values: #(10 30 20 40) + name: #X. - expectedCopy := #(10 20 30 40 50 -2 70 80 100 -3) asDataSeries. - expectedCopy keys: ((keyArray copyWithout: #i) copyWith: #X). 
- expectedCopy name: #ChangedName. + expectedCopyAfterChange := DataSeries + withKeys: #(b c d e) + values: #(100 20 40 200) + name: #Y. + + seriesCopy := original copy. + + seriesCopy at: #b put: 100. + seriesCopy at: #e put: 200. + seriesCopy removeAt: #a. + seriesCopy name: #Y. - seriesCopy := series copy. - seriesCopy at: 6 put: -2. - seriesCopy removeAtKey: #i. - seriesCopy add: -3 atKey: #X. - seriesCopy name: #ChangedName. + self assert: seriesCopy equals: expectedCopyAfterChange. - self assert: series equals: expectedOriginal. - self assert: seriesCopy equals: expectedCopy. ] { #category : #tests } -DataSeriesTest >> testCreateDataSeriesAsDataSeries [ +DataSeriesTest >> testCopyChangeDoesNotAffectOriginal [ - | dataSeries | - dataSeries := #(a b c) asDataSeries. + | original seriesCopy expectedOriginalAfterChange | + + original := DataSeries + withKeys: #(a b c d) + values: #(10 30 20 40) + name: #X. + + expectedOriginalAfterChange := DataSeries + withKeys: #(a b c d) + values: #(10 30 20 40) + name: #X. + + seriesCopy := original copy. + + seriesCopy at: #b put: 100. + seriesCopy at: #e put: 200. + seriesCopy removeAt: #a. + seriesCopy name: #Y. + + self assert: original equals: expectedOriginalAfterChange. - self assert: dataSeries size equals: 3. - self assert: dataSeries asArray equals: #(a b c). - self assert: dataSeries keys equals: #(1 2 3) asOrderedCollection. - self assert: dataSeries name isNil. ] { #category : #tests } -DataSeriesTest >> testCreateDataSeriesWithValues [ +DataSeriesTest >> testCos [ - | values dataSeries | + | a b pi | - values := #(a b c). - dataSeries := DataSeries withValues: values. + pi := Float pi. + a := { 0 . pi . pi/2 . pi/4 . pi/3 } asDataSeries. + b := { 1.0 . -1.0 . 0.0 . 1/2 sqrt . 0.5 } asDataSeries. + + self assert: a cos closeTo: b. +] + +{ #category : #tests } +DataSeriesTest >> testCreateDataSeriesAsDataSeries [ + + | dataSeries | + dataSeries := #(a b c) asDataSeries. self assert: dataSeries size equals: 3. 
- self assert: dataSeries asArray equals: values. - self assert: dataSeries keys equals: #(1 2 3) asOrderedCollection. + self assert: dataSeries asArray equals: #(a b c). + self assert: dataSeries keys equals: #(1 2 3). self assert: dataSeries name isNil. ] { #category : #tests } -DataSeriesTest >> testCreateDataSeriesWithValuesKeys [ +DataSeriesTest >> testCreateDataSeriesWithKeysValues [ | values keys dataSeries | values := #(a b c). - keys := #(x y z) asOrderedCollection. + keys := #(x y z). dataSeries := DataSeries - withValues: values - keys: keys. + withKeys: keys + values: values. self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. @@ -181,17 +345,17 @@ DataSeriesTest >> testCreateDataSeriesWithValuesKeys [ ] { #category : #tests } -DataSeriesTest >> testCreateDataSeriesWithValuesKeysName [ +DataSeriesTest >> testCreateDataSeriesWithKeysValuesName [ | values keys name dataSeries | values := #(a b c). - keys := #(x y z) asOrderedCollection. + keys := #(x y z). name := 'Some data'. dataSeries := DataSeries - withValues: values - keys: keys + withKeys: keys + values: values name: name. self assert: dataSeries size equals: 3. @@ -200,6 +364,20 @@ DataSeriesTest >> testCreateDataSeriesWithValuesKeysName [ self assert: dataSeries name equals: name. ] +{ #category : #tests } +DataSeriesTest >> testCreateDataSeriesWithValues [ + + | values dataSeries | + + values := #(a b c). + dataSeries := DataSeries withValues: values. + + self assert: dataSeries size equals: 3. + self assert: dataSeries asArray equals: values. + self assert: dataSeries keys equals: #(1 2 3). + self assert: dataSeries name isNil. +] + { #category : #tests } DataSeriesTest >> testCreateDataSeriesWithValuesName [ @@ -214,7 +392,7 @@ DataSeriesTest >> testCreateDataSeriesWithValuesName [ self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. - self assert: dataSeries keys equals: #(1 2 3) asOrderedCollection. 
+ self assert: dataSeries keys equals: #(1 2 3). self assert: dataSeries name equals: name. ] @@ -226,119 +404,100 @@ DataSeriesTest >> testCreateEmptyDataSeries [ self assert: dataSeries size equals: 0. self assert: dataSeries asArray equals: #(). - self assert: dataSeries keys equals: #() asOrderedCollection. + self assert: dataSeries keys equals: #(). self assert: dataSeries name isNil. ] { #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithGivenSize [ +DataSeriesTest >> testCrossTabulateWith [ - | size dataSeries | + | series1 series2 expected | + series1 := #(A B C A A B C A B B) asDataSeries. + series2 := #(1 2 1 2 1 1 2 1 2 2) asDataSeries. - size := 4. - dataSeries := DataSeries new: size. + expected := DataFrame withRows: + #((3 1)(1 3)(1 1)). + + expected rowNames: #(A B C). + expected columnNames: #(1 2). - self assert: dataSeries size equals: size. - self assert: dataSeries asArray equals: #(nil nil nil nil). - self assert: dataSeries keys equals: #(1 2 3 4) asOrderedCollection. - self assert: dataSeries name isNil. + self assert: (series1 crossTabulateWith: series2) equals: expected. ] { #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithKeys [ +DataSeriesTest >> testDivideByScalar [ - | keys dataSeries | + | a b | - keys := #(x y z) asOrderedCollection. - dataSeries := DataSeries withKeys: keys. + a := #(1 2 3) asDataSeries. + b := #(0.5 1 1.5) asDataSeries. - self assert: dataSeries size equals: 3. - self assert: dataSeries asArray equals: #(nil nil nil). - self assert: dataSeries keys equals: keys. - self assert: dataSeries name isNil. + self assert: a / 2 equals: b. ] { #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithKeysName [ +DataSeriesTest >> testDo [ - | keys name dataSeries | + | sum | + sum := 0. - keys := #(x y z) asOrderedCollection. - name := 'Some data'. - dataSeries := DataSeries withKeys: keys name: name. + series do: [ :each | + sum := sum + each ]. 
- self assert: dataSeries size equals: 3. - self assert: dataSeries asArray equals: #(nil nil nil). - self assert: dataSeries keys equals: keys. - self assert: dataSeries name equals: name. + self assert: sum equals: 115. ] { #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithName [ +DataSeriesTest >> testEighth [ - | name dataSeries | - - name := 'Some data'. - dataSeries := DataSeries withName: name. - - self assert: dataSeries size equals: 0. - self assert: dataSeries asArray equals: #(). - self assert: dataSeries keys equals: #() asOrderedCollection. - self assert: dataSeries name equals: name. + self assert: series eighth equals: 10. ] { #category : #tests } -DataSeriesTest >> testCrossTabulateWith [ +DataSeriesTest >> testEquality [ - | series1 series2 expected | - series1 := #(A B C A A B C A B B) asDataSeries. - series2 := #(1 2 1 2 1 1 2 1 2 2) asDataSeries. + | firstSeries secondSeries | - expected := DataFrame withRows: - #((3 1)(1 3)(1 1)). - - expected rowNames: #(A B C). - expected columnNames: #(1 2). + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. - self assert: (series1 crossTabulateWith: series2) equals: expected. + self assert: firstSeries equals: secondSeries. ] { #category : #tests } -DataSeriesTest >> testDo [ +DataSeriesTest >> testFifth [ - | sum | - sum := 0. - - series do: [ :each | - sum := sum + each ]. - - self assert: sum equals: 550. + self assert: series fifth equals: 8. ] { #category : #tests } -DataSeriesTest >> testDoWithIndex [ +DataSeriesTest >> testFirst [ - | sum | - sum := 0. - - series doWithIndex: [ :each :i | - sum := sum + (each / i) ]. - - self assert: sum equals: 100. + self assert: series first equals: 3. ] { #category : #tests } -DataSeriesTest >> testEquality [ +DataSeriesTest >> testFirstQuartile [ - | a b | - - a := #(1 0.1 'a') asDataSeries. - b := #(1 0.1 'a') asDataSeries. 
- - a name: 'A'. - b name: 'B'. - - self assert: a equals: b. + self assert: series firstQuartile equals: 7. +] + +{ #category : #tests } +DataSeriesTest >> testFourth [ + + self assert: series fourth equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testFourthQuartile [ + + self assert: series fourthQuartile equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testFourthQuartileEqualsMax [ + + self assert: series fourthQuartile equals: series max. ] { #category : #tests } @@ -357,6 +516,51 @@ DataSeriesTest >> testFrequencyTable [ self assert: aSeries frequencyTable equals: expected ] +{ #category : #tests } +DataSeriesTest >> testGroupBy [ + | firstSeries secondSeries expected actual | + + firstSeries := DataSeries withValues: #(1 10 2 1 5) name: #salary. + secondSeries := DataSeries withValues: #(Male Female Male Male Female) name: #sex. + + expected := DataSeriesGrouped new. + expected groups: (DataSeries + withKeys: #(Female Male) + values: { + DataSeries withValues: #(10 5) . + DataSeries withValues: #(1 2 1) } + name: #salary). + + actual := firstSeries groupBy: secondSeries. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testHead [ + | expected actual | + + expected := DataSeries + withKeys: #(a b c d e) + values: #(3 7 6 20 8) + name: series name. + + actual := series head. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testHeadN [ + | expected actual | + + expected := DataSeries + withKeys: #(a b) + values: #(3 7) + name: series name. + + actual := series head: 2. + self assert: actual equals: expected. +] + { #category : #tests } DataSeriesTest >> testInequality [ @@ -372,172 +576,417 @@ DataSeriesTest >> testInequality [ ] { #category : #tests } -DataSeriesTest >> testPrintOn [ +DataSeriesTest >> testInequalityDifferentKeys [ - | population expected actual | - population := #(1.609 2.789 8.788) asDataSeries. - population keys: #(Barcelona Dubai London). 
- population name: #Population. - - expected := 'a DataSeries [3 items]'. + | firstSeries secondSeries | - actual := String new writeStream. - population printOn: actual. - actual := actual contents. + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(x y z) values: #(1 0.1 'a') name: 'A'. - self assert: actual equals: expected. + self assert: (firstSeries ~= secondSeries). ] { #category : #tests } -DataSeriesTest >> testRemoveAt [ +DataSeriesTest >> testInequalityDifferentNames [ - | expected | + | firstSeries secondSeries | - expected := #(10 20 40 50 60 70 80 90 100) asDataSeries. - expected keys: (keyArray copyWithout: #c). - expected name: series name. + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'B'. - series removeAt: 3. + self assert: (firstSeries ~= secondSeries). +] + +{ #category : #tests } +DataSeriesTest >> testInequalityDifferentValues [ + + | firstSeries secondSeries | - self assert: series equals: expected. + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.2 'a') name: 'A'. + + self assert: (firstSeries ~= secondSeries). ] { #category : #tests } -DataSeriesTest >> testRemoveAtKey [ +DataSeriesTest >> testInterquartileRange [ - | expected | + self assert: series interquartileRange equals: 8. +] + +{ #category : #tests } +DataSeriesTest >> testLast [ + + self assert: series last equals: 16. +] + +{ #category : #tests } +DataSeriesTest >> testMax [ + + self assert: series max equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testMedian [ + + self assert: series median equals: 9. +] + +{ #category : #tests } +DataSeriesTest >> testMin [ + + self assert: series min equals: 3. +] + +{ #category : #tests } +DataSeriesTest >> testMode [ + + self assert: series mode equals: 8. 
+] + +{ #category : #tests } +DataSeriesTest >> testMultiplyScalar [ + + | a b | - expected := #(10 20 40 50 60 70 80 90 100) asDataSeries. - expected keys: (keyArray copyWithout: #c). - expected name: series name. + a := #(1 2 3) asDataSeries. + b := #(2 4 6) asDataSeries. + + self assert: 2 * a equals: b. + self assert: a * 2 equals: b. +] + +{ #category : #tests } +DataSeriesTest >> testNinth [ + + self assert: series ninth equals: 15. +] + +{ #category : #tests } +DataSeriesTest >> testPowerScalar [ + + | a b | - series removeAtKey: #c. + a := #(1 2 3) asDataSeries. + b := #(1 4 9) asDataSeries. - self assert: series equals: expected. + self assert: a ** 2 equals: b. ] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithArray [ +DataSeriesTest >> testQuantile [ + + self assert: (series quantile: 0) equals: 3. + self assert: (series quantile: 10) equals: 6. + self assert: (series quantile: 25) equals: 7. + self assert: (series quantile: 50) equals: 9. + self assert: (series quantile: 75) equals: 15. + self assert: (series quantile: 100) equals: 20. +] - | replacement expected | +{ #category : #tests } +DataSeriesTest >> testQuartile [ - replacement := #(x y z). + self assert: (series quartile: 0) equals: 3. + self assert: (series quartile: 1) equals: 7. + self assert: (series quartile: 2) equals: 9. + self assert: (series quartile: 3) equals: 15. + self assert: (series quartile: 4) equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testRange [ + + self assert: series range equals: 17. +] + +{ #category : #tests } +DataSeriesTest >> testRemoveAt [ + + | expected | - expected := #(10 20 x y z 60 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + expected := DataSeries + withKeys: (keyArray copyWithout: #c) + values: #(3 7 20 8 9 8 10 15 13 16) + name: 'ExampleSeries'. - series replaceFrom: 3 to: 5 with: replacement. + series removeAt: #c. self assert: series equals: expected. 
] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithSeries [ +DataSeriesTest >> testRemoveAtIndex [ - | replacement expected | - - replacement := #(x y z) asDataSeries. - replacement keys: #(k1 k2 k3). - replacement name: #Replacement. + | expected | - expected := #(10 20 x y z 60 70 80 90 100) asDataSeries. - expected keys: #(a b k1 k2 k3 f g h i j). - expected name: series name. + expected := DataSeries + withKeys: (keyArray copyWithout: #c) + values: #(3 7 20 8 9 8 10 15 13 16) + name: 'ExampleSeries'. - series replaceFrom: 3 to: 5 with: replacement. + series removeAtIndex: 3. self assert: series equals: expected. ] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithStartingAtArray [ +DataSeriesTest >> testSecond [ - | replacement expected | - - replacement := #(a b c d e). + self assert: series second equals: 7. +] + +{ #category : #tests } +DataSeriesTest >> testSecondQuartile [ + + self assert: series secondQuartile equals: 9. +] + +{ #category : #tests } +DataSeriesTest >> testSecondQuartileEqualsMedian [ + + self assert: series secondQuartile equals: series median. +] + +{ #category : #tests } +DataSeriesTest >> testSeventh [ + + self assert: series seventh equals: 8. +] + +{ #category : #tests } +DataSeriesTest >> testSin [ + + | a b pi | - expected := #(10 20 b c d 60 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + pi := Float pi. + a := { 0 . pi . pi/2 . pi/4 . pi/6 } asDataSeries. + b := { 0.0 . 0.0 . 1.0 . 1/2 sqrt . 0.5 } asDataSeries. - series replaceFrom: 3 to: 5 with: replacement startingAt: 2. + self assert: a sin closeTo: b. +] + +{ #category : #tests } +DataSeriesTest >> testSixth [ + + self assert: series sixth equals: 9. +] + +{ #category : #tests } +DataSeriesTest >> testSort [ + | expected | - self assert: series equals: expected. + expected := DataSeries + withKeys: #(a c b e g f h j i k d) + values: #(3 6 7 8 8 9 10 13 15 16 20) + name: series name. + + series sort. 
+ self assert: series equals: expected ] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithStartingAtSeries [ +DataSeriesTest >> testSortBlock [ + | expected | + + expected := DataSeries + withKeys: #(k j i h d g f e c b a) + values: #(16 13 15 10 20 8 9 8 6 7 3) + name: series name. + + series sort: [ :a :b | a asString size > b asString size ]. + self assert: series equals: expected +] - | replacement expected | +{ #category : #tests } +DataSeriesTest >> testSortDescending [ + | expected | - replacement := #(a b c d e) asDataSeries. - replacement keys: #(k1 k2 k3 k4 k5). - replacement name: #Replacement. + expected := DataSeries + withKeys: #(d k i j h f g e b c a) + values: #(20 16 15 13 10 9 8 8 7 6 3) + name: series name. + + series sortDescending. + self assert: series equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSorted [ + | expected actual | - expected := #(10 20 b c d 60 70 80 90 100) asDataSeries. - expected keys: #(a b k2 k3 k4 f g h i j). - expected name: series name. + expected := DataSeries + withKeys: #(a c b e g f h j i k d) + values: #(3 6 7 8 8 9 10 13 15 16 20) + name: series name. + + actual := series sorted. + self assert: actual equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortedBlock [ + | expected actual | - series replaceFrom: 3 to: 5 with: replacement startingAt: 2. + expected := DataSeries + withKeys: #(k j i h d g f e c b a) + values: #(16 13 15 10 20 8 9 8 6 7 3) + name: series name. + + actual := series sorted: [ :a :b | a asString size > b asString size ]. + self assert: actual equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortedBlockDoesNotChangeTheReceiver [ + | expected | - self assert: series equals: expected. + expected := DataSeries + withKeys: #(a b c d e f g h i j k) + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: series name. + + series sorted: [ :a :b | a asString size > b asString size ]. 
+ self assert: series equals: expected ] { #category : #tests } -DataSeriesTest >> testSort [ - | cities expected | +DataSeriesTest >> testSortedDescending [ + | expected actual | - cities := #(London Dubai Paris Berlin) asDataSeries. - cities keys: #(A B C D). - cities name: #Cities. + expected := DataSeries + withKeys: #(d k i j h f g e b c a) + values: #(20 16 15 13 10 9 8 8 7 6 3) + name: series name. + + actual := series sortedDescending. + self assert: actual equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortedDescendingDoesNotChangeTheReceiver [ + | expected | - expected := #(Berlin Dubai London Paris) asDataSeries. - expected keys: #(D B A C). - expected name: #Cities. + expected := DataSeries + withKeys: #(a b c d e f g h i j k) + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: series name. + + series sortedDescending. + self assert: series equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortedDoesNotChangeTheReceiver [ + | expected | - cities sort. - self assert: cities equals: expected. + expected := DataSeries + withKeys: #(a b c d e f g h i j k) + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: series name. + + series sorted. + self assert: series equals: expected ] { #category : #tests } -DataSeriesTest >> testSortDescending [ - | cities expected | +DataSeriesTest >> testStdev [ + + self assert: series stdev closeTo: 5.00727. +] + +{ #category : #tests } +DataSeriesTest >> testSubtractElementwise [ + + | a b c d | - cities := #(London Dubai Paris Berlin) asDataSeries. - cities keys: #(A B C D). - cities name: #Cities. + a := #(1 2 3) asDataSeries. + b := #(3 6 8) asDataSeries. + c := #(-2 -4 -5) asDataSeries. + d := #(2 4 5) asDataSeries. + + self assert: a - b equals: c. + self assert: b - a equals: d. +] + +{ #category : #tests } +DataSeriesTest >> testSummary [ + | expected actual | - expected := #(Paris London Dubai Berlin) asDataSeries. - expected keys: #(C A B D). - expected name: #Cities. 
+ expected := DataSeries + withKeys: #(Min '1st Qu.' Median Average '3rd Qu.' Max) + values: { 3 . 7 . 9 . (115 / 11) . 15 . 20 } + name: series name. + + actual := series summary. - cities sortDescending. - self assert: cities equals: expected. + self assert: actual equals: expected. ] { #category : #tests } -DataSeriesTest >> testSortUsing [ - | cities expected | +DataSeriesTest >> testTail [ + | expected actual | - cities := #(London Dubai Paris Berlin) asDataSeries. - cities keys: #(A B C D). - cities name: #Cities. + expected := DataSeries + withKeys: #(g h i j k) + values: #(8 10 15 13 16) + name: series name. + + actual := series tail. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testTailN [ + | expected actual | - expected := #(Dubai London Berlin Paris) asDataSeries. - expected keys: #(B A D C). - expected name: #Cities. + expected := DataSeries + withKeys: #(j k) + values: #(13 16) + name: series name. + + actual := series tail: 2. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testThird [ + + self assert: series third equals: 6. +] + +{ #category : #tests } +DataSeriesTest >> testThirdQuartile [ + + self assert: series thirdQuartile equals: 15. +] + +{ #category : #tests } +DataSeriesTest >> testUniqueValues [ + | aSeries expected actual | + aSeries := DataSeries withKeys: #(a b c d e) values: #(z y y z x). + expected := #(x y z). + actual := aSeries uniqueValues. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testValueCounts [ + | actual expected | - cities sortUsing: [ :a :b | a last <= b last ]. - self assert: cities equals: expected. + expected := DataSeries + withKeys: #(8 16 15 13 10 9 7 6 20 3) + values: #(2 1 1 1 1 1 1 1 1 1) + name: series name. + + actual := series valueCounts. + self assert: actual equals: expected. 
] { #category : #tests } -DataSeriesTest >> testVarSizeInstanceCreation [ +DataSeriesTest >> testVariance [ - | aSeries | - aSeries := DataSeries new: 10. - self assert: aSeries size equals: 10. + self assert: series variance closeTo: 25.07273. ] { #category : #tests } @@ -545,11 +994,41 @@ DataSeriesTest >> testWithIndexCollect [ | actual expected | - actual := series collectWithIndex: [ :each :i | + actual := series withIndexCollect: [ :each :i | each / 10 + i ]. - expected := (2 to: 20 by: 2) asDataSeries. - expected name: series name. + expected := DataSeries + withKeys: keyArray + values: {(13/10). (27/10). (18/5). 6. (29/5). (69/10). (39/5). 9. (21/2). (113/10). (63/5)} + name: 'ExampleSeries'. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testWithIndexDo [ + + | sum | + sum := 0. + + series withIndexDo: [ :each :i | + sum := sum + (each / i) ]. + + self assert: sum equals: (108173/4620). +] + +{ #category : #tests } +DataSeriesTest >> testWithKeyCollect [ + + | actual expected | + + actual := series withKeyCollect: [ :each :key | + each / 10 + (keyArray indexOf: key) ]. + + expected := DataSeries + withKeys: keyArray + values: {(13/10). (27/10). (18/5). 6. (29/5). (69/10). (39/5). 9. (21/2). (113/10). (63/5)} + name: 'ExampleSeries'. self assert: actual equals: expected. ] @@ -563,5 +1042,17 @@ DataSeriesTest >> testWithKeyDo [ series withKeyDo: [ :each :key | sum := sum + (each / (keyArray indexOf: key)) ]. - self assert: sum equals: 100. + self assert: sum equals: (108173/4620). +] + +{ #category : #tests } +DataSeriesTest >> testZerothQuartile [ + + self assert: series zerothQuartile equals: 3. +] + +{ #category : #tests } +DataSeriesTest >> testZerothQuartileEqualsMin [ + + self assert: series zerothQuartile equals: series min. 
] diff --git a/src/DataFrame-Type/DataFrameTypeDetector.class.st b/src/DataFrame-Type/DataFrameTypeDetector.class.st index 7526052b..9fb0ea08 100644 --- a/src/DataFrame-Type/DataFrameTypeDetector.class.st +++ b/src/DataFrame-Type/DataFrameTypeDetector.class.st @@ -100,7 +100,7 @@ DataFrameTypeDetector >> detectColumnTypeAndConvert: aDataSeries [ DataFrameTypeDetector >> detectTypesAndConvert: aDataFrame [ aDataFrame columnNames do: [ :columnName | aDataFrame column: columnName put: ( - self detectColumnTypeAndConvert: (aDataFrame column: columnName)) ]. + self detectColumnTypeAndConvert: (aDataFrame column: columnName)) asArray ]. aDataFrame rowNames: (self detectColumnTypeAndConvert: aDataFrame rowNames). ] diff --git a/src/DataFrame/Collection.extension.st b/src/DataFrame/Collection.extension.st index 61a63d36..8f45e1b7 100644 --- a/src/DataFrame/Collection.extension.st +++ b/src/DataFrame/Collection.extension.st @@ -8,9 +8,8 @@ Collection >> ** arg [ { #category : #'*DataFrame-Core-Base' } Collection >> asDataSeries [ -"Converts a collection to Array and constructs a DataSeries from its values" - ^ DataSeries withValues: self asArray. + ^ self as: DataSeries ] @@ -27,3 +26,20 @@ Collection >> variance [ ^ self stdev squared. ] + +{ #category : #'*DataFrame' } +Collection >> withSeries: aDataSeries collect: twoArgBlock [ + "Collect and return the result of evaluating twoArgBlock with corresponding elements from this collection and aDataSeries." + | result | + aDataSeries size = self size ifFalse: [self errorSizeMismatch]. + + result := aDataSeries species new: self size. + result name: aDataSeries name. + + aDataSeries keys withIndexDo: [ :key :i | + result at: key put: + (twoArgBlock + value: (self at: i) + value: (aDataSeries at: key))]. 
+ ^ result +] diff --git a/src/DataFrame/DataFrame.class.st b/src/DataFrame/DataFrame.class.st index ebf96e32..9024ae27 100644 --- a/src/DataFrame/DataFrame.class.st +++ b/src/DataFrame/DataFrame.class.st @@ -367,7 +367,7 @@ DataFrame >> addEmptyRowNamed: aString atPosition: aNumber [ { #category : #adding } DataFrame >> addRow: aDataSeries [ "Add DataSeries as a new row at the end" - self addRow: aDataSeries named: aDataSeries name. + self addRow: aDataSeries asArray named: aDataSeries name. ] { #category : #adding } @@ -957,14 +957,17 @@ DataFrame >> select: aBlock [ Collect into a new collection like the receiver, only those elements for which aBlock evaluates to true. Answer the new collection." - | rows selectedRows df | + | rows selectedRows selectedRowNames selectedRowsAsArrays df | rows := self asArrayOfRows. selectedRows := rows select: aBlock. + selectedRowNames := selectedRows collect: #name. + selectedRowsAsArrays := selectedRows collect: #asArray. - df := self class withRows: selectedRows. - df columnNames: self columnNames. - df rowNames: (selectedRows collect: #name). + df := self class + withRows: selectedRowsAsArrays + rowNames: selectedRowNames + columnNames: self columnNames. ^ df ] @@ -972,8 +975,8 @@ DataFrame >> select: aBlock [ { #category : #private } DataFrame >> setDefaultRowColumnNames [ - self rowNames: (1 to: self numberOfRows) asOrderedCollection. - self columnNames: (1 to: self numberOfColumns) asOrderedCollection. + self rowNames: (1 to: self numberOfRows). + self columnNames: (1 to: self numberOfColumns). ] { #category : #'gt-inspector-extension' } @@ -1007,13 +1010,13 @@ DataFrame >> sortBy: columnName using: aBlock [ | column sortedKeys newContents | column := self column: columnName. column := column copy. - column sortUsing: aBlock. + column sort: aBlock. sortedKeys := column keys. newContents := DataFrameInternal new: self dimensions. sortedKeys withIndexDo: [ :key :i | - newContents rowAt: i put: (self row: key) ]. 
+ newContents rowAt: i put: (self row: key) asArray ]. contents := newContents. self rowNames: sortedKeys. diff --git a/src/DataFrame/DataFrameGrouped.class.st b/src/DataFrame/DataFrameGrouped.class.st index 7ff91d14..b31e4c29 100644 --- a/src/DataFrame/DataFrameGrouped.class.st +++ b/src/DataFrame/DataFrameGrouped.class.st @@ -15,7 +15,7 @@ DataFrameGrouped >> apply: aBlock [ | colNames numberOfRows numberOfColumns result | - colNames := (groups at: 1) columnNames. + colNames := groups first columnNames. numberOfRows := groups size. numberOfColumns := colNames size. @@ -24,7 +24,7 @@ DataFrameGrouped >> apply: aBlock [ result rowNames: groups keys. result columnNames: colNames. - groups doWithIndex: [ :df :i | + groups withIndexDo: [ :df :i | 1 to: colNames size do: [ :j | result at: i at: j put: (aBlock value: (df columnAt: j)) ] ]. @@ -38,7 +38,7 @@ DataFrameGrouped >> printOn: aStream [ super printOn: aStream. aStream cr. - groups doWithIndex: [ :eachDataFrame :i | + groups withIndexDo: [ :eachDataFrame :i | aStream nextPutAll: (groups keys at: i) asString; nextPutAll: ': '. @@ -56,15 +56,15 @@ DataFrameGrouped >> split: aDataFrame by: aSeries [ aDataFrame numberOfRows = aSeries size ifFalse: [ SizeMismatch signal ]. - seriesUnique := aSeries unique asArray. + seriesUnique := aSeries uniqueValues. groups := seriesUnique collect: [ :eachUnique | | aList df | aList := LinkedList new. - aSeries doWithIndex: [ :each :i | + aSeries withIndexDo: [ :each :i | each = eachUnique - ifTrue: [ aList add: (aDataFrame rowAt: i) ] ]. + ifTrue: [ aList add: (aDataFrame rowAt: i) asArray ] ]. df := DataFrame withRows: aList. df columnNames: aDataFrame columnNames. 
diff --git a/src/DataFrame/DataGrouped.class.st b/src/DataFrame/DataGrouped.class.st index bb05e4b4..2fb66169 100644 --- a/src/DataFrame/DataGrouped.class.st +++ b/src/DataFrame/DataGrouped.class.st @@ -7,6 +7,14 @@ Class { #category : #'DataFrame-Core' } +{ #category : #comparing } +DataGrouped >> = anObject [ + self species == anObject species + ifFalse: [ ^ false ]. + + ^ self groups = anObject groups +] + { #category : #private } DataGrouped >> apply: aBlock [ @@ -27,10 +35,14 @@ DataGrouped >> count [ { #category : #accessing } DataGrouped >> groups [ - ^ groups ] +{ #category : #accessing } +DataGrouped >> groups: anObject [ + groups := anObject +] + { #category : #private } DataGrouped >> max [ diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index ac0c2086..91103205 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -1,658 +1,492 @@ -" -I am a data series, suitable for data analysis. -" Class { #name : #DataSeries, - #superclass : #SequenceableCollection, + #superclass : #OrderedDictionary, #instVars : [ - 'contents', - 'keys', 'name' ], #category : #'DataFrame-Core' } { #category : #'instance creation' } -DataSeries class >> new: aNumber [ - - ^ self new initialize: aNumber. - +DataSeries class >> newFrom: aCollection [ + "Answer a series built from aCollection. An empty collection or a collection of associations is handed to the superclass implementation as is; any other collection has its elements keyed by their 1-based position. The isEmpty guard avoids sending #first to an empty collection." + ^ super newFrom: ((aCollection isEmpty or: [ aCollection first respondsTo: #key ]) + ifTrue: [ aCollection ] + ifFalse: [ aCollection collectWithIndex: [ :each :i | i -> each ] ]). ] { #category : #'instance creation' } -DataSeries class >> withKeys: anArrayOfKeys [ - - | series | - series := self new: anArrayOfKeys size. - series keys: anArrayOfKeys. - ^ series. - - +DataSeries class >> withKeys: keys values: values [ + ^ self newFromKeys: keys andValues: values ] { #category : #'instance creation' } -DataSeries class >> withKeys: anArrayOfKeys name: aName [ - - | series | - series := self withKeys: anArrayOfKeys.
- series name: aName. - ^ series. - - -] - -{ #category : #'instance creation' } -DataSeries class >> withName: aName [ - - | series | - series := self new. - series name: aName. - ^ series. - - -] - -{ #category : #'instance creation' } -DataSeries class >> withValues: anArray [ - - | series | - series := self new: anArray size. - series fillWithValuesOf: anArray. - ^ series. - - -] - -{ #category : #'instance creation' } -DataSeries class >> withValues: anArray keys: anArrayOfKeys [ - - | series | - series := self withValues: anArray. - series keys: anArrayOfKeys. - ^ series. - - +DataSeries class >> withKeys: keys values: values name: aName [ + ^ (self withKeys: keys values: values) name: aName; yourself ] { #category : #'instance creation' } -DataSeries class >> withValues: anArray keys: anArrayOfKeys name: aName [ - - | series | - series := self withValues: anArray name: aName. - series keys: anArrayOfKeys. - ^ series. - - +DataSeries class >> withValues: values [ + | keys | + keys := (1 to: values size) asArray. + ^ self withKeys: keys values: values ] { #category : #'instance creation' } -DataSeries class >> withValues: anArray name: aName [ - - | series | - series := self withValues: anArray. - series name: aName. - ^ series. - - +DataSeries class >> withValues: values name: aName [ + | keys | + keys := (1 to: values size) asArray. + ^ (self withKeys: keys values: values) name: aName; yourself ] { #category : #comparing } -DataSeries >> = otherSeries [ - - (otherSeries isKindOf: DataSeries) - ifFalse: [ ^ false ]. - - "I'm not sure if names should be considered when testing for equality" - "self name = otherSeries name - ifFalse: [ ^ false ]." - - self keys = otherSeries keys +DataSeries >> = anObject [ + (super = anObject) ifFalse: [ ^ false ]. - - ^ (1 to: self size) inject: true into: [ :allEqual :i | - | selfCell otherCell | - selfCell := self at: i. - otherCell := otherSeries at: i. 
- (allEqual and: (selfCell = otherCell)) - ifTrue: [ true ] - ifFalse: [ - (selfCell isNumber and: otherCell isNumber) - ifFalse: [ false ] - ifTrue: [ (selfCell isNaN and: otherCell isNaN) ] ] ]. + ^ anObject name = self name + "order of keys" + and: [ anObject keys = self keys ] ] -{ #category : #adding } -DataSeries >> add: aValue atKey: aKey [ +{ #category : #adapting } +DataSeries >> adaptToCollection: rcvr andSend: selector [ + "If I am involved in arithmetic with another Collection, return a Collection of + the results of each element combined with the scalar in that expression." - contents add: aValue. - keys := keys copyWith: aKey. -] - -{ #category : #converting } -DataSeries >> asDataFrame [ - - | df | - df := DataFrame withColumns: { self }. - df columnNames: { self name }. - df rowNames: self keys. - ^ df -] - -{ #category : #converting } -DataSeries >> asDataSeries [ - - ^ DataSeries newFrom: self. -] - -{ #category : #converting } -DataSeries >> asDictionary [ - ^ Dictionary newFromKeys: keys andValues: contents + (rcvr isSequenceable and: [ self isSequenceable ]) ifFalse: + [self error: 'Only sequenceable collections may be combined arithmetically']. + ^ rcvr withSeries: self collect: + [:rcvrElement :myElement | rcvrElement perform: selector with: myElement] ] { #category : #accessing } -DataSeries >> at: aNumber [ - - ^ contents at: aNumber +DataSeries >> atIndex: aNumber [ + ^ self at: (self keys at: aNumber) ] { #category : #accessing } -DataSeries >> at: aNumber put: aValue [ - - contents at: aNumber put: aValue. +DataSeries >> atIndex: aNumber ifAbsent: aBlock [ + ^ [ self at: (self keys at: aNumber) ] on: SubscriptOutOfBounds do: aBlock ] { #category : #accessing } -DataSeries >> atKey: aKey [ - - ^ self atKey: aKey ifAbsent: [ - NotFoundError signal: - 'Key ', - aKey asString, - ' was not found in ', - self class asString ]. 
-] - -{ #category : #accessing } -DataSeries >> atKey: aKey ifAbsent: exceptionBlock [ - - | index | - index := self indexOfKey: aKey ifAbsent: exceptionBlock. - - "In case contents and keys have different sizes" - ^ contents at: index ifAbsent: exceptionBlock. -] - -{ #category : #accessing } -DataSeries >> atKey: aKey put: aValue [ - - | index | - index := keys indexOf: aKey. - - index = 0 - "a key was not found - create a new element" - ifTrue: [ - self add: aValue atKey: aKey ] - "a key was found - update the value" - ifFalse: [ - self at: index put: aValue ]. +DataSeries >> atIndex: aNumber put: aValue [ + ^ self at: (self keys at: aNumber) put: aValue ] { #category : #comparing } -DataSeries >> closeTo: otherSeries [ +DataSeries >> closeTo: anObject [ + self == anObject + ifTrue: [^ true]. - (otherSeries isKindOf: DataSeries) - ifFalse: [ ^ false ]. - - "I'm not sure if names should be considered when testing for equality" - self name = otherSeries name + (self species == anObject species + and: [self size = anObject size]) + ifFalse: [^ false]. + + (anObject name = self name) ifFalse: [ ^ false ]. - self keys = otherSeries keys + (anObject keys = self keys) ifFalse: [ ^ false ]. - ^ (1 to: self size) inject: true into: [ :allEqual :i | - | selfCell otherCell | - selfCell := self at: i. - otherCell := otherSeries at: i. - - allEqual and: ((selfCell closeTo: otherCell) or: - ((selfCell isNumber and: otherCell isNumber) and: - (selfCell isNaN and: otherCell isNaN))) ]. + ^ super closeTo: anObject ] { #category : #enumerating } DataSeries >> collect: aBlock [ - - | series | - series := super collect: aBlock. - series name: self name. - series keys: self keys. - ^ series + | result | + result := super collect: aBlock. + result name: self name. + ^ result ] -{ #category : #'as yet unclassified' } +{ #category : #statistics } DataSeries >> crossTabulateWith: aSeries [ - | df | (self size = aSeries size) ifFalse: [ SizeMismatch signal ]. 
- "TODO: Rewrite it with DataFrame>>select:" df := DataFrame withRows: - (self unique asArray collect: [ :each1 | - aSeries unique asArray collect: [ :each2 | + (self uniqueValues collect: [ :each1 | + aSeries uniqueValues collect: [ :each2 | (1 to: self size) inject: 0 into: [ :accum :i | - (((self at: i) = each1) and: ((aSeries at: i) = each2)) + (((self atIndex: i) = each1) and: ((aSeries atIndex: i) = each2)) "i is a position, not a key: at: is a key lookup on a dictionary-based series" ifTrue: [ accum + 1 ] ifFalse: [ accum ] ] ] ]). - df rowNames: self unique asArray. - df columnNames: aSeries unique asArray. + df rowNames: self uniqueValues. + df columnNames: aSeries uniqueValues. ^ df ] -{ #category : #accessing } +{ #category : #defaults } DataSeries >> defaultHeadTailSize [ - ^ 5 ] -{ #category : #'reflective operations' } -DataSeries >> doesNotUnderstand: aMessage [ +{ #category : #defaults } +DataSeries >> defaultName [ + ^ '(no name)' +] - ^ self collect: [ :each | - each - perform: aMessage selector - withArguments: aMessage arguments ] - +{ #category : #accessing } +DataSeries >> eighth [ + "Answer the eighth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 8 ] -{ #category : #initialization } -DataSeries >> fillWithValuesOf: anArray [ -"Fills a newly created series with values of an array. This method is called from the fromArray: class method. It should only be called after initialize: as it is assumed that the memory was already allocated and the size of the self is equal to the size of array. -This method is private and should not be used in custom applications" +{ #category : #errors } +DataSeries >> errorKeysMismatch [ + Error signal: 'Keys of two series do not match' +] - anArray doWithIndex: [ :each :i | - contents at: i put: each ]. +{ #category : #accessing } +DataSeries >> fifth [ + "Answer the fifth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 5 +] + +{ #category : #accessing } +DataSeries >> first [ + "Answer the first element of the receiver.
+ Raise an error if there are not enough elements." + ^ self atIndex: 1 ] { #category : #statistics } DataSeries >> firstQuartile [ - ^ self quartile: 1 ] +{ #category : #accessing } +DataSeries >> fourth [ + "Answer the fourth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 4 +] + { #category : #statistics } -DataSeries >> frequencyTable [ +DataSeries >> fourthQuartile [ + ^ self quartile: 4 +] +{ #category : #statistics } +DataSeries >> frequencyTable [ | df count proportion | - count := (self groupBy: self) count. + count := self valueCounts. proportion := count / self size. df := DataFrame withColumns: - { count . proportion }. + { count asArray . proportion asArray }. - df rowNames: self unique asArray. + df rowNames: count keys. "take the row labels from the counts themselves so every label stays next to its own count; uniqueValues may answer the values in a different order" df columnNames: #(Count Proportion). ^ df ] -{ #category : #accessing } -DataSeries >> from: start to: end [ - - | series | - series := ((start to: end) collect: [ :i | - self at: i]) asDataSeries. - - series name: self name. - series keys: (self keys copyFrom: start to: end). - ^ series. -] - -{ #category : #'as yet unclassified' } -DataSeries >> groupBy: aSeries [ - - ^ DataSeriesGrouped group: self by: aSeries -] - -{ #category : #'gt-inspector-extension' } -DataSeries >> gtInspectorItemsIn: composite [ - self asDictionary gtInspectorItemsIn: composite +{ #category : #queries } +DataSeries >> groupBy: otherSeries [ + ^ DataSeriesGrouped group: self by: otherSeries ] -{ #category : #accessing } -DataSeries >> head [ - +{ #category : #slicing } +DataSeries >> head [ ^ self head: self defaultHeadTailSize. ] -{ #category : #accessing } +{ #category : #slicing } DataSeries >> head: aNumber [ - - | n | - - self size > aNumber - ifTrue: [ n := aNumber ] - ifFalse: [ n := self size ]. - - ^ self from: 1 to: n.
-] - -{ #category : #private } -DataSeries >> indexOfKey: aKey [ - - ^ self indexOfKey: aKey ifAbsent: [ - NotFoundError signal: - 'Key ', - aKey asString, - ' was not found in ', - self class asString ]. -] - -{ #category : #private } -DataSeries >> indexOfKey: aKey ifAbsent: aBlock [ - - | index | - index := keys indexOf: aKey. - - index = 0 - ifTrue: [ ^ aBlock value ]. - - ^ index + ^ self species + withKeys: (self keys copyFrom: 1 to: aNumber) + values: (self values copyFrom: 1 to: aNumber) + name: self name. ] { #category : #initialization } -DataSeries >> initialize [ - +DataSeries >> initialize [ super initialize. - contents := DataSeriesInternal new. - keys := OrderedCollection new. -] - -{ #category : #initialization } -DataSeries >> initialize: aNumber [ -"Creates an empty DataSeries of a given size. Keys are set to their default values" - - contents := DataSeriesInternal new: aNumber. - self setDefaultKeys. - ^ self + name := self defaultName. ] { #category : #statistics } DataSeries >> interquartileRange [ - - ^ self thirdQuartile - self firstQuartile + ^ self thirdQuartile - self firstQuartile ] -{ #category : #accessing } -DataSeries >> keyAtValue: value [ - - ^ self keyAtValue: value ifAbsent: [ ValueNotFound signalFor: value ] +{ #category : #testing } +DataSeries >> isSequenceable [ + ^ true ] { #category : #accessing } -DataSeries >> keyAtValue: value ifAbsent: exceptionBlock [ - - | index | - index := contents indexOf: value. - - index = 0 - ifTrue: [ ^ exceptionBlock value ] - ifFalse: [ ^ keys at: index ]. +DataSeries >> keys: anArrayOfKeys [ + | keys | + keys := anArrayOfKeys asArray deepCopy. + dictionary := self dictionaryClass newFromKeys: keys andValues: self values. + orderedKeys := keys. ] { #category : #accessing } -DataSeries >> keys [ - - ^ keys +DataSeries >> last [ + "Answer the last element of the receiver. + Raise an error if there are not enough elements." 
+ ^ self atIndex: self size ] { #category : #accessing } -DataSeries >> keys: anArray [ - - keys := anArray asOrderedCollection. -] - -{ #category : #sorting } -DataSeries >> mergeFirst: first middle: middle last: last into: dst by: aBlock [ - "Private. Merge the sorted ranges [first..middle] and [middle+1..last] - of the receiver into the range [first..last] of dst." - - | i1 i2 key1 key2 val1 val2 out | - i1 := first. - i2 := middle + 1. - key1 := self keys at: i1. - key2 := self keys at: i2. - val1 := self at: i1. - val2 := self at: i2. - out := first - 1. "will be pre-incremented" - - "select 'lower' half of the elements based on comparator" - [ (i1 <= middle) and: [i2 <= last] ] - whileTrue: [ - out := out + 1. - - (aBlock value: val1 value: val2) - ifTrue: [ - dst at: out put: val1. - dst keys at: out put: key1. - i1 := i1 + 1. - val1 := self at: i1. - key1 := self keys at: i1 ] - - ifFalse: [ - dst at: out put: val2. - dst keys at: out put: key2. - i2 := i2 + 1. - i2 <= last - ifTrue: [ - val2 := self at: i2. - key2 := self keys at: i2 ] ] ]. - - "copy the remaining elements" - i1 <= middle - ifTrue: [dst replaceFrom: out + 1 to: last with: self startingAt: i1] - ifFalse: [dst replaceFrom: out + 1 to: last with: self startingAt: i2] -] - -{ #category : #statistics } DataSeries >> mode [ - | valueCounts maxCount | - valueCounts := (self groupBy: self) count. maxCount := valueCounts max. - ^ valueCounts keyAtValue: maxCount. ] { #category : #accessing } DataSeries >> name [ - ^ name ] { #category : #accessing } -DataSeries >> name: aString [ - - name := aString -] - -{ #category : #copying } -DataSeries >> postCopy [ - - keys := keys copy. - contents := contents copy. +DataSeries >> name: anObject [ + name := anObject ] -{ #category : #printing } -DataSeries >> printOn: aStream [ - - | title | - title := self class name. 
- - aStream - nextPutAll: (title first isVowel ifTrue: ['an '] ifFalse: ['a ']); - nextPutAll: title; - nextPutAll: ' ['; - nextPutAll: self size asString; - nextPutAll: (self size % 10 = 1 ifTrue: [' item]'] ifFalse: [' items]']). +{ #category : #accessing } +DataSeries >> ninth [ + "Answer the ninth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 9 ] { #category : #statistics } -DataSeries >> quantile: number [ - +DataSeries >> quantile: aNumber [ | sortedSeries index | + sortedSeries := self sort. - sortedSeries := self asArray sort asDataSeries. - - number = 0 + aNumber = 0 ifTrue: [ ^ sortedSeries first ]. - index := (sortedSeries size * (number / 100)) ceiling. - ^ sortedSeries at: index. + index := (sortedSeries size * (aNumber / 100)) ceiling. + ^ sortedSeries atIndex: index. ] { #category : #statistics } -DataSeries >> quartile: number [ - - ^ self quantile: (25 * number) +DataSeries >> quartile: aNumber [ + ^ self quantile: (25 * aNumber) ] { #category : #removing } -DataSeries >> removeAt: aNumber [ - - contents removeAt: aNumber. - keys := keys copyWithoutIndex: aNumber. +DataSeries >> removeAt: aKey [ + ^ self removeKey: aKey ] { #category : #removing } -DataSeries >> removeAtKey: aKey [ - - | index | - index := self indexOfKey: aKey. - self removeAt: index. +DataSeries >> removeAtIndex: aNumber [ + ^ self removeAt: (self keys at: aNumber) ] { #category : #accessing } -DataSeries >> replaceFrom: start to: stop with: replacement [ - - (replacement isKindOf: self class) - ifTrue: [ - keys - replaceFrom: start - to: stop - with: replacement keys ]. - - super replaceFrom: start to: stop with: replacement. +DataSeries >> second [ + "Answer the second element of the receiver. + Raise an error if there are not enough elements." 
+ ^ self atIndex: 2 +] + +{ #category : #statistics } +DataSeries >> secondQuartile [ + ^ self quartile: 2 ] { #category : #accessing } -DataSeries >> replaceFrom: start to: stop with: replacement startingAt: repStart [ - - (replacement isKindOf: self class) - ifTrue: [ - keys - replaceFrom: start - to: stop - with: replacement keys - startingAt: repStart ]. - - super replaceFrom: start to: stop with: replacement startingAt: repStart . +DataSeries >> seventh [ + "Answer the seventh element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 7 ] -{ #category : #initialization } -DataSeries >> setDefaultKeys [ -"Sets the keys of a series to their default values - to an array of numbers from 1 to self size. This method can be reimplemented by a subclass to provide different default keys" +{ #category : #accessing } +DataSeries >> sixth [ + "Answer the sixth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 6 +] - keys := (1 to: self size) asOrderedCollection. +{ #category : #sorting } +DataSeries >> sort [ + self sort: [ :a :b | a <= b ] ] -{ #category : #accessing } -DataSeries >> size [ +{ #category : #sorting } +DataSeries >> sort: aBlock [ + | associationBlock | + associationBlock := [ :a :b | aBlock value: a value value: b value ]. + self sortAssociations: associationBlock +] - ^ contents size +{ #category : #sorting } +DataSeries >> sortAssociations: aBlock [ + | sortedAssociations | + sortedAssociations := self associations sort: aBlock. + self removeAll. + self addAll: sortedAssociations. 
] { #category : #sorting } DataSeries >> sortDescending [ - ^ self sortUsing: [ :a :b | a >= b ] + self sort: [ :a :b | a > b ] ] { #category : #sorting } -DataSeries >> sortUsing: aBlock [ - ^ self sort: aBlock +DataSeries >> sorted [ + ^ self sorted: [ :a :b | a <= b ] ] -{ #category : #accessing } -DataSeries >> summary [ +{ #category : #sorting } +DataSeries >> sorted: aBlock [ + | associationBlock | + associationBlock := [ :a :b | aBlock value: a value value: b value ]. + ^ self sortedAssociations: associationBlock +] +{ #category : #sorting } +DataSeries >> sortedAssociations: aBlock [ + | sortedAssociations | + sortedAssociations := self associations sort: aBlock. + ^ sortedAssociations asDataSeries name: self name; yourself +] + +{ #category : #sorting } +DataSeries >> sortedDescending [ + ^ self sorted: [ :a :b | a > b ] +] + +{ #category : #statistics } +DataSeries >> summary [ | summary | - summary := self class new. + summary := self species new. summary name: self name. summary - atKey: 'Min' put: self min; - atKey: '1st Qu.' put: self firstQuartile; - atKey: 'Median' put: self median; - atKey: 'Average' put: self average; - atKey: '3rd Qu.' put: self thirdQuartile; - atKey: 'Max' put: self max. + at: 'Min' put: self min; + at: '1st Qu.' put: self firstQuartile; + at: 'Median' put: self median; + at: 'Average' put: self average; + at: '3rd Qu.' put: self thirdQuartile; + at: 'Max' put: self max. ^ summary - ] -{ #category : #accessing } +{ #category : #slicing } DataSeries >> tail [ - ^ self tail: self defaultHeadTailSize. ] -{ #category : #accessing } +{ #category : #slicing } DataSeries >> tail: aNumber [ + ^ self species + withKeys: (self keys copyFrom: self size - aNumber + 1 to: self size) + values: (self values copyFrom: self size - aNumber + 1 to: self size) + name: self name. +] - | n | - - self size > aNumber - ifTrue: [ n := aNumber ] - ifFalse: [ n := self size ]. - - ^ self from: (self size - n + 1) to: (self size). 
+{ #category : #accessing } +DataSeries >> third [ + "Answer the third element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 3 ] { #category : #statistics } DataSeries >> thirdQuartile [ - ^ self quartile: 3 ] -{ #category : #'as yet unclassified' } -DataSeries >> unique [ - | unique | - unique := self asSet asDataSeries. - unique name: self name. - ^ unique +{ #category : #accessing } +DataSeries >> uniqueValues [ + ^ self asSet asArray +] + +{ #category : #statistics } +DataSeries >> valueCounts [ + ^ (self groupBy: self) count sortDescending +] + +{ #category : #enumerating } +DataSeries >> with: aCollection collect: twoArgBlock [ + "Collect and return the result of evaluating twoArgBlock with corresponding elements from this series and aCollection." + | result | + aCollection size = self size ifFalse: [self errorSizeMismatch]. + + result := self species new: self size. + result name: self name. + + self keys withIndexDo: [ :key :i | + result at: key put: + (twoArgBlock + value: (self at: key) + value: (aCollection at: i))]. + ^ result ] { #category : #enumerating } DataSeries >> withIndexCollect: aBlock [ + | result | + result := self species newFrom: + (self associations withIndexCollect: [:each :i | + each key -> (aBlock value: each value value: i)]). + result name: self name. + ^ result +] - | series | - series := super withIndexCollect: aBlock. - series name: self name. - ^ series +{ #category : #enumerating } +DataSeries >> withIndexDo: aBlock [ + self keys withIndexDo: [ :each :i | aBlock value: (self at: each) value: i ] +] + +{ #category : #enumerating } +DataSeries >> withKeyCollect: aBlock [ + | result | + result := self species newFrom: + (self associations collect: [:each | + each key -> (aBlock value: each value value: each key)]). + result name: self name. 
+ ^ result +] + +{ #category : #enumerating } +DataSeries >> withKeyDo: aBlock [ + self keysDo: [ :each | aBlock value: (self at: each) value: each ] ] { #category : #enumerating } -DataSeries >> withKeyDo: elementAndKeyBlock [ +DataSeries >> withSeries: otherDataSeries collect: twoArgBlock [ + "Collect and return the result of evaluating twoArgBlock with corresponding elements from this series and otherDataSeries." + | result | + otherDataSeries size = self size ifFalse: [self errorSizeMismatch]. + otherDataSeries keys = self keys ifFalse: [ self errorKeysMismatch ]. + + result := self species new: self size. + + result name: ((otherDataSeries name = self name) + ifTrue: [ self name ] + ifFalse: [ self defaultName ]). + + self keysDo: [ :key | + result at: key put: + (twoArgBlock + value: (self at: key) + value: (otherDataSeries at: key))]. + ^ result +] - 1 to: self size do: [ :index | - elementAndKeyBlock - value: (contents at: index) - value: (keys at: index) ]. +{ #category : #statistics } +DataSeries >> zerothQuartile [ + ^ self quartile: 0 ] diff --git a/src/DataFrame/DataSeriesGrouped.class.st b/src/DataFrame/DataSeriesGrouped.class.st index 14db94f4..dda6a849 100644 --- a/src/DataFrame/DataSeriesGrouped.class.st +++ b/src/DataFrame/DataSeriesGrouped.class.st @@ -30,7 +30,7 @@ DataSeriesGrouped >> printOn: aStream [ super printOn: aStream. aStream cr. - groups doWithIndex: [ :eachGroup :i | + groups withIndexDo: [ :eachGroup :i | aStream nextPutAll: (groups keys at: i) asString; nextPutAll: ': '. @@ -48,20 +48,22 @@ DataSeriesGrouped >> split: firstSeries by: secondSeries [ firstSeries size = secondSeries size ifFalse: [ SizeMismatch signal ]. - secondUnique := secondSeries unique asArray. + secondUnique := secondSeries uniqueValues asArray. groups := secondUnique collect: [ :eachUnique | | aList | aList := LinkedList new. 
- secondSeries doWithIndex: [ :eachSecond :i | + secondSeries withIndexDo: [ :eachSecond :i | eachSecond = eachUnique - ifTrue: [ aList add: (firstSeries at: i) ] ]. + ifTrue: [ aList add: (firstSeries atIndex: i) ] ]. aList asDataSeries ]. - groups := groups asDataSeries. - groups keys: secondUnique. + groups := DataSeries + withKeys: secondUnique + values: groups asDataSeries + name: firstSeries name. ^ self ] diff --git a/src/DataFrame/DataSeriesInternal.class.st b/src/DataFrame/DataSeriesInternal.class.st deleted file mode 100644 index a4957894..00000000 --- a/src/DataFrame/DataSeriesInternal.class.st +++ /dev/null @@ -1,114 +0,0 @@ -" -I am the internal representation of a DataSeries. I store the data very efficiently and allow you to access it very quickly. -" -Class { - #name : #DataSeriesInternal, - #superclass : #SequenceableCollection, - #instVars : [ - 'contents' - ], - #category : #'DataFrame-Core' -} - -{ #category : #'instance creation' } -DataSeriesInternal class >> new: aNumber [ - - ^ self new initialize: aNumber. - -] - -{ #category : #'instance creation' } -DataSeriesInternal class >> withValues: anArray [ - - ^ self new initializeWithArray: anArray. -] - -{ #category : #comparing } -DataSeriesInternal >> = other [ - - ((other class == self class - or: [ other isKindOf: Array ]) - and: [ other size = self size ]) - ifFalse: [ ^ false ]. - - 1 to: self size do: [ :i | - (self at: i) = (other at: i) - ifFalse: [ ^ false ] ]. - - ^ true. - -] - -{ #category : #adding } -DataSeriesInternal >> add: value [ - - contents := contents copyWith: value. -] - -{ #category : #accessing } -DataSeriesInternal >> at: aNumber [ - - ^ contents at: aNumber. -] - -{ #category : #accessing } -DataSeriesInternal >> at: aNumber ifAbsent: aBlock [ - - ^ contents at: aNumber ifAbsent: aBlock. -] - -{ #category : #accessing } -DataSeriesInternal >> at: aNumber put: value [ - - contents at: aNumber put: value. 
-] - -{ #category : #initialization } -DataSeriesInternal >> initialize [ - - super initialize. - contents := Array new. - ^ self. -] - -{ #category : #initialization } -DataSeriesInternal >> initialize: aNumber [ - - contents := Array new: aNumber. - ^ self. -] - -{ #category : #initialization } -DataSeriesInternal >> initializeWithArray: anArray [ - - anArray isArray - ifFalse: [ Error signal: - 'Only an instance of Array can be accepted as an argument' ]. - - contents := anArray. - ^ self. -] - -{ #category : #copying } -DataSeriesInternal >> postCopy [ - - contents := contents copy. -] - -{ #category : #printing } -DataSeriesInternal >> printOn: aStream [ - - contents printOn: aStream. -] - -{ #category : #removing } -DataSeriesInternal >> removeAt: aNumber [ - - contents := contents copyWithoutIndex: aNumber. -] - -{ #category : #accessing } -DataSeriesInternal >> size [ - - ^ contents size. -]