From 169d63dc17c5fc7bce2a113b8d083cb77b5534c5 Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Tue, 12 Mar 2019 17:22:48 +0100 Subject: [PATCH 1/9] Reimplemented DataSeries as a subclass of OrderedDictionary. Many tests are failing --- src/DataFrame-Tests/DataFrameTest.class.st | 6 +- .../DataSeriesInternalTest.class.st | 139 ---- .../DataSeriesSortableTest.class.st | 28 - src/DataFrame-Tests/DataSeriesTest.class.st | 199 +++--- src/DataFrame/Collection.extension.st | 3 +- src/DataFrame/DataSeries.class.st | 651 ++---------------- src/DataFrame/DataSeriesInternal.class.st | 114 --- 7 files changed, 152 insertions(+), 988 deletions(-) delete mode 100644 src/DataFrame-Tests/DataSeriesInternalTest.class.st delete mode 100644 src/DataFrame/DataSeriesInternal.class.st diff --git a/src/DataFrame-Tests/DataFrameTest.class.st b/src/DataFrame-Tests/DataFrameTest.class.st index 9de2c5c5..f79ae1a9 100644 --- a/src/DataFrame-Tests/DataFrameTest.class.st +++ b/src/DataFrame-Tests/DataFrameTest.class.st @@ -388,9 +388,9 @@ DataFrameTest >> testCollect [ expectedResult columnNames: #(City Population). actualResult := df collect: [ :row | - row atKey: #City put: (row atKey: #City) asUppercase. - row atKey: #Population put: (row atKey: #Population) asInteger. - row removeAtKey: #BeenThere. + row at: #City put: (row at: #City) asUppercase. + row at: #Population put: (row at: #Population) asInteger. + row removeAt: #BeenThere. row ]. self assert: actualResult equals: expectedResult. diff --git a/src/DataFrame-Tests/DataSeriesInternalTest.class.st b/src/DataFrame-Tests/DataSeriesInternalTest.class.st deleted file mode 100644 index cc9beca0..00000000 --- a/src/DataFrame-Tests/DataSeriesInternalTest.class.st +++ /dev/null @@ -1,139 +0,0 @@ -Class { - #name : #DataSeriesInternalTest, - #superclass : #TestCase, - #instVars : [ - 'series' - ], - #category : #'DataFrame-Tests' -} - -{ #category : #initialization } -DataSeriesInternalTest >> setUp [ - - series := DataSeriesInternal - withValues: (10 to: 100 by: 10) asArray. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAdd [ - - | expected | - - expected := DataSeriesInternal withValues: - #(10 20 30 40 50 60 70 80 90 100 -1). - - series add: -1. - - self assert: series equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAt [ - - self assert: (series at: 2) equals: 20. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAtIfAbsent [ - - self assert: (series at: 2) equals: 20. - - self assert: (series at: 100 ifAbsent: [ 'Executing a block' ]) - equals: 'Executing a block'. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testAtPut [ - - | expected | - - series at: 6 put: -2. - expected := DataSeriesInternal withValues: - #(10 20 30 40 50 -2 70 80 90 100). - - self assert: series equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testCollect [ - - | expected actual | - - expected := DataSeriesInternal withValues: (1 to: 10) asArray. - actual := series collect: [ :each | each / 10 ]. - - self assert: actual equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testCollectWithIndex [ - - | expected actual | - - expected := DataSeriesInternal withValues: ((1 to: 10) collect: [ :k | k * 2 ]). - actual := series collectWithIndex: [ :each :i | - each / 10 + i ]. - - self assert: actual equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testDo [ - - | sum | - sum := 0. - - series do: [ :each | - sum := sum + each ]. - - self assert: sum equals: 550. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testDoWithIndex [ - - | sum | - sum := 0. - - series doWithIndex: [ :each :i | - sum := sum + (each / i) ]. - - self assert: sum equals: 100. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testPrintOn [ - - | population expected actual | - population := DataSeriesInternal withValues: #(1.609 2.789 8.788). - - expected := '#(1.609 2.789 8.788)'. - - actual := String new writeStream. - population printOn: actual. - actual := actual contents. - - self assert: actual equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testRemoveAt [ - - | expected | - - expected := DataSeriesInternal withValues: - #(10 30 40 50 60 70 80 90 100). - - series removeAt: 2. - - self assert: series equals: expected. -] - -{ #category : #initialization } -DataSeriesInternalTest >> testVarSizeInstanceCreation [ - - | seriesInternal | - - seriesInternal := DataSeriesInternal new: 10. - self assert: seriesInternal size equals: 10. -] diff --git a/src/DataFrame-Tests/DataSeriesSortableTest.class.st b/src/DataFrame-Tests/DataSeriesSortableTest.class.st index 9b3f62d6..68270a4c 100644 --- a/src/DataFrame-Tests/DataSeriesSortableTest.class.st +++ b/src/DataFrame-Tests/DataSeriesSortableTest.class.st @@ -15,34 +15,6 @@ DataSeriesSortableTest >> setUp [ series name: #TestSeries. ] -{ #category : #tests } -DataSeriesSortableTest >> testIsSorted [ - - | sorted notSorted | - - sorted := #(1 2 3 5 5) asDataSeries. - notSorted := #(3 2 5 1 5) asDataSeries. - - self assert: sorted isSorted. - self assert: notSorted isSorted not. -] - -{ #category : #tests } -DataSeriesSortableTest >> testIsSortedBy [ - - | sortBlock sorted notSorted1 notSorted2 | - - sortBlock := [ :a :b | a > b ]. - - sorted := #(5 4 3 2 1) asDataSeries. - notSorted1 := #(5 5 3 2 1) asDataSeries. - notSorted2 := #(3 2 5 1 5) asDataSeries. - - self assert: (sorted isSortedBy: sortBlock). - self assert: (notSorted1 isSortedBy: sortBlock) not. - self assert: (notSorted2 isSortedBy: sortBlock) not. -] - { #category : #tests } DataSeriesSortableTest >> testSort [ "Should sort this series into ascending order using the '<=' operator. Keys should be reordered together with elements" diff --git a/src/DataFrame-Tests/DataSeriesTest.class.st b/src/DataFrame-Tests/DataSeriesTest.class.st index b5370038..b021679f 100644 --- a/src/DataFrame-Tests/DataSeriesTest.class.st +++ b/src/DataFrame-Tests/DataSeriesTest.class.st @@ -18,19 +18,6 @@ DataSeriesTest >> setUp [ series name: 'ExampleSeries'. ] -{ #category : #tests } -DataSeriesTest >> testAddAtKey [ - - | expected | - - series add: -2 atKey: #X. - expected := #(10 20 30 40 50 60 70 80 90 100 -2) asDataSeries. - expected keys: (keyArray copyWith: #X). - expected name: series name. - - self assert: series equals: expected. -] - { #category : #tests } DataSeriesTest >> testAsDictionary [ | expected actual | @@ -47,24 +34,29 @@ DataSeriesTest >> testAsDictionary [ { #category : #tests } DataSeriesTest >> testAt [ - self assert: (series at: 2) equals: 20. + self assert: (series at: #b) equals: 20. ] { #category : #tests } -DataSeriesTest >> testAtIfAbsent [ +DataSeriesTest >> testAtIndex [ - self assert: (series at: 2) equals: 20. - - self assert: (series at: 100 ifAbsent: [ 'Executing a block' ]) + self assert: (series atIndex: 2) equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testAtIndexIfAbsent [ + + self assert: (series atIndex: 2 ifAbsent: [ 'Executing a block' ]) equals: 20. + self assert: (series atIndex: 100 ifAbsent: [ 'Executing a block' ]) equals: 'Executing a block'. ] { #category : #tests } -DataSeriesTest >> testAtKeyPut [ +DataSeriesTest >> testAtIndexPut [ | expected | - series atKey: #f put: -2. + series atIndex: 6 put: -2. expected := #(10 20 30 40 50 -2 70 80 90 100) asDataSeries. expected keys: keyArray. expected name: series name. @@ -73,26 +65,26 @@ DataSeriesTest >> testAtKeyPut [ ] { #category : #tests } -DataSeriesTest >> testAtKeyPutNewElement [ +DataSeriesTest >> testAtPut [ | expected | - series atKey: #X put: -2. - expected := #(10 20 30 40 50 60 70 80 90 100 -2) asDataSeries. - expected keys: (keyArray copyWith: #X). + series at: #f put: -2. + expected := #(10 20 30 40 50 -2 70 80 90 100) asDataSeries. + expected keys: keyArray. expected name: series name. self assert: series equals: expected. ] { #category : #tests } -DataSeriesTest >> testAtPut [ +DataSeriesTest >> testAtPutNewElement [ | expected | - series at: 6 put: -2. - expected := #(10 20 30 40 50 -2 70 80 90 100) asDataSeries. - expected keys: keyArray. + series at: #X put: -2. + expected := #(10 20 30 40 50 60 70 80 90 100 -2) asDataSeries. + expected keys: (keyArray copyWith: #X). expected name: series name. self assert: series equals: expected. @@ -128,8 +120,8 @@ DataSeriesTest >> testCopy [ seriesCopy := series copy. seriesCopy at: 6 put: -2. - seriesCopy removeAtKey: #i. - seriesCopy add: -3 atKey: #X. + seriesCopy removeAt: #i. + seriesCopy at: #X put: -3. seriesCopy name: #ChangedName. self assert: series equals: expectedOriginal. @@ -144,60 +136,60 @@ DataSeriesTest >> testCreateDataSeriesAsDataSeries [ self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: #(a b c). - self assert: dataSeries keys equals: #(1 2 3) asOrderedCollection. + self assert: dataSeries keys equals: #(1 2 3). self assert: dataSeries name isNil. ] { #category : #tests } -DataSeriesTest >> testCreateDataSeriesWithValues [ +DataSeriesTest >> testCreateDataSeriesWithKeysValues [ - | values dataSeries | + | values keys dataSeries | values := #(a b c). - dataSeries := DataSeries withValues: values. + keys := #(x y z). + + dataSeries := DataSeries + withKeys: keys + values: values. self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. - self assert: dataSeries keys equals: #(1 2 3) asOrderedCollection. + self assert: dataSeries keys equals: keys. self assert: dataSeries name isNil. ] { #category : #tests } -DataSeriesTest >> testCreateDataSeriesWithValuesKeys [ +DataSeriesTest >> testCreateDataSeriesWithKeysValuesName [ - | values keys dataSeries | + | values keys name dataSeries | values := #(a b c). - keys := #(x y z) asOrderedCollection. + keys := #(x y z). + name := 'Some data'. dataSeries := DataSeries - withValues: values - keys: keys. + withKeys: keys + values: values + name: name. self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. self assert: dataSeries keys equals: keys. - self assert: dataSeries name isNil. + self assert: dataSeries name equals: name. ] { #category : #tests } -DataSeriesTest >> testCreateDataSeriesWithValuesKeysName [ +DataSeriesTest >> testCreateDataSeriesWithValues [ - | values keys name dataSeries | + | values dataSeries | values := #(a b c). - keys := #(x y z) asOrderedCollection. - name := 'Some data'. - - dataSeries := DataSeries - withValues: values - keys: keys - name: name. + dataSeries := DataSeries withValues: values. self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. - self assert: dataSeries keys equals: keys. - self assert: dataSeries name equals: name. + self assert: dataSeries keys equals: #(1 2 3). + self assert: dataSeries name isNil. ] { #category : #tests } @@ -214,7 +206,7 @@ DataSeriesTest >> testCreateDataSeriesWithValuesName [ self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. - self assert: dataSeries keys equals: #(1 2 3) asOrderedCollection. + self assert: dataSeries keys equals: #(1 2 3). self assert: dataSeries name equals: name. ] @@ -226,21 +218,7 @@ DataSeriesTest >> testCreateEmptyDataSeries [ self assert: dataSeries size equals: 0. self assert: dataSeries asArray equals: #(). - self assert: dataSeries keys equals: #() asOrderedCollection. - self assert: dataSeries name isNil. -] - -{ #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithGivenSize [ - - | size dataSeries | - - size := 4. - dataSeries := DataSeries new: size. - - self assert: dataSeries size equals: size. - self assert: dataSeries asArray equals: #(nil nil nil nil). - self assert: dataSeries keys equals: #(1 2 3 4) asOrderedCollection. + self assert: dataSeries keys equals: #(). self assert: dataSeries name isNil. ] @@ -315,18 +293,6 @@ DataSeriesTest >> testDo [ self assert: sum equals: 550. ] -{ #category : #tests } -DataSeriesTest >> testDoWithIndex [ - - | sum | - sum := 0. - - series doWithIndex: [ :each :i | - sum := sum + (each / i) ]. - - self assert: sum equals: 100. -] - { #category : #tests } DataSeriesTest >> testEquality [ @@ -371,23 +337,6 @@ DataSeriesTest >> testInequality [ self assert: (a ~= b). ] -{ #category : #tests } -DataSeriesTest >> testPrintOn [ - - | population expected actual | - population := #(1.609 2.789 8.788) asDataSeries. - population keys: #(Barcelona Dubai London). - population name: #Population. - - expected := 'a DataSeries [3 items]'. - - actual := String new writeStream. - population printOn: actual. - actual := actual contents. - - self assert: actual equals: expected. -] - { #category : #tests } DataSeriesTest >> testRemoveAt [ @@ -397,13 +346,13 @@ DataSeriesTest >> testRemoveAt [ expected keys: (keyArray copyWithout: #c). expected name: series name. - series removeAt: 3. + series removeAt: #c. self assert: series equals: expected. ] { #category : #tests } -DataSeriesTest >> testRemoveAtKey [ +DataSeriesTest >> testRemoveAtIndex [ | expected | @@ -411,7 +360,7 @@ DataSeriesTest >> testRemoveAtKey [ expected keys: (keyArray copyWithout: #c). expected name: series name. - series removeAtKey: #c. + series removeAtIndex: 3. self assert: series equals: expected. ] @@ -501,54 +450,74 @@ DataSeriesTest >> testSort [ ] { #category : #tests } -DataSeriesTest >> testSortDescending [ +DataSeriesTest >> testSortBlock [ | cities expected | cities := #(London Dubai Paris Berlin) asDataSeries. cities keys: #(A B C D). cities name: #Cities. - expected := #(Paris London Dubai Berlin) asDataSeries. - expected keys: #(C A B D). + expected := #(Dubai London Berlin Paris) asDataSeries. + expected keys: #(B A D C). expected name: #Cities. - cities sortDescending. + cities sort: [ :a :b | a value last <= b value last ]. self assert: cities equals: expected. ] { #category : #tests } -DataSeriesTest >> testSortUsing [ +DataSeriesTest >> testSortDescending [ | cities expected | cities := #(London Dubai Paris Berlin) asDataSeries. cities keys: #(A B C D). cities name: #Cities. - expected := #(Dubai London Berlin Paris) asDataSeries. - expected keys: #(B A D C). + expected := #(Paris London Dubai Berlin) asDataSeries. + expected keys: #(C A B D). expected name: #Cities. - cities sortUsing: [ :a :b | a last <= b last ]. + cities sortDescending. self assert: cities equals: expected. ] { #category : #tests } -DataSeriesTest >> testVarSizeInstanceCreation [ +DataSeriesTest >> testWithIndexCollect [ + + | actual expected | + + actual := series withIndexCollect: [ :each :i | + each / 10 + i ]. + + expected := (2 to: 20 by: 2) asDataSeries. + expected keys: keyArray. + expected name: series name. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testWithIndexDo [ - | aSeries | - aSeries := DataSeries new: 10. - self assert: aSeries size equals: 10. + | sum | + sum := 0. + + series withIndexDo: [ :each :i | + sum := sum + (each / i) ]. + + self assert: sum equals: 100. ] { #category : #tests } -DataSeriesTest >> testWithIndexCollect [ +DataSeriesTest >> testWithKeyCollect [ | actual expected | - actual := series collectWithIndex: [ :each :i | - each / 10 + i ]. + actual := series withKeyCollect: [ :each :key | + each / 10 + (keyArray indexOf: key) ]. expected := (2 to: 20 by: 2) asDataSeries. + expected keys: keyArray. expected name: series name. self assert: actual equals: expected. diff --git a/src/DataFrame/Collection.extension.st b/src/DataFrame/Collection.extension.st index 61a63d36..4cf98c9e 100644 --- a/src/DataFrame/Collection.extension.st +++ b/src/DataFrame/Collection.extension.st @@ -8,9 +8,8 @@ Collection >> ** arg [ { #category : #'*DataFrame-Core-Base' } Collection >> asDataSeries [ -"Converts a collection to Array and constructs a DataSeries from its values" - ^ DataSeries withValues: self asArray. + ^ self as: DataSeries ] diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index ac0c2086..411e6024 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -1,658 +1,135 @@ -" -I am a data series, suitable for data analysis. -" Class { #name : #DataSeries, - #superclass : #SequenceableCollection, + #superclass : #OrderedDictionary, #instVars : [ - 'contents', - 'keys', 'name' ], #category : #'DataFrame-Core' } { #category : #'instance creation' } -DataSeries class >> new: aNumber [ - - ^ self new initialize: aNumber. - -] - -{ #category : #'instance creation' } -DataSeries class >> withKeys: anArrayOfKeys [ - - | series | - series := self new: anArrayOfKeys size. - series keys: anArrayOfKeys. - ^ series. - - +DataSeries class >> newFrom: aCollection [ + "If it's a collection of associations use the superclass implementation" + ^ super newFrom: ((aCollection first respondsTo: #key) + ifTrue: [ aCollection ] + ifFalse: [ aCollection collectWithIndex: [ :each :i | i -> each ] ]). ] { #category : #'instance creation' } -DataSeries class >> withKeys: anArrayOfKeys name: aName [ - - | series | - series := self withKeys: anArrayOfKeys. - series name: aName. - ^ series. - - +DataSeries class >> withKeys: keys values: values [ + ^ self newFromKeys: keys andValues: values ] { #category : #'instance creation' } -DataSeries class >> withName: aName [ - - | series | - series := self new. - series name: aName. - ^ series. - - +DataSeries class >> withKeys: keys values: values name: aName [ + ^ (self withKeys: keys values: values) name: aName; yourself ] { #category : #'instance creation' } -DataSeries class >> withValues: anArray [ - - | series | - series := self new: anArray size. - series fillWithValuesOf: anArray. - ^ series. - - +DataSeries class >> withValues: values [ + | keys | + keys := (1 to: values size) asArray. + ^ self withKeys: keys values: values ] { #category : #'instance creation' } -DataSeries class >> withValues: anArray keys: anArrayOfKeys [ - - | series | - series := self withValues: anArray. - series keys: anArrayOfKeys. - ^ series. - - -] - -{ #category : #'instance creation' } -DataSeries class >> withValues: anArray keys: anArrayOfKeys name: aName [ - - | series | - series := self withValues: anArray name: aName. - series keys: anArrayOfKeys. - ^ series. - - -] - -{ #category : #'instance creation' } -DataSeries class >> withValues: anArray name: aName [ - - | series | - series := self withValues: anArray. - series name: aName. - ^ series. - - -] - -{ #category : #comparing } -DataSeries >> = otherSeries [ - - (otherSeries isKindOf: DataSeries) - ifFalse: [ ^ false ]. - - "I'm not sure if names should be considered when testing for equality" - "self name = otherSeries name - ifFalse: [ ^ false ]." - - self keys = otherSeries keys - ifFalse: [ ^ false ]. - - ^ (1 to: self size) inject: true into: [ :allEqual :i | - | selfCell otherCell | - selfCell := self at: i. - otherCell := otherSeries at: i. - - (allEqual and: (selfCell = otherCell)) - ifTrue: [ true ] - ifFalse: [ - (selfCell isNumber and: otherCell isNumber) - ifFalse: [ false ] - ifTrue: [ (selfCell isNaN and: otherCell isNaN) ] ] ]. -] - -{ #category : #adding } -DataSeries >> add: aValue atKey: aKey [ - - contents add: aValue. - keys := keys copyWith: aKey. -] - -{ #category : #converting } -DataSeries >> asDataFrame [ - - | df | - df := DataFrame withColumns: { self }. - df columnNames: { self name }. - df rowNames: self keys. - ^ df -] - -{ #category : #converting } -DataSeries >> asDataSeries [ - - ^ DataSeries newFrom: self. -] - -{ #category : #converting } -DataSeries >> asDictionary [ - ^ Dictionary newFromKeys: keys andValues: contents -] - -{ #category : #accessing } -DataSeries >> at: aNumber [ - - ^ contents at: aNumber -] - -{ #category : #accessing } -DataSeries >> at: aNumber put: aValue [ - - contents at: aNumber put: aValue. -] - -{ #category : #accessing } -DataSeries >> atKey: aKey [ - - ^ self atKey: aKey ifAbsent: [ - NotFoundError signal: - 'Key ', - aKey asString, - ' was not found in ', - self class asString ]. -] - -{ #category : #accessing } -DataSeries >> atKey: aKey ifAbsent: exceptionBlock [ - - | index | - index := self indexOfKey: aKey ifAbsent: exceptionBlock. - - "In case contents and keys have different sizes" - ^ contents at: index ifAbsent: exceptionBlock. -] - -{ #category : #accessing } -DataSeries >> atKey: aKey put: aValue [ - - | index | - index := keys indexOf: aKey. - - index = 0 - "a key was not found - create a new element" - ifTrue: [ - self add: aValue atKey: aKey ] - "a key was found - update the value" - ifFalse: [ - self at: index put: aValue ]. -] - -{ #category : #comparing } -DataSeries >> closeTo: otherSeries [ - - (otherSeries isKindOf: DataSeries) - ifFalse: [ ^ false ]. - - "I'm not sure if names should be considered when testing for equality" - self name = otherSeries name - ifFalse: [ ^ false ]. - - self keys = otherSeries keys - ifFalse: [ ^ false ]. - - ^ (1 to: self size) inject: true into: [ :allEqual :i | - | selfCell otherCell | - selfCell := self at: i. - otherCell := otherSeries at: i. - - allEqual and: ((selfCell closeTo: otherCell) or: - ((selfCell isNumber and: otherCell isNumber) and: - (selfCell isNaN and: otherCell isNaN))) ]. -] - -{ #category : #enumerating } -DataSeries >> collect: aBlock [ - - | series | - series := super collect: aBlock. - series name: self name. - series keys: self keys. - ^ series +DataSeries class >> withValues: values name: aName [ + | keys | + keys := (1 to: values size) asArray. + ^ (self withKeys: keys values: values) name: aName; yourself ] { #category : #'as yet unclassified' } -DataSeries >> crossTabulateWith: aSeries [ - - | df | - - (self size = aSeries size) - ifFalse: [ SizeMismatch signal ]. - - "TODO: Rewrite it with DataFrame>>select:" - df := DataFrame withRows: - (self unique asArray collect: [ :each1 | - aSeries unique asArray collect: [ :each2 | - (1 to: self size) inject: 0 into: [ :accum :i | - (((self at: i) = each1) and: ((aSeries at: i) = each2)) - ifTrue: [ accum + 1 ] - ifFalse: [ accum ] ] ] ]). - - df rowNames: self unique asArray. - df columnNames: aSeries unique asArray. - ^ df -] - -{ #category : #accessing } -DataSeries >> defaultHeadTailSize [ - - ^ 5 -] - -{ #category : #'reflective operations' } -DataSeries >> doesNotUnderstand: aMessage [ - - ^ self collect: [ :each | - each - perform: aMessage selector - withArguments: aMessage arguments ] - -] - -{ #category : #initialization } -DataSeries >> fillWithValuesOf: anArray [ -"Fills a newly created series with values of an array. This method is called from the fromArray: class method. It should only be called after initialize: as it is assumed that the memory was already allocated and the size of the self is equal to the size of array. -This method is private and should not be used in custom applications" - - anArray doWithIndex: [ :each :i | - contents at: i put: each ]. -] - -{ #category : #statistics } -DataSeries >> firstQuartile [ - - ^ self quartile: 1 -] - -{ #category : #statistics } -DataSeries >> frequencyTable [ - - | df count proportion | - - count := (self groupBy: self) count. - proportion := count / self size. - - df := DataFrame withColumns: - { count . proportion }. - - df rowNames: self unique asArray. - df columnNames: #(Count Proportion). - - ^ df -] - -{ #category : #accessing } -DataSeries >> from: start to: end [ - - | series | - series := ((start to: end) collect: [ :i | - self at: i]) asDataSeries. - - series name: self name. - series keys: (self keys copyFrom: start to: end). - ^ series. +DataSeries >> atIndex: aNumber [ + ^ self at: (self keys at: aNumber) ] { #category : #'as yet unclassified' } -DataSeries >> groupBy: aSeries [ - - ^ DataSeriesGrouped group: self by: aSeries -] - -{ #category : #'gt-inspector-extension' } -DataSeries >> gtInspectorItemsIn: composite [ - self asDictionary gtInspectorItemsIn: composite +DataSeries >> atIndex: aNumber ifAbsent: aBlock [ + ^ [ self at: (self keys at: aNumber) ] on: SubscriptOutOfBounds do: aBlock ] -{ #category : #accessing } -DataSeries >> head [ - - ^ self head: self defaultHeadTailSize. +{ #category : #'as yet unclassified' } +DataSeries >> atIndex: aNumber put: aValue [ + ^ self at: (self keys at: aNumber) put: aValue ] { #category : #accessing } -DataSeries >> head: aNumber [ - - | n | - - self size > aNumber - ifTrue: [ n := aNumber ] - ifFalse: [ n := self size ]. - - ^ self from: 1 to: n. -] - -{ #category : #private } -DataSeries >> indexOfKey: aKey [ - - ^ self indexOfKey: aKey ifAbsent: [ - NotFoundError signal: - 'Key ', - aKey asString, - ' was not found in ', - self class asString ]. -] - -{ #category : #private } -DataSeries >> indexOfKey: aKey ifAbsent: aBlock [ - - | index | - index := keys indexOf: aKey. - - index = 0 - ifTrue: [ ^ aBlock value ]. - - ^ index +DataSeries >> defaultName [ + ^ '(no name)' ] { #category : #initialization } -DataSeries >> initialize [ - +DataSeries >> initialize [ super initialize. - contents := DataSeriesInternal new. - keys := OrderedCollection new. -] - -{ #category : #initialization } -DataSeries >> initialize: aNumber [ -"Creates an empty DataSeries of a given size. Keys are set to their default values" - - contents := DataSeriesInternal new: aNumber. - self setDefaultKeys. - ^ self -] - -{ #category : #statistics } -DataSeries >> interquartileRange [ - - ^ self thirdQuartile - self firstQuartile + name := self defaultName. ] -{ #category : #accessing } -DataSeries >> keyAtValue: value [ - - ^ self keyAtValue: value ifAbsent: [ ValueNotFound signalFor: value ] -] - -{ #category : #accessing } -DataSeries >> keyAtValue: value ifAbsent: exceptionBlock [ - - | index | - index := contents indexOf: value. - - index = 0 - ifTrue: [ ^ exceptionBlock value ] - ifFalse: [ ^ keys at: index ]. -] - -{ #category : #accessing } -DataSeries >> keys [ - - ^ keys -] - -{ #category : #accessing } -DataSeries >> keys: anArray [ - - keys := anArray asOrderedCollection. -] - -{ #category : #sorting } -DataSeries >> mergeFirst: first middle: middle last: last into: dst by: aBlock [ - "Private. Merge the sorted ranges [first..middle] and [middle+1..last] - of the receiver into the range [first..last] of dst." - - | i1 i2 key1 key2 val1 val2 out | - i1 := first. - i2 := middle + 1. - key1 := self keys at: i1. - key2 := self keys at: i2. - val1 := self at: i1. - val2 := self at: i2. - out := first - 1. "will be pre-incremented" - - "select 'lower' half of the elements based on comparator" - [ (i1 <= middle) and: [i2 <= last] ] - whileTrue: [ - out := out + 1. - - (aBlock value: val1 value: val2) - ifTrue: [ - dst at: out put: val1. - dst keys at: out put: key1. - i1 := i1 + 1. - val1 := self at: i1. - key1 := self keys at: i1 ] - - ifFalse: [ - dst at: out put: val2. - dst keys at: out put: key2. - i2 := i2 + 1. - i2 <= last - ifTrue: [ - val2 := self at: i2. - key2 := self keys at: i2 ] ] ]. - - "copy the remaining elements" - i1 <= middle - ifTrue: [dst replaceFrom: out + 1 to: last with: self startingAt: i1] - ifFalse: [dst replaceFrom: out + 1 to: last with: self startingAt: i2] -] - -{ #category : #statistics } -DataSeries >> mode [ - - | valueCounts maxCount | - - valueCounts := (self groupBy: self) count. - maxCount := valueCounts max. - - ^ valueCounts keyAtValue: maxCount. +{ #category : #'as yet unclassified' } +DataSeries >> keys: anArrayOfKeys [ + | keys | + keys := anArrayOfKeys deepCopy. + dictionary := self dictionaryClass newFromKeys: keys andValues: self values. + orderedKeys := keys. ] { #category : #accessing } DataSeries >> name [ - ^ name ] { #category : #accessing } -DataSeries >> name: aString [ - - name := aString -] - -{ #category : #copying } -DataSeries >> postCopy [ - - keys := keys copy. - contents := contents copy. -] - -{ #category : #printing } -DataSeries >> printOn: aStream [ - - | title | - title := self class name. - - aStream - nextPutAll: (title first isVowel ifTrue: ['an '] ifFalse: ['a ']); - nextPutAll: title; - nextPutAll: ' ['; - nextPutAll: self size asString; - nextPutAll: (self size % 10 = 1 ifTrue: [' item]'] ifFalse: [' items]']). -] - -{ #category : #statistics } -DataSeries >> quantile: number [ - - | sortedSeries index | - - sortedSeries := self asArray sort asDataSeries. - - number = 0 - ifTrue: [ ^ sortedSeries first ]. - - index := (sortedSeries size * (number / 100)) ceiling. - ^ sortedSeries at: index. -] - -{ #category : #statistics } -DataSeries >> quartile: number [ - - ^ self quantile: (25 * number) +DataSeries >> name: anObject [ + name := anObject ] { #category : #removing } -DataSeries >> removeAt: aNumber [ - - contents removeAt: aNumber. - keys := keys copyWithoutIndex: aNumber. +DataSeries >> removeAt: aKey [ + ^ self removeKey: aKey ] { #category : #removing } -DataSeries >> removeAtKey: aKey [ - - | index | - index := self indexOfKey: aKey. - self removeAt: index. -] - -{ #category : #accessing } -DataSeries >> replaceFrom: start to: stop with: replacement [ - - (replacement isKindOf: self class) - ifTrue: [ - keys - replaceFrom: start - to: stop - with: replacement keys ]. - - super replaceFrom: start to: stop with: replacement. -] - -{ #category : #accessing } -DataSeries >> replaceFrom: start to: stop with: replacement startingAt: repStart [ - - (replacement isKindOf: self class) - ifTrue: [ - keys - replaceFrom: start - to: stop - with: replacement keys - startingAt: repStart ]. - - super replaceFrom: start to: stop with: replacement startingAt: repStart . -] - -{ #category : #initialization } -DataSeries >> setDefaultKeys [ -"Sets the keys of a series to their default values - to an array of numbers from 1 to self size. This method can be reimplemented by a subclass to provide different default keys" - - keys := (1 to: self size) asOrderedCollection. -] - -{ #category : #accessing } -DataSeries >> size [ - - ^ contents size +DataSeries >> removeAtIndex: aNumber [ + ^ self removeAt: (self keys at: aNumber) ] { #category : #sorting } -DataSeries >> sortDescending [ - ^ self sortUsing: [ :a :b | a >= b ] +DataSeries >> sort [ + ^ self sort: [ :a :b | a value <= b value ] ] { #category : #sorting } -DataSeries >> sortUsing: aBlock [ - ^ self sort: aBlock -] - -{ #category : #accessing } -DataSeries >> summary [ - - | summary | - summary := self class new. - summary name: self name. - - summary - atKey: 'Min' put: self min; - atKey: '1st Qu.' put: self firstQuartile; - atKey: 'Median' put: self median; - atKey: 'Average' put: self average; - atKey: '3rd Qu.' put: self thirdQuartile; - atKey: 'Max' put: self max. - - ^ summary - +DataSeries >> sort: aBlock [ + | sortedAssociations | + sortedAssociations := self associations sort: aBlock. + ^ sortedAssociations asDataSeries name: self name; yourself ] -{ #category : #accessing } -DataSeries >> tail [ - - ^ self tail: self defaultHeadTailSize. -] - -{ #category : #accessing } -DataSeries >> tail: aNumber [ - - | n | - - self size > aNumber - ifTrue: [ n := aNumber ] - ifFalse: [ n := self size ]. - - ^ self from: (self size - n + 1) to: (self size). +{ #category : #sorting } +DataSeries >> sortDescending [ + ^ self sort: [ :a :b | a value > b value ] ] -{ #category : #statistics } -DataSeries >> thirdQuartile [ - - ^ self quartile: 3 +{ #category : #enumerating } +DataSeries >> withIndexCollect: aBlock [ + ^ self species newFrom: + (self associations withIndexCollect: [:each :i | + each key -> (aBlock value: each value value: i)]) ] -{ #category : #'as yet unclassified' } -DataSeries >> unique [ - | unique | - unique := self asSet asDataSeries. - unique name: self name. - ^ unique +{ #category : #enumerating } +DataSeries >> withIndexDo: aBlock [ + self keys withIndexDo: [ :each :i | aBlock value: (self at: each) value: i ] ] { #category : #enumerating } -DataSeries >> withIndexCollect: aBlock [ - - | series | - series := super withIndexCollect: aBlock. - series name: self name. - ^ series +DataSeries >> withKeyCollect: aBlock [ + ^ self species newFrom: + (self associations collect: [:each | + each key -> (aBlock value: each value value: each key)]) ] { #category : #enumerating } -DataSeries >> withKeyDo: elementAndKeyBlock [ - - 1 to: self size do: [ :index | - elementAndKeyBlock - value: (contents at: index) - value: (keys at: index) ]. +DataSeries >> withKeyDo: aBlock [ + self keysDo: [ :each | aBlock value: (self at: each) value: each ] ] diff --git a/src/DataFrame/DataSeriesInternal.class.st b/src/DataFrame/DataSeriesInternal.class.st deleted file mode 100644 index a4957894..00000000 --- a/src/DataFrame/DataSeriesInternal.class.st +++ /dev/null @@ -1,114 +0,0 @@ -" -I am the internal representation of a DataSeries. I store the data very efficiently and allow you to access it very quickly. -" -Class { - #name : #DataSeriesInternal, - #superclass : #SequenceableCollection, - #instVars : [ - 'contents' - ], - #category : #'DataFrame-Core' -} - -{ #category : #'instance creation' } -DataSeriesInternal class >> new: aNumber [ - - ^ self new initialize: aNumber. - -] - -{ #category : #'instance creation' } -DataSeriesInternal class >> withValues: anArray [ - - ^ self new initializeWithArray: anArray. -] - -{ #category : #comparing } -DataSeriesInternal >> = other [ - - ((other class == self class - or: [ other isKindOf: Array ]) - and: [ other size = self size ]) - ifFalse: [ ^ false ]. - - 1 to: self size do: [ :i | - (self at: i) = (other at: i) - ifFalse: [ ^ false ] ]. - - ^ true. - -] - -{ #category : #adding } -DataSeriesInternal >> add: value [ - - contents := contents copyWith: value. -] - -{ #category : #accessing } -DataSeriesInternal >> at: aNumber [ - - ^ contents at: aNumber. -] - -{ #category : #accessing } -DataSeriesInternal >> at: aNumber ifAbsent: aBlock [ - - ^ contents at: aNumber ifAbsent: aBlock. -] - -{ #category : #accessing } -DataSeriesInternal >> at: aNumber put: value [ - - contents at: aNumber put: value. -] - -{ #category : #initialization } -DataSeriesInternal >> initialize [ - - super initialize. - contents := Array new. - ^ self. -] - -{ #category : #initialization } -DataSeriesInternal >> initialize: aNumber [ - - contents := Array new: aNumber. - ^ self. -] - -{ #category : #initialization } -DataSeriesInternal >> initializeWithArray: anArray [ - - anArray isArray - ifFalse: [ Error signal: - 'Only an instance of Array can be accepted as an argument' ]. - - contents := anArray. - ^ self. -] - -{ #category : #copying } -DataSeriesInternal >> postCopy [ - - contents := contents copy. -] - -{ #category : #printing } -DataSeriesInternal >> printOn: aStream [ - - contents printOn: aStream. -] - -{ #category : #removing } -DataSeriesInternal >> removeAt: aNumber [ - - contents := contents copyWithoutIndex: aNumber. -] - -{ #category : #accessing } -DataSeriesInternal >> size [ - - ^ contents size. -] From efd05894763aea0eb6240fc3f18b7f9f17f550b4 Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Wed, 13 Mar 2019 20:41:53 +0100 Subject: [PATCH 2/9] Fixed all DataSeries tests --- .../DataFrameAggrGroupTest.class.st | 19 - .../DataFrameQueriesTest.class.st | 2 +- .../DataFrameStatsTest.class.st | 2 +- .../DataSeriesMathTest.class.st | 89 -- .../DataSeriesSortableTest.class.st | 79 -- .../DataSeriesStatsTest.class.st | 113 --- src/DataFrame-Tests/DataSeriesTest.class.st | 780 ++++++++++++++---- src/DataFrame/Collection.extension.st | 17 + src/DataFrame/DataFrame.class.st | 13 +- src/DataFrame/DataFrameGrouped.class.st | 10 +- src/DataFrame/DataGrouped.class.st | 14 +- src/DataFrame/DataSeries.class.st | 330 +++++++- src/DataFrame/DataSeriesGrouped.class.st | 14 +- 13 files changed, 975 insertions(+), 507 deletions(-) delete mode 100644 src/DataFrame-Tests/DataSeriesMathTest.class.st delete mode 100644 src/DataFrame-Tests/DataSeriesSortableTest.class.st delete mode 100644 src/DataFrame-Tests/DataSeriesStatsTest.class.st diff --git a/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st b/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st index 31ba9c01..1f47c481 100644 --- a/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st +++ b/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st @@ -78,25 +78,6 @@ DataFrameAggrGroupTest >> testDataFrameGroupedPrintOn [ self assert: actual equals: expected. ] -{ #category : #initialization } -DataFrameAggrGroupTest >> testDataSeriesGroupedPrintOn [ - - | expected actual | - - expected := String new writeStream. - expected - nextPutAll: 'a DataSeriesGrouped'; cr; - nextPutAll: ('Male: a DataSeries [3 items]'); cr; - nextPutAll: ('Female: a DataSeries [2 items]'). - expected := expected contents. - - actual := String new writeStream. - (df group: #total_bill by: #sex) printOn: actual. - actual := actual contents. - - self assert: actual equals: expected. -] - { #category : #tests } DataFrameAggrGroupTest >> testGroupSeriesBySeries [ diff --git a/src/DataFrame-Tests/DataFrameQueriesTest.class.st b/src/DataFrame-Tests/DataFrameQueriesTest.class.st index 9ee0842d..b888d8ed 100644 --- a/src/DataFrame-Tests/DataFrameQueriesTest.class.st +++ b/src/DataFrame-Tests/DataFrameQueriesTest.class.st @@ -44,7 +44,7 @@ DataFrameQueriesTest >> testSelect [ actual := df select: [ :row | - (row atKey: #a) = 'x' and: (row atKey: #b) < 30 ]. + (row at: #a) = 'x' and: (row at: #b) < 30 ]. expected := DataFrame withRows: #( (x 10 0.25 0.1) diff --git a/src/DataFrame-Tests/DataFrameStatsTest.class.st b/src/DataFrame-Tests/DataFrameStatsTest.class.st index 1ec16d60..82ceb5b3 100644 --- a/src/DataFrame-Tests/DataFrameStatsTest.class.st +++ b/src/DataFrame-Tests/DataFrameStatsTest.class.st @@ -107,7 +107,7 @@ DataFrameStatsTest >> testMode [ | expected actual | - expected := { 4.7 . 3.2 . 1.4 . 0.2 } asDataSeries. + expected := { 6.3 . 3.2 . 1.4 . 0.2 } asDataSeries. expected name: #mode. expected keys: df columnNames. diff --git a/src/DataFrame-Tests/DataSeriesMathTest.class.st b/src/DataFrame-Tests/DataSeriesMathTest.class.st deleted file mode 100644 index 0426ea78..00000000 --- a/src/DataFrame-Tests/DataSeriesMathTest.class.st +++ /dev/null @@ -1,89 +0,0 @@ -Class { - #name : #DataSeriesMathTest, - #superclass : #TestCase, - #category : #'DataFrame-Tests' -} - -{ #category : #tests } -DataSeriesMathTest >> testAddElementwise [ - - | a b c | - - a := #(1 2 3) asDataSeries. - b := #(3 4 5) asDataSeries. - c := #(4 6 8) asDataSeries. - - self assert: a + b equals: c. -] - -{ #category : #tests } -DataSeriesMathTest >> testCos [ - - | a b pi | - - pi := Float pi. - a := { 0 . pi . pi/2 . pi/4 . pi/3 } asDataSeries. - b := { 1.0 . -1.0 . 0.0 . 1/2 sqrt . 0.5 } asDataSeries. - - self assert: a cos closeTo: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testDivideByScalar [ - - | a b | - - a := #(1 2 3) asDataSeries. - b := #(0.5 1 1.5) asDataSeries. - - self assert: a / 2 equals: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testMultiplyScalar [ - - | a b | - - a := #(1 2 3) asDataSeries. - b := #(2 4 6) asDataSeries. - - self assert: 2 * a equals: b. - self assert: a * 2 equals: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testPowerScalar [ - - | a b | - - a := #(1 2 3) asDataSeries. - b := #(1 4 9) asDataSeries. - - self assert: a ** 2 equals: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testSin [ - - | a b pi | - - pi := Float pi. - a := { 0 . pi . pi/2 . pi/4 . pi/6 } asDataSeries. - b := { 0.0 . 0.0 . 1.0 . 1/2 sqrt . 0.5 } asDataSeries. - - self assert: a sin closeTo: b. -] - -{ #category : #tests } -DataSeriesMathTest >> testSubtractElementwise [ - - | a b c d | - - a := #(1 2 3) asDataSeries. - b := #(3 6 8) asDataSeries. - c := #(-2 -4 -5) asDataSeries. - d := #(2 4 5) asDataSeries. - - self assert: a - b equals: c. - self assert: b - a equals: d. -] diff --git a/src/DataFrame-Tests/DataSeriesSortableTest.class.st b/src/DataFrame-Tests/DataSeriesSortableTest.class.st deleted file mode 100644 index 68270a4c..00000000 --- a/src/DataFrame-Tests/DataSeriesSortableTest.class.st +++ /dev/null @@ -1,79 +0,0 @@ -Class { - #name : #DataSeriesSortableTest, - #superclass : #TestCase, - #instVars : [ - 'series' - ], - #category : #'DataFrame-Tests' -} - -{ #category : #tests } -DataSeriesSortableTest >> setUp [ - - series := #(3 2 4 5 1 3 2 5 5 2 1) asDataSeries. - series keys: #(a b c d e f g h i j k). - series name: #TestSeries. -] - -{ #category : #tests } -DataSeriesSortableTest >> testSort [ -"Should sort this series into ascending order using the '<=' operator. Keys should be reordered together with elements" - - | expected | - - series sort. - - expected := #(1 1 2 2 2 3 3 4 5 5 5) asDataSeries. - expected keys: #(e k b g j a f c d h i). - expected name: series name. - - self assert: series equals: expected. -] - -{ #category : #tests } -DataSeriesSortableTest >> testSortBy [ -"Should sort this series using the given sortBlock. Keys should be reordered together with elements" - - | expected | - - series sort: [ :a :b | a >= b ]. - - expected := #(5 5 5 4 3 3 2 2 2 1 1) asDataSeries. - expected keys: #(d h i c a f b g j e k). - expected name: series name. - - self assert: series equals: expected. -] - -{ #category : #tests } -DataSeriesSortableTest >> testSorted [ -"Should return a new series which contains the same elements as self but its elements are sorted in ascending order using the #'<=' operator. Keys should be reordered together with elements. The initial series should not be affected" - - | actual expected | - - actual := series sorted. - - expected := #(1 1 2 2 2 3 3 4 5 5 5) asDataSeries. - expected keys: #(e k b g j a f c d h i). - expected name: series name. - - self assert: actual equals: expected. - self assert: series isSorted not. -] - -{ #category : #tests } -DataSeriesSortableTest >> testSortedBy [ -"Should return a new series which contains the same elements as self but its elements are sorted using the given sortBlock. Keys should be reordered together with elements. The initial series should not be affected" - - | sortBlock actual expected | - - sortBlock := [ :a :b | a >= b ]. - actual := series sorted: sortBlock. - - expected := #(5 5 5 4 3 3 2 2 2 1 1) asDataSeries. - expected keys: #(d h i c a f b g j e k). - expected name: series name. - - self assert: actual equals: expected. - self assert: (series isSortedBy: sortBlock) not. -] diff --git a/src/DataFrame-Tests/DataSeriesStatsTest.class.st b/src/DataFrame-Tests/DataSeriesStatsTest.class.st deleted file mode 100644 index 71c0d43d..00000000 --- a/src/DataFrame-Tests/DataSeriesStatsTest.class.st +++ /dev/null @@ -1,113 +0,0 @@ -Class { - #name : #DataSeriesStatsTest, - #superclass : #TestCase, - #instVars : [ - 'series' - ], - #category : #'DataFrame-Tests' -} - -{ #category : #initialization } -DataSeriesStatsTest >> setUp [ - - series := #(3 7 6 20 8 9 8 10 15 13 16) asDataSeries. -] - -{ #category : #tests } -DataSeriesStatsTest >> testAverage [ - - self assert: series average equals: (115/11). -] - -{ #category : #tests } -DataSeriesStatsTest >> testFirstQuartile [ - - self assert: series firstQuartile equals: 7. -] - -{ #category : #tests } -DataSeriesStatsTest >> testInterquartileRange [ - - self assert: series interquartileRange equals: 8. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMax [ - - self assert: series max equals: 20. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMedian [ - - self assert: series median equals: 9. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMin [ - - self assert: series min equals: 3. -] - -{ #category : #tests } -DataSeriesStatsTest >> testMode [ - - self assert: series mode equals: 8. -] - -{ #category : #tests } -DataSeriesStatsTest >> testQuantile [ - - self assert: (series quantile: 0) equals: 3. - self assert: (series quantile: 10) equals: 6. - self assert: (series quantile: 25) equals: 7. - self assert: (series quantile: 50) equals: 9. - self assert: (series quantile: 75) equals: 15. - self assert: (series quantile: 100) equals: 20. -] - -{ #category : #tests } -DataSeriesStatsTest >> testQuartile [ - - self assert: (series quartile: 0) equals: 3. - self assert: (series quartile: 1) equals: 7. - self assert: (series quartile: 2) equals: 9. - self assert: (series quartile: 3) equals: 15. - self assert: (series quartile: 4) equals: 20. -] - -{ #category : #tests } -DataSeriesStatsTest >> testRange [ - - self assert: series range equals: 17. -] - -{ #category : #tests } -DataSeriesStatsTest >> testStdev [ - - self assert: series stdev closeTo: 5.00727. -] - -{ #category : #tests } -DataSeriesStatsTest >> testSummary [ - - | expected actual | - - expected := { 3.0 . 7.0 . 9.0 . (115 / 11) asFloat . 15.0 . 20.0 } asDataSeries. - expected keys: #(Min '1st Qu.' Median Average '3rd Qu.' Max). - actual := series summary collect: #asFloat. - - self assert: actual equals: expected. -] - -{ #category : #tests } -DataSeriesStatsTest >> testThirdQuartile [ - - self assert: series thirdQuartile equals: 15. -] - -{ #category : #tests } -DataSeriesStatsTest >> testVariance [ - - self assert: series variance closeTo: 25.07273. -] diff --git a/src/DataFrame-Tests/DataSeriesTest.class.st b/src/DataFrame-Tests/DataSeriesTest.class.st index b021679f..a382cc96 100644 --- a/src/DataFrame-Tests/DataSeriesTest.class.st +++ b/src/DataFrame-Tests/DataSeriesTest.class.st @@ -10,12 +10,100 @@ Class { { #category : #initialization } DataSeriesTest >> setUp [ + keyArray := #(a b c d e f g h i j k). + + series := DataSeries + withKeys: keyArray + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: 'ExampleSeries'. +] + +{ #category : #tests } +DataSeriesTest >> testAddArrayToSeries [ + | series array actual expected | + + series := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + array := #(3 4 5). + + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: #X. + actual := series + array. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testAddScalarToSeries [ + | series scalar actual expected | + + series := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + scalar := 10. + + expected := DataSeries withKeys: #(a b c) values: #(11 12 13) name: #X. + actual := series + scalar. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToArray [ + | array series actual expected | + + array := #(1 2 3). + series := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. + + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: #X. + actual := array + series. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToScalar [ + | scalar series actual expected | + + scalar := 10. + series := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. + + expected := DataSeries withKeys: #(a b c) values: #(13 14 15) name: #X. + actual := scalar + series. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToSeriesDifferentKeys [ + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(x y z) values: #(1 2 3) name: #X. + secondSeries := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. + + self should: [ firstSeries + secondSeries ] raise: Error. +] + +{ #category : #tests } +DataSeriesTest >> testAddSeriesToSeriesSameKeysAndName [ + | firstSeries secondSeries actual expected | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + secondSeries := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #X. + + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: #X. + actual := firstSeries + secondSeries. + + self assert: actual equals: expected. +] - series := (10 to: 100 by: 10) asDataSeries. - keyArray := #(a b c d e f g h i j). +{ #category : #tests } +DataSeriesTest >> testAddSeriesToSeriesSameKeysDifferentName [ + | firstSeries secondSeries actual expected | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 2 3) name: #X. + secondSeries := DataSeries withKeys: #(a b c) values: #(3 4 5) name: #Y. - series keys: keyArray. - series name: 'ExampleSeries'. + expected := DataSeries withKeys: #(a b c) values: #(4 6 8) name: '(no name)'. + actual := firstSeries + secondSeries. + + self assert: actual equals: expected. ] { #category : #tests } @@ -23,9 +111,9 @@ DataSeriesTest >> testAsDictionary [ | expected actual | expected := { - 'a' -> 10 . 'b' -> 20 . 'c' -> 30 . 'd' -> 40 . - 'e' -> 50 . 'f' -> 60 . 'g' -> 70 . 'h' -> 80 . - 'i' -> 90 . 'j' -> 100 } asDictionary. + 'a' -> 3 . 'b' -> 7 . 'c' -> 6 . 'd' -> 20 . + 'e' -> 8 . 'f' -> 9 . 'g' -> 8 . 'h' -> 10 . + 'i' -> 15 . 'j' -> 13 . 'k' -> 16 } asDictionary. actual := series asDictionary. self assert: actual equals: expected @@ -34,19 +122,19 @@ DataSeriesTest >> testAsDictionary [ { #category : #tests } DataSeriesTest >> testAt [ - self assert: (series at: #b) equals: 20. + self assert: (series at: #b) equals: 7. ] { #category : #tests } DataSeriesTest >> testAtIndex [ - self assert: (series atIndex: 2) equals: 20. + self assert: (series atIndex: 2) equals: 7. ] { #category : #tests } DataSeriesTest >> testAtIndexIfAbsent [ - self assert: (series atIndex: 2 ifAbsent: [ 'Executing a block' ]) equals: 20. + self assert: (series atIndex: 2 ifAbsent: [ 'Executing a block' ]) equals: 7. self assert: (series atIndex: 100 ifAbsent: [ 'Executing a block' ]) equals: 'Executing a block'. ] @@ -56,10 +144,12 @@ DataSeriesTest >> testAtIndexPut [ | expected | - series atIndex: 6 put: -2. - expected := #(10 20 30 40 50 -2 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + series atIndex: 6 put: -2. + + expected := DataSeries + withKeys: keyArray + values: #(3 7 6 20 8 -2 8 10 15 13 16) + name: series name. self assert: series equals: expected. ] @@ -70,9 +160,11 @@ DataSeriesTest >> testAtPut [ | expected | series at: #f put: -2. - expected := #(10 20 30 40 50 -2 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + + expected := DataSeries + withKeys: keyArray + values: #(3 7 6 20 8 -2 8 10 15 13 16) + name: series name. self assert: series equals: expected. ] @@ -83,13 +175,21 @@ DataSeriesTest >> testAtPutNewElement [ | expected | series at: #X put: -2. - expected := #(10 20 30 40 50 60 70 80 90 100 -2) asDataSeries. - expected keys: (keyArray copyWith: #X). - expected name: series name. + + expected := DataSeries + withKeys: (keyArray copyWith: #X) + values: #(3 7 6 20 8 9 8 10 15 13 16 -2) + name: series name. self assert: series equals: expected. ] +{ #category : #tests } +DataSeriesTest >> testAverage [ + + self assert: series average equals: (115/11). +] + { #category : #tests } DataSeriesTest >> testCollect [ @@ -98,34 +198,76 @@ DataSeriesTest >> testCollect [ actual := series collect: [ :each | each / 10 ]. - expected := (1 to: 10) asDataSeries. - expected keys: keyArray. - expected name: series name. + expected := DataSeries + withKeys: keyArray + values: { 3/10 . 7/10 . 3/5 . 2 . 4/5 . 9/10 . 4/5 . 1 . 3/2 . 13/10 . 8/5 } + name: 'ExampleSeries'. self assert: actual equals: expected. ] { #category : #tests } -DataSeriesTest >> testCopy [ +DataSeriesTest >> testCopyCanBeChanged [ - | seriesCopy expectedOriginal expectedCopy | + | original seriesCopy expectedCopyAfterChange | - expectedOriginal := #(10 20 30 40 50 60 70 80 90 100) asDataSeries. - expectedOriginal keys: keyArray. - expectedOriginal name: series name. + original := DataSeries + withKeys: #(a b c d) + values: #(10 30 20 40) + name: #X. + + expectedCopyAfterChange := DataSeries + withKeys: #(b c d e) + values: #(100 20 40 200) + name: #Y. - expectedCopy := #(10 20 30 40 50 -2 70 80 100 -3) asDataSeries. - expectedCopy keys: ((keyArray copyWithout: #i) copyWith: #X). - expectedCopy name: #ChangedName. + seriesCopy := original copy. + + seriesCopy at: #b put: 100. + seriesCopy at: #e put: 200. + seriesCopy removeAt: #a. + seriesCopy name: #Y. - seriesCopy := series copy. - seriesCopy at: 6 put: -2. - seriesCopy removeAt: #i. - seriesCopy at: #X put: -3. - seriesCopy name: #ChangedName. + self assert: seriesCopy equals: expectedCopyAfterChange. - self assert: series equals: expectedOriginal. - self assert: seriesCopy equals: expectedCopy. +] + +{ #category : #tests } +DataSeriesTest >> testCopyChangeDoesNotAffectOriginal [ + + | original seriesCopy expectedOriginalAfterChange | + + original := DataSeries + withKeys: #(a b c d) + values: #(10 30 20 40) + name: #X. + + expectedOriginalAfterChange := DataSeries + withKeys: #(a b c d) + values: #(10 30 20 40) + name: #X. + + seriesCopy := original copy. + + seriesCopy at: #b put: 100. + seriesCopy at: #e put: 200. + seriesCopy removeAt: #a. + seriesCopy name: #Y. + + self assert: original equals: expectedOriginalAfterChange. + +] + +{ #category : #tests } +DataSeriesTest >> testCos [ + + | a b pi | + + pi := Float pi. + a := { 0 . pi . pi/2 . pi/4 . pi/3 } asDataSeries. + b := { 1.0 . -1.0 . 0.0 . 1/2 sqrt . 0.5 } asDataSeries. + + self assert: a cos closeTo: b. ] { #category : #tests } @@ -222,49 +364,6 @@ DataSeriesTest >> testCreateEmptyDataSeries [ self assert: dataSeries name isNil. ] -{ #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithKeys [ - - | keys dataSeries | - - keys := #(x y z) asOrderedCollection. - dataSeries := DataSeries withKeys: keys. - - self assert: dataSeries size equals: 3. - self assert: dataSeries asArray equals: #(nil nil nil). - self assert: dataSeries keys equals: keys. - self assert: dataSeries name isNil. -] - -{ #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithKeysName [ - - | keys name dataSeries | - - keys := #(x y z) asOrderedCollection. - name := 'Some data'. - dataSeries := DataSeries withKeys: keys name: name. - - self assert: dataSeries size equals: 3. - self assert: dataSeries asArray equals: #(nil nil nil). - self assert: dataSeries keys equals: keys. - self assert: dataSeries name equals: name. -] - -{ #category : #tests } -DataSeriesTest >> testCreateEmptyDataSeriesWithName [ - - | name dataSeries | - - name := 'Some data'. - dataSeries := DataSeries withName: name. - - self assert: dataSeries size equals: 0. - self assert: dataSeries asArray equals: #(). - self assert: dataSeries keys equals: #() asOrderedCollection. - self assert: dataSeries name equals: name. -] - { #category : #tests } DataSeriesTest >> testCrossTabulateWith [ @@ -281,6 +380,17 @@ DataSeriesTest >> testCrossTabulateWith [ self assert: (series1 crossTabulateWith: series2) equals: expected. ] +{ #category : #tests } +DataSeriesTest >> testDivideByScalar [ + + | a b | + + a := #(1 2 3) asDataSeries. + b := #(0.5 1 1.5) asDataSeries. + + self assert: a / 2 equals: b. +] + { #category : #tests } DataSeriesTest >> testDo [ @@ -290,21 +400,60 @@ DataSeriesTest >> testDo [ series do: [ :each | sum := sum + each ]. - self assert: sum equals: 550. + self assert: sum equals: 115. +] + +{ #category : #tests } +DataSeriesTest >> testEighth [ + + self assert: series eighth equals: 10. ] { #category : #tests } DataSeriesTest >> testEquality [ - | a b | - - a := #(1 0.1 'a') asDataSeries. - b := #(1 0.1 'a') asDataSeries. + | firstSeries secondSeries | - a name: 'A'. - b name: 'B'. + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. - self assert: a equals: b. + self assert: firstSeries equals: secondSeries. +] + +{ #category : #tests } +DataSeriesTest >> testFifth [ + + self assert: series fifth equals: 8. +] + +{ #category : #tests } +DataSeriesTest >> testFirst [ + + self assert: series first equals: 3. +] + +{ #category : #tests } +DataSeriesTest >> testFirstQuartile [ + + self assert: series firstQuartile equals: 7. +] + +{ #category : #tests } +DataSeriesTest >> testFourth [ + + self assert: series fourth equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testFourthQuartile [ + + self assert: series fourthQuartile equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testFourthQuartileEqualsMax [ + + self assert: series fourthQuartile equals: series max. ] { #category : #tests } @@ -323,6 +472,25 @@ DataSeriesTest >> testFrequencyTable [ self assert: aSeries frequencyTable equals: expected ] +{ #category : #tests } +DataSeriesTest >> testGroupBy [ + | firstSeries secondSeries expected actual | + + firstSeries := DataSeries withValues: #(1 10 2 1 5) name: #salary. + secondSeries := DataSeries withValues: #(Male Female Male Male Female) name: #sex. + + expected := DataSeriesGrouped new. + expected groups: (DataSeries + withKeys: #(Female Male) + values: { + DataSeries withValues: #(10 5) . + DataSeries withValues: #(1 2 1) } + name: #salary). + + actual := firstSeries groupBy: secondSeries. + self assert: actual equals: expected. +] + { #category : #tests } DataSeriesTest >> testInequality [ @@ -338,147 +506,391 @@ DataSeriesTest >> testInequality [ ] { #category : #tests } -DataSeriesTest >> testRemoveAt [ +DataSeriesTest >> testInequalityDifferentKeys [ - | expected | + | firstSeries secondSeries | - expected := #(10 20 40 50 60 70 80 90 100) asDataSeries. - expected keys: (keyArray copyWithout: #c). - expected name: series name. + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(x y z) values: #(1 0.1 'a') name: 'A'. - series removeAt: #c. - - self assert: series equals: expected. + self assert: (firstSeries ~= secondSeries). ] { #category : #tests } -DataSeriesTest >> testRemoveAtIndex [ +DataSeriesTest >> testInequalityDifferentNames [ - | expected | + | firstSeries secondSeries | - expected := #(10 20 40 50 60 70 80 90 100) asDataSeries. - expected keys: (keyArray copyWithout: #c). - expected name: series name. + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'B'. - series removeAtIndex: 3. + self assert: (firstSeries ~= secondSeries). +] + +{ #category : #tests } +DataSeriesTest >> testInequalityDifferentValues [ + + | firstSeries secondSeries | - self assert: series equals: expected. + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 'a') name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.2 'a') name: 'A'. + + self assert: (firstSeries ~= secondSeries). ] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithArray [ +DataSeriesTest >> testInterquartileRange [ - | replacement expected | + self assert: series interquartileRange equals: 8. +] + +{ #category : #tests } +DataSeriesTest >> testLast [ + + self assert: series last equals: 16. +] + +{ #category : #tests } +DataSeriesTest >> testMax [ + + self assert: series max equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testMedian [ + + self assert: series median equals: 9. +] + +{ #category : #tests } +DataSeriesTest >> testMin [ + + self assert: series min equals: 3. +] + +{ #category : #tests } +DataSeriesTest >> testMode [ + + self assert: series mode equals: 8. +] + +{ #category : #tests } +DataSeriesTest >> testMultiplyScalar [ + + | a b | - replacement := #(x y z). + a := #(1 2 3) asDataSeries. + b := #(2 4 6) asDataSeries. - expected := #(10 20 x y z 60 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + self assert: 2 * a equals: b. + self assert: a * 2 equals: b. +] + +{ #category : #tests } +DataSeriesTest >> testNinth [ + + self assert: series ninth equals: 15. +] + +{ #category : #tests } +DataSeriesTest >> testPowerScalar [ + + | a b | - series replaceFrom: 3 to: 5 with: replacement. + a := #(1 2 3) asDataSeries. + b := #(1 4 9) asDataSeries. - self assert: series equals: expected. + self assert: a ** 2 equals: b. ] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithSeries [ +DataSeriesTest >> testQuantile [ + + self assert: (series quantile: 0) equals: 3. + self assert: (series quantile: 10) equals: 6. + self assert: (series quantile: 25) equals: 7. + self assert: (series quantile: 50) equals: 9. + self assert: (series quantile: 75) equals: 15. + self assert: (series quantile: 100) equals: 20. +] - | replacement expected | +{ #category : #tests } +DataSeriesTest >> testQuartile [ - replacement := #(x y z) asDataSeries. - replacement keys: #(k1 k2 k3). - replacement name: #Replacement. + self assert: (series quartile: 0) equals: 3. + self assert: (series quartile: 1) equals: 7. + self assert: (series quartile: 2) equals: 9. + self assert: (series quartile: 3) equals: 15. + self assert: (series quartile: 4) equals: 20. +] + +{ #category : #tests } +DataSeriesTest >> testRange [ + + self assert: series range equals: 17. +] + +{ #category : #tests } +DataSeriesTest >> testRemoveAt [ + + | expected | - expected := #(10 20 x y z 60 70 80 90 100) asDataSeries. - expected keys: #(a b k1 k2 k3 f g h i j). - expected name: series name. + expected := DataSeries + withKeys: (keyArray copyWithout: #c) + values: #(3 7 20 8 9 8 10 15 13 16) + name: 'ExampleSeries'. - series replaceFrom: 3 to: 5 with: replacement. + series removeAt: #c. self assert: series equals: expected. ] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithStartingAtArray [ +DataSeriesTest >> testRemoveAtIndex [ - | replacement expected | - - replacement := #(a b c d e). + | expected | - expected := #(10 20 b c d 60 70 80 90 100) asDataSeries. - expected keys: keyArray. - expected name: series name. + expected := DataSeries + withKeys: (keyArray copyWithout: #c) + values: #(3 7 20 8 9 8 10 15 13 16) + name: 'ExampleSeries'. - series replaceFrom: 3 to: 5 with: replacement startingAt: 2. + series removeAtIndex: 3. self assert: series equals: expected. ] { #category : #tests } -DataSeriesTest >> testReplaceFromToWithStartingAtSeries [ +DataSeriesTest >> testSecond [ + + self assert: series second equals: 7. +] + +{ #category : #tests } +DataSeriesTest >> testSecondQuartile [ + + self assert: series secondQuartile equals: 9. +] + +{ #category : #tests } +DataSeriesTest >> testSecondQuartileEqualsMedian [ + + self assert: series secondQuartile equals: series median. +] + +{ #category : #tests } +DataSeriesTest >> testSeventh [ - | replacement expected | + self assert: series seventh equals: 8. +] + +{ #category : #tests } +DataSeriesTest >> testSin [ + + | a b pi | - replacement := #(a b c d e) asDataSeries. - replacement keys: #(k1 k2 k3 k4 k5). - replacement name: #Replacement. + pi := Float pi. + a := { 0 . pi . pi/2 . pi/4 . pi/6 } asDataSeries. + b := { 0.0 . 0.0 . 1.0 . 1/2 sqrt . 0.5 } asDataSeries. - expected := #(10 20 b c d 60 70 80 90 100) asDataSeries. - expected keys: #(a b k2 k3 k4 f g h i j). - expected name: series name. + self assert: a sin closeTo: b. +] + +{ #category : #tests } +DataSeriesTest >> testSixth [ + + self assert: series sixth equals: 9. +] + +{ #category : #tests } +DataSeriesTest >> testSort [ + | expected | - series replaceFrom: 3 to: 5 with: replacement startingAt: 2. + expected := DataSeries + withKeys: #(a c b e g f h j i k d) + values: #(3 6 7 8 8 9 10 13 15 16 20) + name: series name. + + series sort. + self assert: series equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortBlock [ + | expected | - self assert: series equals: expected. + expected := DataSeries + withKeys: #(k j i h d g f e c b a) + values: #(16 13 15 10 20 8 9 8 6 7 3) + name: series name. + + series sort: [ :a :b | a asString size > b asString size ]. + self assert: series equals: expected ] { #category : #tests } -DataSeriesTest >> testSort [ - | cities expected | +DataSeriesTest >> testSortDescending [ + | expected | - cities := #(London Dubai Paris Berlin) asDataSeries. - cities keys: #(A B C D). - cities name: #Cities. + expected := DataSeries + withKeys: #(d k i j h f g e b c a) + values: #(20 16 15 13 10 9 8 8 7 6 3) + name: series name. + + series sortDescending. + self assert: series equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSorted [ + | expected actual | - expected := #(Berlin Dubai London Paris) asDataSeries. - expected keys: #(D B A C). - expected name: #Cities. + expected := DataSeries + withKeys: #(a c b e g f h j i k d) + values: #(3 6 7 8 8 9 10 13 15 16 20) + name: series name. + + actual := series sorted. + self assert: actual equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortedBlock [ + | expected actual | - cities sort. - self assert: cities equals: expected. + expected := DataSeries + withKeys: #(k j i h d g f e c b a) + values: #(16 13 15 10 20 8 9 8 6 7 3) + name: series name. + + actual := series sorted: [ :a :b | a asString size > b asString size ]. + self assert: actual equals: expected ] { #category : #tests } -DataSeriesTest >> testSortBlock [ - | cities expected | +DataSeriesTest >> testSortedBlockDoesNotChangeTheReceiver [ + | expected | - cities := #(London Dubai Paris Berlin) asDataSeries. - cities keys: #(A B C D). - cities name: #Cities. + expected := DataSeries + withKeys: #(a b c d e f g h i j k) + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: series name. + + series sorted: [ :a :b | a asString size > b asString size ]. + self assert: series equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortedDescending [ + | expected actual | - expected := #(Dubai London Berlin Paris) asDataSeries. - expected keys: #(B A D C). - expected name: #Cities. + expected := DataSeries + withKeys: #(d k i j h f g e b c a) + values: #(20 16 15 13 10 9 8 8 7 6 3) + name: series name. + + actual := series sortedDescending. + self assert: actual equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testSortedDescendingDoesNotChangeTheReceiver [ + | expected | - cities sort: [ :a :b | a value last <= b value last ]. - self assert: cities equals: expected. + expected := DataSeries + withKeys: #(a b c d e f g h i j k) + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: series name. + + series sortedDescending. + self assert: series equals: expected ] { #category : #tests } -DataSeriesTest >> testSortDescending [ - | cities expected | +DataSeriesTest >> testSortedDoesNotChangeTheReceiver [ + | expected | - cities := #(London Dubai Paris Berlin) asDataSeries. - cities keys: #(A B C D). - cities name: #Cities. + expected := DataSeries + withKeys: #(a b c d e f g h i j k) + values: #(3 7 6 20 8 9 8 10 15 13 16) + name: series name. + + series sorted. + self assert: series equals: expected +] + +{ #category : #tests } +DataSeriesTest >> testStdev [ + + self assert: series stdev closeTo: 5.00727. +] + +{ #category : #tests } +DataSeriesTest >> testSubtractElementwise [ + + | a b c d | + + a := #(1 2 3) asDataSeries. + b := #(3 6 8) asDataSeries. + c := #(-2 -4 -5) asDataSeries. + d := #(2 4 5) asDataSeries. + + self assert: a - b equals: c. + self assert: b - a equals: d. +] + +{ #category : #tests } +DataSeriesTest >> testSummary [ + | expected actual | - expected := #(Paris London Dubai Berlin) asDataSeries. - expected keys: #(C A B D). - expected name: #Cities. + expected := DataSeries + withKeys: #(Min '1st Qu.' Median Average '3rd Qu.' Max) + values: { 3 . 7 . 9 . (115 / 11) . 15 . 20 } + name: series name. + + actual := series summary. + + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testThird [ + + self assert: series third equals: 6. +] + +{ #category : #tests } +DataSeriesTest >> testThirdQuartile [ + + self assert: series thirdQuartile equals: 15. +] + +{ #category : #tests } +DataSeriesTest >> testUniqueValues [ + | aSeries expected actual | + aSeries := DataSeries withKeys: #(a b c d e) values: #(z y y z x). + expected := #(x y z). + actual := aSeries uniqueValues. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testValueCounts [ + | actual expected | - cities sortDescending. - self assert: cities equals: expected. + expected := DataSeries + withKeys: #(8 16 15 13 10 9 7 6 20 3) + values: #(2 1 1 1 1 1 1 1 1 1) + name: series name. + + actual := series valueCounts. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testVariance [ + + self assert: series variance closeTo: 25.07273. ] { #category : #tests } @@ -489,9 +901,10 @@ DataSeriesTest >> testWithIndexCollect [ actual := series withIndexCollect: [ :each :i | each / 10 + i ]. - expected := (2 to: 20 by: 2) asDataSeries. - expected keys: keyArray. - expected name: series name. + expected := DataSeries + withKeys: keyArray + values: {(13/10). (27/10). (18/5). 6. (29/5). (69/10). (39/5). 9. (21/2). (113/10). (63/5)} + name: 'ExampleSeries'. self assert: actual equals: expected. ] @@ -505,7 +918,7 @@ DataSeriesTest >> testWithIndexDo [ series withIndexDo: [ :each :i | sum := sum + (each / i) ]. - self assert: sum equals: 100. + self assert: sum equals: (108173/4620). ] { #category : #tests } @@ -516,9 +929,10 @@ DataSeriesTest >> testWithKeyCollect [ actual := series withKeyCollect: [ :each :key | each / 10 + (keyArray indexOf: key) ]. - expected := (2 to: 20 by: 2) asDataSeries. - expected keys: keyArray. - expected name: series name. + expected := DataSeries + withKeys: keyArray + values: {(13/10). (27/10). (18/5). 6. (29/5). (69/10). (39/5). 9. (21/2). (113/10). (63/5)} + name: 'ExampleSeries'. self assert: actual equals: expected. ] @@ -532,5 +946,17 @@ DataSeriesTest >> testWithKeyDo [ series withKeyDo: [ :each :key | sum := sum + (each / (keyArray indexOf: key)) ]. - self assert: sum equals: 100. + self assert: sum equals: (108173/4620). +] + +{ #category : #tests } +DataSeriesTest >> testZerothQuartile [ + + self assert: series zerothQuartile equals: 3. +] + +{ #category : #tests } +DataSeriesTest >> testZerothQuartileEqualsMin [ + + self assert: series zerothQuartile equals: series min. ] diff --git a/src/DataFrame/Collection.extension.st b/src/DataFrame/Collection.extension.st index 4cf98c9e..8f45e1b7 100644 --- a/src/DataFrame/Collection.extension.st +++ b/src/DataFrame/Collection.extension.st @@ -26,3 +26,20 @@ Collection >> variance [ ^ self stdev squared. ] + +{ #category : #'*DataFrame' } +Collection >> withSeries: aDataSeries collect: twoArgBlock [ + "Collect and return the result of evaluating twoArgBlock with corresponding elements from this collection and aDataSeries." + | result | + aDataSeries size = self size ifFalse: [self errorSizeMismatch]. + + result := aDataSeries species new: self size. + result name: aDataSeries name. + + aDataSeries keys withIndexDo: [ :key :i | + result at: key put: + (twoArgBlock + value: (self at: i) + value: (aDataSeries at: key))]. + ^ result +] diff --git a/src/DataFrame/DataFrame.class.st b/src/DataFrame/DataFrame.class.st index ebf96e32..0a352d9e 100644 --- a/src/DataFrame/DataFrame.class.st +++ b/src/DataFrame/DataFrame.class.st @@ -367,7 +367,7 @@ DataFrame >> addEmptyRowNamed: aString atPosition: aNumber [ { #category : #adding } DataFrame >> addRow: aDataSeries [ "Add DataSeries as a new row at the end" - self addRow: aDataSeries named: aDataSeries name. + self addRow: aDataSeries asArray named: aDataSeries name. ] { #category : #adding } @@ -957,14 +957,17 @@ DataFrame >> select: aBlock [ Collect into a new collection like the receiver, only those elements for which aBlock evaluates to true. Answer the new collection." - | rows selectedRows df | + | rows selectedRows selectedRowNames selectedRowsAsArrays df | rows := self asArrayOfRows. selectedRows := rows select: aBlock. + selectedRowNames := selectedRows collect: #name. + selectedRowsAsArrays := selectedRows collect: #asArray. - df := self class withRows: selectedRows. - df columnNames: self columnNames. - df rowNames: (selectedRows collect: #name). + df := self class + withRows: selectedRowsAsArrays + rowNames: selectedRowNames + columnNames: self columnNames. ^ df ] diff --git a/src/DataFrame/DataFrameGrouped.class.st b/src/DataFrame/DataFrameGrouped.class.st index 7ff91d14..1d0595ff 100644 --- a/src/DataFrame/DataFrameGrouped.class.st +++ b/src/DataFrame/DataFrameGrouped.class.st @@ -15,7 +15,7 @@ DataFrameGrouped >> apply: aBlock [ | colNames numberOfRows numberOfColumns result | - colNames := (groups at: 1) columnNames. + colNames := groups first columnNames. numberOfRows := groups size. numberOfColumns := colNames size. @@ -24,7 +24,7 @@ DataFrameGrouped >> apply: aBlock [ result rowNames: groups keys. result columnNames: colNames. - groups doWithIndex: [ :df :i | + groups withIndexDo: [ :df :i | 1 to: colNames size do: [ :j | result at: i at: j put: (aBlock value: (df columnAt: j)) ] ]. @@ -56,15 +56,15 @@ DataFrameGrouped >> split: aDataFrame by: aSeries [ aDataFrame numberOfRows = aSeries size ifFalse: [ SizeMismatch signal ]. - seriesUnique := aSeries unique asArray. + seriesUnique := aSeries uniqueValues. groups := seriesUnique collect: [ :eachUnique | | aList df | aList := LinkedList new. - aSeries doWithIndex: [ :each :i | + aSeries withIndexDo: [ :each :i | each = eachUnique - ifTrue: [ aList add: (aDataFrame rowAt: i) ] ]. + ifTrue: [ aList add: (aDataFrame rowAt: i) asArray ] ]. df := DataFrame withRows: aList. df columnNames: aDataFrame columnNames. diff --git a/src/DataFrame/DataGrouped.class.st b/src/DataFrame/DataGrouped.class.st index bb05e4b4..2fb66169 100644 --- a/src/DataFrame/DataGrouped.class.st +++ b/src/DataFrame/DataGrouped.class.st @@ -7,6 +7,14 @@ Class { #category : #'DataFrame-Core' } +{ #category : #comparing } +DataGrouped >> = anObject [ + self species == anObject species + ifFalse: [ ^ false ]. + + ^ self groups = anObject groups +] + { #category : #private } DataGrouped >> apply: aBlock [ @@ -27,10 +35,14 @@ DataGrouped >> count [ { #category : #accessing } DataGrouped >> groups [ - ^ groups ] +{ #category : #accessing } +DataGrouped >> groups: anObject [ + groups := anObject +] + { #category : #private } DataGrouped >> max [ diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 411e6024..42bf9ffd 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -39,33 +39,156 @@ DataSeries class >> withValues: values name: aName [ ^ (self withKeys: keys values: values) name: aName; yourself ] -{ #category : #'as yet unclassified' } +{ #category : #comparing } +DataSeries >> = anObject [ + (super = anObject) + ifFalse: [ ^ false ]. + + ^ anObject name = self name + "order of keys" + and: [ anObject keys = self keys ] +] + +{ #category : #adapting } +DataSeries >> adaptToCollection: rcvr andSend: selector [ + "If I am involved in arithmetic with another Collection, return a Collection of + the results of each element combined with the scalar in that expression." + + (rcvr isSequenceable and: [ self isSequenceable ]) ifFalse: + [self error: 'Only sequenceable collections may be combined arithmetically']. + ^ rcvr withSeries: self collect: + [:rcvrElement :myElement | rcvrElement perform: selector with: myElement] +] + +{ #category : #accessing } DataSeries >> atIndex: aNumber [ ^ self at: (self keys at: aNumber) ] -{ #category : #'as yet unclassified' } +{ #category : #accessing } DataSeries >> atIndex: aNumber ifAbsent: aBlock [ ^ [ self at: (self keys at: aNumber) ] on: SubscriptOutOfBounds do: aBlock ] -{ #category : #'as yet unclassified' } +{ #category : #accessing } DataSeries >> atIndex: aNumber put: aValue [ ^ self at: (self keys at: aNumber) put: aValue ] +{ #category : #enumerating } +DataSeries >> collect: aBlock [ + | result | + result := super collect: aBlock. + result name: self name. + ^ result +] + +{ #category : #'as yet unclassified' } +DataSeries >> crossTabulateWith: aSeries [ + | df | + + (self size = aSeries size) + ifFalse: [ SizeMismatch signal ]. + + df := DataFrame withRows: + (self uniqueValues collect: [ :each1 | + aSeries uniqueValues collect: [ :each2 | + (1 to: self size) inject: 0 into: [ :accum :i | + (((self at: i) = each1) and: ((aSeries at: i) = each2)) + ifTrue: [ accum + 1 ] + ifFalse: [ accum ] ] ] ]). + + df rowNames: self uniqueValues. + df columnNames: aSeries uniqueValues. + ^ df +] + { #category : #accessing } DataSeries >> defaultName [ ^ '(no name)' ] +{ #category : #accessing } +DataSeries >> eighth [ + "Answer the eighth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 8 +] + +{ #category : #errors } +DataSeries >> errorKeysMismatch [ + Error signal: 'Keys of two series do not match' +] + +{ #category : #accessing } +DataSeries >> fifth [ + "Answer the fifth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 5 +] + +{ #category : #accessing } +DataSeries >> first [ + "Answer the first element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 1 +] + +{ #category : #statistics } +DataSeries >> firstQuartile [ + ^ self quartile: 1 +] + +{ #category : #accessing } +DataSeries >> fourth [ + "Answer the fourth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 4 +] + +{ #category : #statistics } +DataSeries >> fourthQuartile [ + ^ self quartile: 4 +] + +{ #category : #'as yet unclassified' } +DataSeries >> frequencyTable [ + | df count proportion | + + count := self valueCounts. + proportion := count / self size. + + df := DataFrame withColumns: + { count asArray . proportion asArray }. + + df rowNames: self uniqueValues asArray. + df columnNames: #(Count Proportion). + + ^ df +] + +{ #category : #queries } +DataSeries >> groupBy: otherSeries [ + ^ DataSeriesGrouped group: self by: otherSeries +] + { #category : #initialization } DataSeries >> initialize [ super initialize. name := self defaultName. ] -{ #category : #'as yet unclassified' } +{ #category : #statistics } +DataSeries >> interquartileRange [ + ^ self thirdQuartile - self firstQuartile +] + +{ #category : #testing } +DataSeries >> isSequenceable [ + ^ true +] + +{ #category : #accessing } DataSeries >> keys: anArrayOfKeys [ | keys | keys := anArrayOfKeys deepCopy. @@ -73,6 +196,21 @@ DataSeries >> keys: anArrayOfKeys [ orderedKeys := keys. ] +{ #category : #accessing } +DataSeries >> last [ + "Answer the last element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: self size +] + +{ #category : #accessing } +DataSeries >> mode [ + | valueCounts maxCount | + valueCounts := (self groupBy: self) count. + maxCount := valueCounts max. + ^ valueCounts keyAtValue: maxCount. +] + { #category : #accessing } DataSeries >> name [ ^ name @@ -83,6 +221,30 @@ DataSeries >> name: anObject [ name := anObject ] +{ #category : #accessing } +DataSeries >> ninth [ + "Answer the ninth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 9 +] + +{ #category : #statistics } +DataSeries >> quantile: aNumber [ + | sortedSeries index | + sortedSeries := self sort. + + aNumber = 0 + ifTrue: [ ^ sortedSeries first ]. + + index := (sortedSeries size * (aNumber / 100)) ceiling. + ^ sortedSeries atIndex: index. +] + +{ #category : #statistics } +DataSeries >> quartile: aNumber [ + ^ self quantile: (25 * aNumber) +] + { #category : #removing } DataSeries >> removeAt: aKey [ ^ self removeKey: aKey @@ -93,28 +255,145 @@ DataSeries >> removeAtIndex: aNumber [ ^ self removeAt: (self keys at: aNumber) ] +{ #category : #accessing } +DataSeries >> second [ + "Answer the second element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 2 +] + +{ #category : #statistics } +DataSeries >> secondQuartile [ + ^ self quartile: 2 +] + +{ #category : #accessing } +DataSeries >> seventh [ + "Answer the seventh element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 7 +] + +{ #category : #accessing } +DataSeries >> sixth [ + "Answer the sixth element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 6 +] + { #category : #sorting } DataSeries >> sort [ - ^ self sort: [ :a :b | a value <= b value ] + self sort: [ :a :b | a <= b ] ] { #category : #sorting } DataSeries >> sort: aBlock [ + | associationBlock | + associationBlock := [ :a :b | aBlock value: a value value: b value ]. + self sortAssociations: associationBlock +] + +{ #category : #sorting } +DataSeries >> sortAssociations: aBlock [ | sortedAssociations | sortedAssociations := self associations sort: aBlock. - ^ sortedAssociations asDataSeries name: self name; yourself + self removeAll. + self addAll: sortedAssociations. ] { #category : #sorting } DataSeries >> sortDescending [ - ^ self sort: [ :a :b | a value > b value ] + self sort: [ :a :b | a > b ] +] + +{ #category : #sorting } +DataSeries >> sorted [ + ^ self sorted: [ :a :b | a <= b ] +] + +{ #category : #sorting } +DataSeries >> sorted: aBlock [ + | associationBlock | + associationBlock := [ :a :b | aBlock value: a value value: b value ]. + ^ self sortedAssociations: associationBlock +] + +{ #category : #sorting } +DataSeries >> sortedAssociations: aBlock [ + | sortedAssociations | + sortedAssociations := self associations sort: aBlock. + ^ sortedAssociations asDataSeries name: self name; yourself +] + +{ #category : #sorting } +DataSeries >> sortedDescending [ + ^ self sorted: [ :a :b | a > b ] +] + +{ #category : #statistics } +DataSeries >> summary [ + | summary | + summary := self species new. + summary name: self name. + + summary + at: 'Min' put: self min; + at: '1st Qu.' put: self firstQuartile; + at: 'Median' put: self median; + at: 'Average' put: self average; + at: '3rd Qu.' put: self thirdQuartile; + at: 'Max' put: self max. + + ^ summary +] + +{ #category : #accessing } +DataSeries >> third [ + "Answer the third element of the receiver. + Raise an error if there are not enough elements." + ^ self atIndex: 3 +] + +{ #category : #statistics } +DataSeries >> thirdQuartile [ + ^ self quartile: 3 +] + +{ #category : #accessing } +DataSeries >> uniqueValues [ + ^ self asSet asArray +] + +{ #category : #statistics } +DataSeries >> valueCounts [ + ^ (self groupBy: self) count sortDescending +] + +{ #category : #enumerating } +DataSeries >> with: aCollection collect: twoArgBlock [ + "Collect and return the result of evaluating twoArgBlock with corresponding elements from this series and aCollection." + | result | + aCollection size = self size ifFalse: [self errorSizeMismatch]. + + result := self species new: self size. + result name: self name. + + self keys withIndexDo: [ :key :i | + result at: key put: + (twoArgBlock + value: (self at: key) + value: (aCollection at: i))]. + ^ result ] { #category : #enumerating } DataSeries >> withIndexCollect: aBlock [ - ^ self species newFrom: + | result | + result := self species newFrom: (self associations withIndexCollect: [:each :i | - each key -> (aBlock value: each value value: i)]) + each key -> (aBlock value: each value value: i)]). + result name: self name. + ^ result ] { #category : #enumerating } @@ -124,12 +403,41 @@ DataSeries >> withIndexDo: aBlock [ { #category : #enumerating } DataSeries >> withKeyCollect: aBlock [ - ^ self species newFrom: + | result | + result := self species newFrom: (self associations collect: [:each | - each key -> (aBlock value: each value value: each key)]) + each key -> (aBlock value: each value value: each key)]). + result name: self name. + ^ result ] { #category : #enumerating } DataSeries >> withKeyDo: aBlock [ self keysDo: [ :each | aBlock value: (self at: each) value: each ] ] + +{ #category : #enumerating } +DataSeries >> withSeries: otherDataSeries collect: twoArgBlock [ + "Collect and return the result of evaluating twoArgBlock with corresponding elements from this series and otherDataSeries." + | result | + otherDataSeries size = self size ifFalse: [self errorSizeMismatch]. + otherDataSeries keys = self keys ifFalse: [ self errorKeysMismatch ]. + + result := self species new: self size. + + result name: ((otherDataSeries name = self name) + ifTrue: [ self name ] + ifFalse: [ self defaultName ]). + + self keysDo: [ :key | + result at: key put: + (twoArgBlock + value: (self at: key) + value: (otherDataSeries at: key))]. + ^ result +] + +{ #category : #statistics } +DataSeries >> zerothQuartile [ + ^ self quartile: 0 +] diff --git a/src/DataFrame/DataSeriesGrouped.class.st b/src/DataFrame/DataSeriesGrouped.class.st index 14db94f4..dda6a849 100644 --- a/src/DataFrame/DataSeriesGrouped.class.st +++ b/src/DataFrame/DataSeriesGrouped.class.st @@ -30,7 +30,7 @@ DataSeriesGrouped >> printOn: aStream [ super printOn: aStream. aStream cr. - groups doWithIndex: [ :eachGroup :i | + groups withIndexDo: [ :eachGroup :i | aStream nextPutAll: (groups keys at: i) asString; nextPutAll: ': '. @@ -48,20 +48,22 @@ DataSeriesGrouped >> split: firstSeries by: secondSeries [ firstSeries size = secondSeries size ifFalse: [ SizeMismatch signal ]. - secondUnique := secondSeries unique asArray. + secondUnique := secondSeries uniqueValues asArray. groups := secondUnique collect: [ :eachUnique | | aList | aList := LinkedList new. - secondSeries doWithIndex: [ :eachSecond :i | + secondSeries withIndexDo: [ :eachSecond :i | eachSecond = eachUnique - ifTrue: [ aList add: (firstSeries at: i) ] ]. + ifTrue: [ aList add: (firstSeries atIndex: i) ] ]. aList asDataSeries ]. - groups := groups asDataSeries. - groups keys: secondUnique. + groups := DataSeries + withKeys: secondUnique + values: groups asDataSeries + name: firstSeries name. ^ self ] From 0910766573a13867c88439bd4e5426f92b7bcaa3 Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Wed, 13 Mar 2019 20:53:11 +0100 Subject: [PATCH 3/9] Fixed head/tails tests of DataSeries --- .../DataFrameHeadTailTest.class.st | 58 ------------------- src/DataFrame-Tests/DataSeriesTest.class.st | 52 +++++++++++++++++ src/DataFrame/DataSeries.class.st | 37 +++++++++++- 3 files changed, 86 insertions(+), 61 deletions(-) diff --git a/src/DataFrame-Tests/DataFrameHeadTailTest.class.st b/src/DataFrame-Tests/DataFrameHeadTailTest.class.st index 52c0cb8d..ae6d4e5c 100644 --- a/src/DataFrame-Tests/DataFrameHeadTailTest.class.st +++ b/src/DataFrame-Tests/DataFrameHeadTailTest.class.st @@ -104,64 +104,6 @@ DataFrameHeadTailTest >> testDataFrameTailN [ self assert: actual equals: expected. ] -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesHead [ - - | actual expected | - - expected := #(5.1 4.9 4.7 7 6.4) asDataSeries. - expected name: series name. - expected keys: (1 to: series defaultHeadTailSize). - - actual := series head. - - self assert: actual equals: expected. -] - -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesHeadN [ - - | actual expected | - - expected := #(5.1 4.9 4.7) asDataSeries. - expected name: series name. - expected keys: (1 to: 3). - - actual := series head: 3. - - self assert: actual equals: expected. -] - -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesTail [ - - | actual expected | - - expected := #(6.4 6.9 6.3 5.8 7.1) asDataSeries. - expected name: series name. - expected keys: - (series size - series defaultHeadTailSize + 1 to: series size). - - actual := series tail. - - self assert: actual equals: expected. -] - -{ #category : #tests } -DataFrameHeadTailTest >> testDataSeriesTailN [ - - | actual expected | - - expected := #(6.3 5.8 7.1) asDataSeries. - expected name: series name. - expected keys: - (series size - 3 + 1 to: series size). - - actual := series tail: 3. - - self assert: actual equals: expected. -] - { #category : #tests } DataFrameHeadTailTest >> testDefaultHeadTailSize [ diff --git a/src/DataFrame-Tests/DataSeriesTest.class.st b/src/DataFrame-Tests/DataSeriesTest.class.st index a382cc96..9ff4dde8 100644 --- a/src/DataFrame-Tests/DataSeriesTest.class.st +++ b/src/DataFrame-Tests/DataSeriesTest.class.st @@ -491,6 +491,32 @@ DataSeriesTest >> testGroupBy [ self assert: actual equals: expected. ] +{ #category : #tests } +DataSeriesTest >> testHead [ + | expected actual | + + expected := DataSeries + withKeys: #(a b c d e) + values: #(3 7 6 20 8) + name: series name. + + actual := series head. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testHeadN [ + | expected actual | + + expected := DataSeries + withKeys: #(a b) + values: #(3 7) + name: series name. + + actual := series head: 2. + self assert: actual equals: expected. +] + { #category : #tests } DataSeriesTest >> testInequality [ @@ -853,6 +879,32 @@ DataSeriesTest >> testSummary [ self assert: actual equals: expected. ] +{ #category : #tests } +DataSeriesTest >> testTail [ + | expected actual | + + expected := DataSeries + withKeys: #(g h i j k) + values: #(8 10 15 13 16) + name: series name. + + actual := series tail. + self assert: actual equals: expected. +] + +{ #category : #tests } +DataSeriesTest >> testTailN [ + | expected actual | + + expected := DataSeries + withKeys: #(j k) + values: #(13 16) + name: series name. + + actual := series tail: 2. + self assert: actual equals: expected. +] + { #category : #tests } DataSeriesTest >> testThird [ diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 42bf9ffd..4d7b07cc 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -83,7 +83,7 @@ DataSeries >> collect: aBlock [ ^ result ] -{ #category : #'as yet unclassified' } +{ #category : #statistics } DataSeries >> crossTabulateWith: aSeries [ | df | @@ -103,7 +103,12 @@ DataSeries >> crossTabulateWith: aSeries [ ^ df ] -{ #category : #accessing } +{ #category : #defaults } +DataSeries >> defaultHeadTailSize [ + ^ 5 +] + +{ #category : #defaults } DataSeries >> defaultName [ ^ '(no name)' ] @@ -151,7 +156,7 @@ DataSeries >> fourthQuartile [ ^ self quartile: 4 ] -{ #category : #'as yet unclassified' } +{ #category : #statistics } DataSeries >> frequencyTable [ | df count proportion | @@ -172,6 +177,19 @@ DataSeries >> groupBy: otherSeries [ ^ DataSeriesGrouped group: self by: otherSeries ] +{ #category : #slicing } +DataSeries >> head [ + ^ self head: self defaultHeadTailSize. +] + +{ #category : #slicing } +DataSeries >> head: aNumber [ + ^ self species + withKeys: (self keys copyFrom: 1 to: aNumber) + values: (self values copyFrom: 1 to: aNumber) + name: self name. +] + { #category : #initialization } DataSeries >> initialize [ super initialize. @@ -347,6 +365,19 @@ DataSeries >> summary [ ^ summary ] +{ #category : #slicing } +DataSeries >> tail [ + ^ self tail: self defaultHeadTailSize. +] + +{ #category : #slicing } +DataSeries >> tail: aNumber [ + ^ self species + withKeys: (self keys copyFrom: self size - aNumber + 1 to: self size) + values: (self values copyFrom: self size - aNumber + 1 to: self size) + name: self name. +] + { #category : #accessing } DataSeries >> third [ "Answer the third element of the receiver. From 9d59adbaf94e3e5cb4214e21644bca766eb956bc Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Wed, 13 Mar 2019 21:13:43 +0100 Subject: [PATCH 4/9] All tests are passing --- .../DataFrameAggrGroupTest.class.st | 35 ++++++++++--------- src/DataFrame/DataFrame.class.st | 8 ++--- src/DataFrame/DataFrameGrouped.class.st | 2 +- src/DataFrame/DataSeries.class.st | 2 +- 4 files changed, 25 insertions(+), 22 deletions(-) diff --git a/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st b/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st index 1f47c481..ce0c3462 100644 --- a/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st +++ b/src/DataFrame-Tests/DataFrameAggrGroupTest.class.st @@ -25,24 +25,25 @@ DataFrameAggrGroupTest >> testAggregateAverage [ | expected actual | - expected := #(18.3433 20.79) asDataSeries. - expected keys: #(Male Female). + expected := DataSeries + withKeys: #(Male Female) + values: #(18.3433 20.79) + name: #total_bill. actual := (df group: #total_bill by: #sex) average. - self assert: actual closeTo: expected. ] { #category : #initialization } DataFrameAggrGroupTest >> testAggregateMax [ - | expected actual | - expected := #(23.68 24.59) asDataSeries. - expected keys: #(Male Female). + expected := DataSeries + withKeys: #(Male Female) + values: #(23.68 24.59) + name: #total_bill. actual := (df group: #total_bill by: #sex) max. - self assert: actual equals: expected. ] @@ -51,11 +52,12 @@ DataFrameAggrGroupTest >> testAggregateMin [ | expected actual | - expected := #(10.34 16.99) asDataSeries. - expected keys: #(Male Female). + expected := DataSeries + withKeys: #(Male Female) + values: #(10.34 16.99) + name: #total_bill. actual := (df group: #total_bill by: #sex) min. - self assert: actual equals: expected. ] @@ -81,15 +83,16 @@ DataFrameAggrGroupTest >> testDataFrameGroupedPrintOn [ { #category : #tests } DataFrameAggrGroupTest >> testGroupSeriesBySeries [ - | femaleGroup maleGroup expectedSeries actualSeries | + | femaleGroup maleGroup expected actual | femaleGroup := #(16.99 24.59) asDataSeries. maleGroup := #(10.34 21.01 23.68) asDataSeries. - expectedSeries := { maleGroup . femaleGroup } asDataSeries. - expectedSeries keys: #(Male Female). - - actualSeries := (df group: #total_bill by: #sex) groups. + expected := DataSeries + withKeys: #(Male Female) + values: { maleGroup . femaleGroup } + name: #total_bill. -self assert: actualSeries equals: expectedSeries. + actual := (df group: #total_bill by: #sex) groups. + self assert: actual equals: expected. ] diff --git a/src/DataFrame/DataFrame.class.st b/src/DataFrame/DataFrame.class.st index 0a352d9e..9024ae27 100644 --- a/src/DataFrame/DataFrame.class.st +++ b/src/DataFrame/DataFrame.class.st @@ -975,8 +975,8 @@ DataFrame >> select: aBlock [ { #category : #private } DataFrame >> setDefaultRowColumnNames [ - self rowNames: (1 to: self numberOfRows) asOrderedCollection. - self columnNames: (1 to: self numberOfColumns) asOrderedCollection. + self rowNames: (1 to: self numberOfRows). + self columnNames: (1 to: self numberOfColumns). ] { #category : #'gt-inspector-extension' } @@ -1010,13 +1010,13 @@ DataFrame >> sortBy: columnName using: aBlock [ | column sortedKeys newContents | column := self column: columnName. column := column copy. - column sortUsing: aBlock. + column sort: aBlock. sortedKeys := column keys. newContents := DataFrameInternal new: self dimensions. sortedKeys withIndexDo: [ :key :i | - newContents rowAt: i put: (self row: key) ]. + newContents rowAt: i put: (self row: key) asArray ]. contents := newContents. self rowNames: sortedKeys. diff --git a/src/DataFrame/DataFrameGrouped.class.st b/src/DataFrame/DataFrameGrouped.class.st index 1d0595ff..b31e4c29 100644 --- a/src/DataFrame/DataFrameGrouped.class.st +++ b/src/DataFrame/DataFrameGrouped.class.st @@ -38,7 +38,7 @@ DataFrameGrouped >> printOn: aStream [ super printOn: aStream. aStream cr. - groups doWithIndex: [ :eachDataFrame :i | + groups withIndexDo: [ :eachDataFrame :i | aStream nextPutAll: (groups keys at: i) asString; nextPutAll: ': '. diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 4d7b07cc..42b298db 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -209,7 +209,7 @@ DataSeries >> isSequenceable [ { #category : #accessing } DataSeries >> keys: anArrayOfKeys [ | keys | - keys := anArrayOfKeys deepCopy. + keys := anArrayOfKeys asArray deepCopy. dictionary := self dictionaryClass newFromKeys: keys andValues: self values. orderedKeys := keys. ] From 5687cdfa9a1e41bf7792c938e6c774afd38925d7 Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Wed, 13 Mar 2019 21:26:57 +0100 Subject: [PATCH 5/9] Fixed and tested DataSeries>>closeTo: --- src/DataFrame-Tests/DataSeriesTest.class.st | 44 +++++++++++++++++++++ src/DataFrame/DataSeries.class.st | 18 +++++++++ 2 files changed, 62 insertions(+) diff --git a/src/DataFrame-Tests/DataSeriesTest.class.st b/src/DataFrame-Tests/DataSeriesTest.class.st index 9ff4dde8..af11bfaf 100644 --- a/src/DataFrame-Tests/DataSeriesTest.class.st +++ b/src/DataFrame-Tests/DataSeriesTest.class.st @@ -190,6 +190,50 @@ DataSeriesTest >> testAverage [ self assert: series average equals: (115/11). ] +{ #category : #tests } +DataSeriesTest >> testCloseTo [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.10000001 2) name: 'A'. + + self assert: firstSeries closeTo: secondSeries. +] + +{ #category : #tests } +DataSeriesTest >> testCloseToDifferentKeys [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(x y z) values: #(1 0.10000001 2) name: 'A'. + + self deny: (firstSeries closeTo: secondSeries). +] + +{ #category : #tests } +DataSeriesTest >> testCloseToDifferentNames [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 0.10000001 2) name: 'B'. + + self deny: (firstSeries closeTo: secondSeries). +] + +{ #category : #tests } +DataSeriesTest >> testCloseToDifferentValues [ + + | firstSeries secondSeries | + + firstSeries := DataSeries withKeys: #(a b c) values: #(1 0.1 2) name: 'A'. + secondSeries := DataSeries withKeys: #(a b c) values: #(1 3.10000001 2) name: 'A'. + + self deny: (firstSeries closeTo: secondSeries). +] + { #category : #tests } DataSeriesTest >> testCollect [ diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 42b298db..91103205 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -75,6 +75,24 @@ DataSeries >> atIndex: aNumber put: aValue [ ^ self at: (self keys at: aNumber) put: aValue ] +{ #category : #comparing } +DataSeries >> closeTo: anObject [ + self == anObject + ifTrue: [^ true]. + + (self species == anObject species + and: [self size = anObject size]) + ifFalse: [^ false]. + + (anObject name = self name) + ifFalse: [ ^ false ]. + + (anObject keys = self keys) + ifFalse: [ ^ false ]. + + ^ super closeTo: anObject +] + { #category : #enumerating } DataSeries >> collect: aBlock [ | result | From 294107b5d1788d6d0392e8d47b124aabc1c9c5f6 Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Thu, 14 Mar 2019 00:47:46 +0100 Subject: [PATCH 6/9] Fixed the failing DataFrame-IO tests --- src/DataFrame-Type/DataFrameTypeDetector.class.st | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/DataFrame-Type/DataFrameTypeDetector.class.st b/src/DataFrame-Type/DataFrameTypeDetector.class.st index 7526052b..9fb0ea08 100644 --- a/src/DataFrame-Type/DataFrameTypeDetector.class.st +++ b/src/DataFrame-Type/DataFrameTypeDetector.class.st @@ -100,7 +100,7 @@ DataFrameTypeDetector >> detectColumnTypeAndConvert: aDataSeries [ DataFrameTypeDetector >> detectTypesAndConvert: aDataFrame [ aDataFrame columnNames do: [ :columnName | aDataFrame column: columnName put: ( - self detectColumnTypeAndConvert: (aDataFrame column: columnName)) ]. + self detectColumnTypeAndConvert: (aDataFrame column: columnName)) asArray ]. aDataFrame rowNames: (self detectColumnTypeAndConvert: aDataFrame rowNames). ] From 0f381dd5dd0bb22c42ba474279f859c4982dd3ee Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Thu, 14 Mar 2019 01:38:29 +0100 Subject: [PATCH 7/9] Fixed all tests on Pharo 7 --- .../DataFrameStatsTest.class.st | 18 +++++++++--------- src/DataFrame-Tests/DataSeriesTest.class.st | 8 ++++---- src/DataFrame/DataSeries.class.st | 12 +++++++++++- src/DataFrame/DataSeriesGrouped.class.st | 10 +--------- 4 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/DataFrame-Tests/DataFrameStatsTest.class.st b/src/DataFrame-Tests/DataFrameStatsTest.class.st index 82ceb5b3..31b13c22 100644 --- a/src/DataFrame-Tests/DataFrameStatsTest.class.st +++ b/src/DataFrame-Tests/DataFrameStatsTest.class.st @@ -11,7 +11,7 @@ Class { DataFrameStatsTest >> setUp [ df := DataFrame withRows: #( - (5.1 3.5 1.4 0.2) + (7.1 3.5 1.4 0.2) (4.9 3 1.4 0.2) (4.7 3.2 1.3 0.2) (7 3.2 4.7 1.4) @@ -29,7 +29,7 @@ DataFrameStatsTest >> testAverage [ | expected actual | - expected := { 6.022222 . 3.133333 . 3.911111 . 1.277778 } asDataSeries. + expected := { 6.244444 . 3.133333 . 3.911111 . 1.277778 } asDataSeries. expected name: #average. expected keys: df columnNames. @@ -42,7 +42,7 @@ DataFrameStatsTest >> testFirstQuartile [ | expected actual | - expected := { 5.1 . 3 . 1.4 . 0.2 } asDataSeries. + expected := { 5.8 . 3 . 1.4 . 0.2 } asDataSeries. expected name: #firstQuartile. expected keys: df columnNames. @@ -55,7 +55,7 @@ DataFrameStatsTest >> testInterquartileRange [ | expected actual | - expected := { 1.8 . 0.2 . 3.7 . 1.7 } asDataSeries. + expected := { 1.2 . 0.2 . 3.7 . 1.7 } asDataSeries. expected name: #interquartileRange. expected keys: df columnNames. @@ -81,7 +81,7 @@ DataFrameStatsTest >> testMedian [ | expected actual | - expected := { 6.3 . 3.2 . 4.7 . 1.5 } asDataSeries. + expected := { 6.4 . 3.2 . 4.7 . 1.5 } asDataSeries. expected name: #median. expected keys: df columnNames. @@ -107,7 +107,7 @@ DataFrameStatsTest >> testMode [ | expected actual | - expected := { 6.3 . 3.2 . 1.4 . 0.2 } asDataSeries. + expected := { 7.1 . 3.2 . 1.4 . 0.2 } asDataSeries. expected name: #mode. expected keys: df columnNames. @@ -133,7 +133,7 @@ DataFrameStatsTest >> testStdev [ | expected actual | - expected := { 0.936453 . 0.223607 . 1.971956 . 0.877180 } asDataSeries. + expected := { 0.927512 . 0.223607 . 1.971956 . 0.877180 } asDataSeries. expected name: #stdev. expected keys: df columnNames. @@ -147,7 +147,7 @@ DataFrameStatsTest >> testThirdQuartile [ | expected actual | - expected := { 6.9 . 3.2 . 5.1 . 1.9 } asDataSeries. + expected := { 7 . 3.2 . 5.1 . 1.9 } asDataSeries. expected name: #thirdQuartile. expected keys: df columnNames. @@ -160,7 +160,7 @@ DataFrameStatsTest >> testVariance [ | expected actual | - expected := { 0.876944 . 0.050000 . 3.888611 . 0.769444 } asDataSeries. + expected := { 0.860278 . 0.050000 . 3.888611 . 0.769444 } asDataSeries. expected name: #variance. expected keys: df columnNames. diff --git a/src/DataFrame-Tests/DataSeriesTest.class.st b/src/DataFrame-Tests/DataSeriesTest.class.st index af11bfaf..9ab81b14 100644 --- a/src/DataFrame-Tests/DataSeriesTest.class.st +++ b/src/DataFrame-Tests/DataSeriesTest.class.st @@ -323,7 +323,7 @@ DataSeriesTest >> testCreateDataSeriesAsDataSeries [ self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: #(a b c). self assert: dataSeries keys equals: #(1 2 3). - self assert: dataSeries name isNil. + self assert: dataSeries name equals: '(no name)'. ] { #category : #tests } @@ -341,7 +341,7 @@ DataSeriesTest >> testCreateDataSeriesWithKeysValues [ self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. self assert: dataSeries keys equals: keys. - self assert: dataSeries name isNil. + self assert: dataSeries name equals: '(no name)'. ] { #category : #tests } @@ -375,7 +375,7 @@ DataSeriesTest >> testCreateDataSeriesWithValues [ self assert: dataSeries size equals: 3. self assert: dataSeries asArray equals: values. self assert: dataSeries keys equals: #(1 2 3). - self assert: dataSeries name isNil. + self assert: dataSeries name equals: '(no name)'. ] { #category : #tests } @@ -405,7 +405,7 @@ DataSeriesTest >> testCreateEmptyDataSeries [ self assert: dataSeries size equals: 0. self assert: dataSeries asArray equals: #(). self assert: dataSeries keys equals: #(). - self assert: dataSeries name isNil. + self assert: dataSeries name equals: '(no name)'. ] { #category : #tests } diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 91103205..bd045829 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -9,10 +9,13 @@ Class { { #category : #'instance creation' } DataSeries class >> newFrom: aCollection [ + (aCollection species == self) + ifTrue: [ ^ super newFrom: aCollection associations ]. + "If it's a collection of associations use the superclass implementation" ^ super newFrom: ((aCollection first respondsTo: #key) ifTrue: [ aCollection ] - ifFalse: [ aCollection collectWithIndex: [ :each :i | i -> each ] ]). + ifFalse: [ aCollection withIndexCollect: [ :each :i | i -> each ] ]). ] { #category : #'instance creation' } @@ -214,6 +217,13 @@ DataSeries >> initialize [ name := self defaultName. ] +{ #category : #initialization } +DataSeries >> initialize: aCapacity [ + "Make sure that initialize is called and the default name is set" + self initialize. + ^ super initialize: aCapacity. +] + { #category : #statistics } DataSeries >> interquartileRange [ ^ self thirdQuartile - self firstQuartile diff --git a/src/DataFrame/DataSeriesGrouped.class.st b/src/DataFrame/DataSeriesGrouped.class.st index dda6a849..433e5f54 100644 --- a/src/DataFrame/DataSeriesGrouped.class.st +++ b/src/DataFrame/DataSeriesGrouped.class.st @@ -12,16 +12,8 @@ DataSeriesGrouped class >> group: firstSeries by: secondSeries [ { #category : #private } DataSeriesGrouped >> apply: aBlock [ - - | result | - - result := groups collect: [ :eachGroup | + ^ groups collect: [ :eachGroup | aBlock value: eachGroup ]. - - result := result asDataSeries. - result keys: groups keys. - - ^ result ] { #category : #printing } From 5e18b8854cc7d3f9d728bb96dd9f7b4a1f72ddfe Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Thu, 14 Mar 2019 01:47:00 +0100 Subject: [PATCH 8/9] Fixed tests in Pharo 6 --- src/DataFrame-Pharo6/OrderedDictionary.extension.st | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 src/DataFrame-Pharo6/OrderedDictionary.extension.st diff --git a/src/DataFrame-Pharo6/OrderedDictionary.extension.st b/src/DataFrame-Pharo6/OrderedDictionary.extension.st new file mode 100644 index 00000000..f257f4d5 --- /dev/null +++ b/src/DataFrame-Pharo6/OrderedDictionary.extension.st @@ -0,0 +1,13 @@ +Extension { #name : #OrderedDictionary } + +{ #category : #'*DataFrame-Pharo6' } +OrderedDictionary class >> newFromKeys: keys andValues: values [ + "Create a dictionary from the keys and values arguments which should have the same length." + + "(self newFromKeys: #(#x #y) andValues: #(3 6)) >>> (self new at: #x put: 3; at: #y put: 6 ;yourself)" + + | dict | + dict := self new. + keys with: values do: [ :k :v | dict at: k put: v ]. + ^ dict +] From 29f31488ecadb2440a1f701d997d683cb2159691 Mon Sep 17 00:00:00 2001 From: Oleksandr Zaytsev Date: Thu, 14 Mar 2019 02:18:54 +0100 Subject: [PATCH 9/9] Improved code coverage --- .../DataFrameCsvReaderTest.class.st | 41 +++++++++++++++---- .../DataFrameCsvWriterTest.class.st | 11 +++++ src/DataFrame-IO/DataFrame.extension.st | 8 ++-- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/src/DataFrame-IO-Tests/DataFrameCsvReaderTest.class.st b/src/DataFrame-IO-Tests/DataFrameCsvReaderTest.class.st index 5536ed99..a7df2634 100644 --- a/src/DataFrame-IO-Tests/DataFrameCsvReaderTest.class.st +++ b/src/DataFrame-IO-Tests/DataFrameCsvReaderTest.class.st @@ -6,7 +6,9 @@ Class { 'commaCsvFile', 'tabCsvFile', 'emptyCsvFile', - 'expectedDataFrame' + 'expectedDataFrame', + 'dataFrameWithoutRowNames', + 'dataFrameWithRowNames' ], #category : #'DataFrame-IO-Tests' } @@ -32,30 +34,55 @@ DataFrameCsvReaderTest >> setUp [ self createFile: tabCsvFile withContents: TestCsvStrings tabCsvString. self createFile: emptyCsvFile withContents: TestCsvStrings emptyCsvString. - expectedDataFrame := DataFrame withRows: #( + dataFrameWithRowNames := DataFrame withRows: #( (2.4 true 'rain') (0.5 true 'rain') (-1.2 true 'snow') (-2.3 false '-') (3.2 true 'rain')). - expectedDataFrame columnNames: #(temperature precipitation type). - expectedDataFrame rowNames: (#('01:10' '01:30' '01:50' '02:10' '02:30') + dataFrameWithRowNames columnNames: #(temperature precipitation type). + dataFrameWithRowNames rowNames: (#('01:10' '01:30' '01:50' '02:10' '02:30') collect: #asTime). + + dataFrameWithoutRowNames := DataFrame withRows: { + { '01:10' asTime . 2.4 . true . 'rain' } . + { '01:30' asTime . 0.5 . true . 'rain' } . + { '01:50' asTime . -1.2 . true . 'snow' } . + { '02:10' asTime . -2.3 . false . '-' } . + { '02:30' asTime . 3.2 . true . 'rain' }}. + + dataFrameWithoutRowNames columnNames: #(nil temperature precipitation type). ] { #category : #tests } DataFrameCsvReaderTest >> testReadCsv [ + | actualDataFrame | + actualDataFrame := DataFrame readFromCsv: commaCsvFile. + self assert: actualDataFrame equals: dataFrameWithoutRowNames. + +] + +{ #category : #tests } +DataFrameCsvReaderTest >> testReadCsvWithRowNames [ | actualDataFrame | actualDataFrame := DataFrame readFromCsvWithRowNames: commaCsvFile. - self assert: actualDataFrame equals: expectedDataFrame. + self assert: actualDataFrame equals: dataFrameWithRowNames. ] { #category : #tests } -DataFrameCsvReaderTest >> testReadCsvWithSeparatorTab [ +DataFrameCsvReaderTest >> testReadCsvWithRowNamesWithSeparatorTab [ | actualDataFrame | actualDataFrame := DataFrame readFromCsvWithRowNames: tabCsvFile separator: Character tab. - self assert: actualDataFrame equals: expectedDataFrame. + self assert: actualDataFrame equals: dataFrameWithRowNames. + +] + +{ #category : #tests } +DataFrameCsvReaderTest >> testReadCsvWithSeparatorTab [ + | actualDataFrame | + actualDataFrame := DataFrame readFromCsv: tabCsvFile withSeparator: Character tab. + self assert: actualDataFrame equals: dataFrameWithoutRowNames. ] diff --git a/src/DataFrame-IO-Tests/DataFrameCsvWriterTest.class.st b/src/DataFrame-IO-Tests/DataFrameCsvWriterTest.class.st index 923f2c72..71e1288e 100644 --- a/src/DataFrame-IO-Tests/DataFrameCsvWriterTest.class.st +++ b/src/DataFrame-IO-Tests/DataFrameCsvWriterTest.class.st @@ -48,6 +48,17 @@ DataFrameCsvWriterTest >> testWriteToCsv [ self assert: actual lines equals: expected lines. ] +{ #category : #tests } +DataFrameCsvWriterTest >> testWriteToCsvLineEndLf [ + | writer actual expected | + writer := DataFrameCsvWriter new. + writer lineEndConvention: #lf. + dataFrame writeTo: commaQuoteCsvFile using: writer. + actual := self readFile: commaQuoteCsvFile. + expected := String lf join: TestCsvStrings commaQuoteCsvString lines. + self assert: actual lines equals: expected lines. +] + { #category : #tests } DataFrameCsvWriterTest >> testWriteToCsvWithSeparatorTab [ | actual expected | diff --git a/src/DataFrame-IO/DataFrame.extension.st b/src/DataFrame-IO/DataFrame.extension.st index 8f144435..bf82a4b6 100644 --- a/src/DataFrame-IO/DataFrame.extension.st +++ b/src/DataFrame-IO/DataFrame.extension.st @@ -10,7 +10,7 @@ DataFrame class >> readFrom: aLocation using: aDataFrameReader [ DataFrame class >> readFromCsv: aFileReference [ | reader | reader := DataFrameCsvReader new. - ^ reader readFrom: aFileReference. + ^ self readFrom: aFileReference using: reader. ] { #category : #'*DataFrame-IO' } @@ -18,7 +18,7 @@ DataFrame class >> readFromCsv: aFileReference withSeparator: aSeparator [ | reader | reader := DataFrameCsvReader new. reader separator: aSeparator. - ^ reader readFrom: aFileReference. + ^ self readFrom: aFileReference using: reader ] { #category : #'*DataFrame-IO' } @@ -26,7 +26,7 @@ DataFrame class >> readFromCsvWithRowNames: aFileReference [ | reader | reader := DataFrameCsvReader new. reader includeRowNames: true. - ^ reader readFrom: aFileReference. + ^ self readFrom: aFileReference using: reader ] { #category : #'*DataFrame-IO' } @@ -35,7 +35,7 @@ DataFrame class >> readFromCsvWithRowNames: aFileReference separator: aSeparator reader := DataFrameCsvReader new. reader includeRowNames: true. reader separator: aSeparator. - ^ reader readFrom: aFileReference. + ^ self readFrom: aFileReference using: reader ] { #category : #'*DataFrame-IO' }