Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 18 additions & 3 deletions src/DataFrame-Tests/DataSeriesTest.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -874,10 +874,11 @@ DataSeriesTest >> testCategoricalIsNotNumerical [

{ #category : #'tests - categorical' }
DataSeriesTest >> testCategoricalUniqueValues [

| aSeries expected actual |
aSeries := DataSeries withKeys: #(a b c d e) values: #(z y y z x).
expected := #(x y z).
actual := aSeries uniqueValues.
aSeries := DataSeries withKeys: #( a b c d e ) values: #( z y y z x ).
expected := #( x y z ).
actual := aSeries removeDuplicates.
self assert: actual equals: expected
]

Expand Down Expand Up @@ -1680,6 +1681,20 @@ DataSeriesTest >> testRemoveAtIndex [
self assert: series equals: expected
]

{ #category : #'tests - removing' }
DataSeriesTest >> testRemoveDuplicates [
	"removeDuplicates answers the distinct values of the series.
	The result is produced through a Set (see DataSeries >> removeDuplicates),
	so its element order is unspecified — assert on contents, not on sequence."

	| dataseries actual expected |
	dataseries := DataSeries
		withKeys: #( 'A' 'B' 'C' 'D' 'E' )
		values: #( 7 1 1 1 3 )
		name: series.

	expected := #( 7 1 3 ).
	actual := dataseries removeDuplicates.

	"Order-independent comparison: same elements, same multiplicities"
	self assert: actual asBag equals: expected asBag
]

{ #category : #'tests - removing' }
DataSeriesTest >> testRemoveNils [

Expand Down
68 changes: 45 additions & 23 deletions src/DataFrame/DataSeries.class.st
Original file line number Diff line number Diff line change
Expand Up @@ -253,22 +253,21 @@ DataSeries >> countNonNils [
{ #category : #statistics }
DataSeries >> crossTabulateWith: aSeries [
"A DataFrame is returned which is useful in quantitatively analyzing the relationship of values in one data series with the values in another data series"

| df |

(self size = aSeries size)
ifFalse: [ SizeMismatch signal ].
| df |
self size = aSeries size ifFalse: [ SizeMismatch signal ].

df := DataFrame withRows:
(self uniqueValues sortIfPossible collect: [ :each1 |
aSeries uniqueValues sortIfPossible collect: [ :each2 |
(1 to: self size) inject: 0 into: [ :accum :i |
(((self atIndex: i) = each1) and: ((aSeries atIndex: i) = each2))
ifTrue: [ accum + 1 ]
ifFalse: [ accum ] ] ] ]).

df rowNames: self uniqueValues sortIfPossible.
df columnNames: aSeries uniqueValues sortIfPossible.
(self removeDuplicates sortIfPossible collect: [ :each1 |
aSeries removeDuplicates sortIfPossible collect: [ :each2 |
(1 to: self size) inject: 0 into: [ :accum :i |
((self atIndex: i) = each1 and:
(aSeries atIndex: i) = each2)
ifTrue: [ accum + 1 ]
ifFalse: [ accum ] ] ] ]).

df rowNames: self removeDuplicates sortIfPossible.
df columnNames: aSeries removeDuplicates sortIfPossible.
^ df
]

Expand Down Expand Up @@ -365,11 +364,17 @@ DataSeries >> groupBy: otherSeries aggregateUsing: aBlock as: aNewName [
| groupMap |
self size = otherSeries size ifFalse: [ SizeMismatch signal ].

groupMap := (otherSeries uniqueValues sortIfPossible collect: [ :e | e -> OrderedCollection new ]) asOrderedDictionary.
groupMap := (otherSeries removeDuplicates sortIfPossible collect: [
:e | e -> OrderedCollection new ]) asOrderedDictionary.

1 to: self size do: [ :index | (groupMap at: (otherSeries atIndex: index)) add: (self atIndex: index) ].
1 to: self size do: [ :index |
(groupMap at: (otherSeries atIndex: index)) add:
(self atIndex: index) ].

^ self class withKeys: groupMap keys values: (groupMap values collect: aBlock) name: aNewName
^ self class
withKeys: groupMap keys
values: (groupMap values collect: aBlock)
name: aNewName
]

{ #category : #grouping }
Expand Down Expand Up @@ -406,11 +411,15 @@ DataSeries >> groupByUniqueValuesAndAggregateUsing: aBlock as: aNewName [
"Group my values by unique values, aggregate them using aBlock, and answer a new DataSeries with the unique values as keys, aggregated values of myself as values, and aNewName as name"

| groupMap |
groupMap := (self uniqueValues sortIfPossible collect: [ :e | e -> OrderedCollection new ]) asOrderedDictionary.
groupMap := (self removeDuplicates sortIfPossible collect: [ :e |
e -> OrderedCollection new ]) asOrderedDictionary.

self do: [ :each | (groupMap at: each) add: each ].

^ self class withKeys: groupMap keys values: (groupMap values collect: aBlock) name: aNewName
^ self class
withKeys: groupMap keys
values: (groupMap values collect: aBlock)
name: aNewName
]

{ #category : #testing }
Expand Down Expand Up @@ -466,8 +475,10 @@ DataSeries >> isCategorical [
{ #category : #'categorical-numerical' }
DataSeries >> isNumerical [
"Returns true if all values of the data series are numerical values and returns false otherwise"

^ forcedIsNumerical ifNil: [ (self uniqueValues copyWithout: nil) allSatisfy: [ :each | each isNumber ] ]

^ forcedIsNumerical ifNil: [
(self removeDuplicates copyWithout: nil) allSatisfy: [ :each |
each isNumber ] ]
]

{ #category : #testing }
Expand Down Expand Up @@ -583,6 +594,13 @@ DataSeries >> removeAtIndex: aNumber [
^ self removeAt: (self keys at: aNumber)
]

{ #category : #removing }
DataSeries >> removeDuplicates [
	"Answer an Array containing each distinct value of the receiver exactly once.
	NOTE(review): the result is accumulated through a Set, so the order of the
	answered elements is not specified."

	| distinct |
	distinct := Set new.
	self do: [ :each | distinct add: each ].
	^ distinct asArray
]

{ #category : #removing }
DataSeries >> removeNils [
"Removes elements with nil values from the data series"
Expand Down Expand Up @@ -801,9 +819,13 @@ DataSeries >> thirdQuartile [

{ #category : #accessing }
DataSeries >> uniqueValues [
"Answer the unique values of the receiver"

^ self asSet asArray

self
deprecated:
'The name of this method has been changed to removeDuplicates.'
transformWith:
'`@receiver uniqueValues' -> '`@receiver removeDuplicates'.
^ self removeDuplicates
]

{ #category : #statistics }
Expand Down