From 0d9faace92bd62c65d2115702a2153c53efc1d60 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 16:19:24 +0530 Subject: [PATCH 01/10] Runnable examples for transformation methods. --- src/DataFrame/DataSeries.class.st | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 99e9380b..2f9750ec 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -930,6 +930,12 @@ DataSeries >> stdev [ DataSeries >> sum [ "Return the sum of the values over the requested axis. Nil values are excluded." + "(#(1 1 1) asDataSeries sum) >>> 3" + + "(#(1 nil 1) asDataSeries sum) >>> 2" + + "(#(1 1.1 1) asDataSeries sum) >>> 3.1" + | result | result := 0. self do: [ :each | each ifNotNil: [ result := result + each ] ]. From 042f6d4801980571cbee3d2d744e1e5838738857 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 16:20:45 +0530 Subject: [PATCH 02/10] Runnable examples for DataSeries testing methods. --- src/DataFrame/DataSeries.class.st | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 2f9750ec..1287b76d 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -515,7 +515,16 @@ DataSeries >> groupByUniqueValuesAndAggregateUsing: aBlock as: aNewName [ { #category : #testing } DataSeries >> hasNil [ "return true if data series has at least one nil value" - ^ self includes: nil. 
+ + "(#(a nil b) asDataSeries hasNil) >>> true" + + "(#(a 'nil' b) asDataSeries hasNil) >>> false" + + "(#(1 nil 3) asDataSeries hasNil) >>> true" + + "(#(1 0 3) asDataSeries hasNil) >>> false" + + ^ self includes: nil ] { #category : #slicing } From 952d2ce30fa3e1812f78d25344b6c9ea4fb75683 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 16:39:00 +0530 Subject: [PATCH 03/10] Runnable examples for DataSeries sorting methods. --- src/DataFrame/DataSeries.class.st | 34 +++++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 1287b76d..335b2eb3 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -868,7 +868,11 @@ DataSeries >> sixth [ { #category : #sorting } DataSeries >> sort [ "Arranges a data series in ascending order of its values" - + + "(#(a c b) asDataSeries sort) >>> (DataSeries withKeys: #(1 3 2) values: #(a b c))" + + "(#(500 5 37) asDataSeries sort) >>> (DataSeries withKeys: #(2 3 1) values: #(5 37 500))" + self sort: [ :a :b | a <= b ] ] @@ -876,6 +880,13 @@ DataSeries >> sort [ DataSeries >> sort: aBlock [ "Arranges a data series by applying aBlock on its values" + "(#( z aaa cc ) asDataSeries sort: [ :a :b | + a asString size < b asString size ]) + >>> (DataSeries withKeys: #( 1 3 2 ) values: #( z cc aaa ))." + + "(#( 500 5 37 ) asDataSeries sort: [ :a :b | a >= b ]) + >>> (DataSeries withKeys: #( 1 3 2 ) values: #( 500 37 5 ))" + | associationBlock | associationBlock := [ :a :b | aBlock value: a value value: b value ]. 
self sortAssociations: associationBlock @@ -893,13 +904,21 @@ DataSeries >> sortAssociations: aBlock [ DataSeries >> sortDescending [ "Arranges a data series in descending order of its values" + "(#(a c b) asDataSeries sortDescending) >>> (DataSeries withKeys: #(2 3 1) values: #(c b a))" + + "(#(500 5 37) asDataSeries sortDescending) >>> (DataSeries withKeys: #(1 3 2) values: #(500 37 5))" + self sort: [ :a :b | a > b ] ] { #category : #sorting } DataSeries >> sorted [ "Returns a sorted copy of the data series without rearranging the original data series" - + + "(#(a c b) asDataSeries sorted) >>> (DataSeries withKeys: #(1 3 2) values: #(a b c))" + + "(#(500 5 37) asDataSeries sorted) >>> (DataSeries withKeys: #(2 3 1) values: #(5 37 500))" + ^ self sorted: [ :a :b | a <= b ] ] @@ -907,6 +926,13 @@ DataSeries >> sorted [ DataSeries >> sorted: aBlock [ "Returns a copy of the data series after applying aBlock without rearranging the original data series" + "(#( z aaa cc ) asDataSeries sorted: [ :a :b | + a asString size < b asString size ]) + >>> (DataSeries withKeys: #( 1 3 2 ) values: #( z cc aaa ))." + + "(#( 500 5 37 ) asDataSeries sorted: [ :a :b | a >= b ]) + >>> (DataSeries withKeys: #( 1 3 2 ) values: #( 500 37 5 ))" + | associationBlock | associationBlock := [ :a :b | aBlock value: a value value: b value ]. 
^ self sortedAssociations: associationBlock @@ -923,6 +949,10 @@ DataSeries >> sortedAssociations: aBlock [ DataSeries >> sortedDescending [ "Returns a sorted copy of the data series in descending order without rearranging the original data series" + "(#(a c b) asDataSeries sortedDescending) >>> (DataSeries withKeys: #(2 3 1) values: #(c b a))" + + "(#(50 5 37) asDataSeries sortedDescending) >>> (DataSeries withKeys: #(1 3 2) values: #(50 37 5))" + ^ self sorted: [ :a :b | a > b ] ] From a1223399122b813a8c65dd9ceefab700de4072ec Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 16:49:48 +0530 Subject: [PATCH 04/10] Runnable examples for DataSeries slicing methods. --- src/DataFrame/DataSeries.class.st | 32 ++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 335b2eb3..48b1507d 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -530,7 +530,11 @@ DataSeries >> hasNil [ { #category : #slicing } DataSeries >> head [ "Returns a data series with first 5 elements of the receiver" - + + "(#(a b c d e f g h i j) asDataSeries head) >>> (#(a b c d e) asDataSeries)" + + "(#(1 2 3 4 5 6 7 8 9 10) asDataSeries head) >>> (#(1 2 3 4 5) asDataSeries)" + ^ self head: self defaultHeadTailSize ] @@ -538,10 +542,14 @@ DataSeries >> head [ DataSeries >> head: aNumber [ "Returns a data series with first aNumber elements of the receiver" + "(#(a b c d e f g h i j) asDataSeries head: 3) >>> (#(a b c) asDataSeries)" + + "(#(1 2 3 4 5 6 7 8 9 10) asDataSeries head: 1) >>> (#(1) asDataSeries)" + ^ self species - withKeys: (self keys copyFrom: 1 to: aNumber) - values: (self values copyFrom: 1 to: aNumber) - name: self name + withKeys: (self keys copyFrom: 1 to: aNumber) + values: (self values copyFrom: 1 to: aNumber) + name: self name ] { #category : #initialization } @@ -1004,6 +1012,10 @@ DataSeries >> summary [ 
DataSeries >> tail [ "Returns a data series with last 5 elements of the receiver" + "(#(a b c d e f) asDataSeries tail) >>> (DataSeries withKeys: #(2 3 4 5 6) values: #(b c d e f) )" + + "(#(1 2 3 4 5 6 7) asDataSeries tail) >>> (DataSeries withKeys: #(3 4 5 6 7) values: #(3 4 5 6 7) )" + ^ self tail: self defaultHeadTailSize ] @@ -1011,10 +1023,16 @@ DataSeries >> tail [ DataSeries >> tail: aNumber [ "Returns a data series with last aNumber elements of the receiver" + "(#(a b c d e f) asDataSeries tail: 3) >>> (DataSeries withKeys: #(4 5 6) values: #(d e f) )" + + "(#(1 2 3 4 5 6 7) asDataSeries tail: 2) >>> (DataSeries withKeys: #(6 7) values: #(6 7) )" + ^ self species - withKeys: (self keys copyFrom: self size - aNumber + 1 to: self size) - values: (self values copyFrom: self size - aNumber + 1 to: self size) - name: self name + withKeys: + (self keys copyFrom: self size - aNumber + 1 to: self size) + values: + (self values copyFrom: self size - aNumber + 1 to: self size) + name: self name ] { #category : #accessing } From 85d374654facc1a48fa36a2f7b7501608a1d1500 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 17:05:18 +0530 Subject: [PATCH 05/10] Runnable examples for DataSeries replacing methods. 
--- src/DataFrame/DataSeries.class.st | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 48b1507d..c20b69c4 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -778,13 +778,22 @@ DataSeries >> removeNils [ DataSeries >> replaceNilsWith: anObject [ "Replaces nils inplace with anObject" - self withIndexDo: [ :ele :index | ele ifNil: [ self atIndex: index put: anObject ] ] + "(#(a 'nil' nil d nil) asDataSeries replaceNilsWith: #b) >>> (#(a 'nil' b d b) asDataSeries)" + + "(#(1 0 nil 3 nil) asDataSeries replaceNilsWith: 7) >>> (#(1 0 7 3 7) asDataSeries)" + + self withIndexDo: [ :ele :index | + ele ifNil: [ self atIndex: index put: anObject ] ] ] { #category : #replacing } DataSeries >> replaceNilsWithAverage [ "Replaces nils inplace with average" + "(#(1 2 nil 3 nil) asDataSeries replaceNilsWithAverage) >>> (#(1 2 2 3 2) asDataSeries)" + + "(#(3 6 2 9 nil) asDataSeries replaceNilsWithAverage) >>> (#(3 6 2 9 5) asDataSeries)" + | mean | mean := (self select: [ :ele | ele isNotNil ]) average. self replaceNilsWith: mean @@ -794,6 +803,10 @@ DataSeries >> replaceNilsWithAverage [ DataSeries >> replaceNilsWithMedian [ "Replaces nils inplace with median" + "(#(1 2 nil 3) asDataSeries replaceNilsWithMedian) >>> (#(1 2 2 3) asDataSeries)" + + "(#(3 7 nil 9 nil) asDataSeries replaceNilsWithMedian) >>> (#(3 7 7 9 7) asDataSeries)" + | median | median := (self select: [ :ele | ele isNotNil ]) median. self replaceNilsWith: median @@ -803,6 +816,10 @@ DataSeries >> replaceNilsWithMedian [ DataSeries >> replaceNilsWithMode [ "Replaces nils inplace with mode" + "(#(1 2 1 3 nil) asDataSeries replaceNilsWithMode) >>> (#(1 2 1 3 1) asDataSeries)" + + "(#(a a a b nil) asDataSeries replaceNilsWithMode) >>> (#(a a a b a) asDataSeries)" + | mode | mode := (self select: [ :ele | ele isNotNil ]) mode. 
self replaceNilsWith: mode @@ -810,9 +827,12 @@ DataSeries >> replaceNilsWithMode [ { #category : #replacing } DataSeries >> replaceNilsWithPreviousValue [ - "Replaces nils inplace with previous non-nil value" + "(#(nil 2 nil 3 nil) asDataSeries replaceNilsWithPreviousValue) >>> (#(nil 2 2 3 3) asDataSeries)" + + "(#(a nil b c nil) asDataSeries replaceNilsWithPreviousValue) >>> (#(a a b c c) asDataSeries)" + | value | self withIndexDo: [ :ele :index | index > 1 ifTrue: [ ele ifNil: [ self atIndex: index put: value ] ]. @@ -823,6 +843,10 @@ DataSeries >> replaceNilsWithPreviousValue [ DataSeries >> replaceNilsWithZeros [ "Replaces nils inplace with zero" + "(#(1 2 nil 3 nil) asDataSeries replaceNilsWithZeros) >>> (#(1 2 0 3 0) asDataSeries)" + + "(#(a b c d nil) asDataSeries replaceNilsWithZeros) >>> (#(a b c d 0) asDataSeries)" + self replaceNilsWith: 0 ] From b696b5bbe5ab85252880dea885e9d6fd10f0e7e3 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 19:33:39 +0530 Subject: [PATCH 06/10] Runnable examples for DataSeries removing methods. 
--- src/DataFrame/DataSeries.class.st | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index c20b69c4..05bad413 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -745,7 +745,7 @@ DataSeries >> reject: aBlock [ { #category : #removing } DataSeries >> removeAt: aKey [ "Removes element from the data series with key aKey" - + ^ self removeKey: aKey ] @@ -760,6 +760,10 @@ DataSeries >> removeAtIndex: aNumber [ DataSeries >> removeDuplicates [ "Answer the unique values of the receiver by removing duplicates" + "(#(1 2 3 3 2) asDataSeries removeDuplicates) >>> (#(1 2 3))" + + "(#(c d b c d d) asDataSeries removeDuplicates) >>> (#(#c #d #b))" + ^ self asSet asArray ] @@ -767,6 +771,10 @@ DataSeries >> removeDuplicates [ DataSeries >> removeNils [ "Removes elements with nil values from the data series" + "(#(nil 1 nil nil 2) asDataSeries removeNils) >>> (DataSeries withKeys: #(2 5) values: #(1 2))" + + "(#(a b 'nil' nil nil nil) asDataSeries removeNils) >>> (#(a b 'nil') asDataSeries)" + | keysWithNilValues | keysWithNilValues := OrderedCollection new. self associationsDo: [ :each | From 23f03cc8a425900a7790bfe0f355b9e85ff051bc Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 19:35:42 +0530 Subject: [PATCH 07/10] Runnable examples for DataSeries private methods. 
--- src/DataFrame/DataSeries.class.st | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 05bad413..e0f1d4e8 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -1260,6 +1260,10 @@ DataSeries >> withSeries: otherDataSeries collect: twoArgBlock [ DataSeries >> withoutNils [ "Returns a copy of the data series without the nil values" + "(#(nil 1 nil nil 2) asDataSeries withoutNils) >>> (DataSeries withKeys: #(2 5) values: #(1 2))" + + "(#(a b 'nil' nil nil nil) asDataSeries withoutNils) >>> (#(a b 'nil') asDataSeries)" + ^ self reject: #isNil ] From 4b9204fcf3503fc3aa8178d8ecb8d9ac65e39dd2 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 19:53:53 +0530 Subject: [PATCH 08/10] Runnable examples for DataSeries math functions. --- src/DataFrame/DataSeries.class.st | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index e0f1d4e8..7bf1b6a2 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -270,12 +270,24 @@ DataSeries >> collectWithNotNils: aBlock [ { #category : #'math functions' } DataSeries >> correlationWith: otherSeries [ "Calculate the Pearson correlation coefficient between self and the other series" - ^ self correlationWith: otherSeries using: DataPearsonCorrelationMethod + + "((#(1 2 4) asDataSeries) correlationWith: (#(2 4 8) asDataSeries)) >>> 1." + + "((#(1 2 4) asDataSeries) correlationWith: (#(-3 -6 -12) asDataSeries)) >>> -1." 
+ + ^ self + correlationWith: otherSeries + using: DataPearsonCorrelationMethod ] { #category : #'math functions' } DataSeries >> correlationWith: otherSeries using: aCorrelationCoefficient [ "Calculate the correlation coefficient between self and the other series using the given method" + + "((#(1 2 4) asDataSeries) correlationWith: (#(2 4 8) asDataSeries) using: DataPearsonCorrelationMethod) >>> 1." + + "((#(1 2 4) asDataSeries) correlationWith: (#(-3 -6 -12) asDataSeries) using: DataPearsonCorrelationMethod) >>> -1." + ^ aCorrelationCoefficient between: self and: otherSeries ] @@ -627,6 +639,12 @@ DataSeries >> last [ { #category : #'math functions' } DataSeries >> log: base [ + "Returns a data series containing the logarithm of each value in the receiver using the specified base." + + "(#(1 2 4 8 16) asDataSeries log: 2) >>> (#(0.0 1.0 2.0 3.0 4.0) asDataSeries)" + + "(#(1 10 100) asDataSeries log: 10) >>> (#(0.0 1.0 2.0) asDataSeries)" + ^ self collect: [ :each | each log: base ] ] From 1181d4555f5ca076c1b06c1f9c5eafc4934aec06 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 20:00:00 +0530 Subject: [PATCH 09/10] Runnable examples for DataSeries data-types. 
--- src/DataFrame/DataSeries.class.st | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 7bf1b6a2..0f9dfea6 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -220,7 +220,15 @@ DataSeries >> average [ { #category : #'data-types' } DataSeries >> calculateDataType [ "Returns the data type of the data series" - + + "(#(1 2 3) asDataSeries calculateDataType) >>> SmallInteger" + + "(#(1 a 3) asDataSeries calculateDataType) >>> Object" + + "(#(1.1 2.5 3.7) asDataSeries calculateDataType) >>> SmallFloat64" + + "(#(1.1 2.5 3) asDataSeries calculateDataType) >>> Number" + ^ self values calculateDataType ] From 51ce6f4f31e72628a0747eeca075e39ca9b135e5 Mon Sep 17 00:00:00 2001 From: Joshua-Dias-Barreto Date: Wed, 14 Jun 2023 20:07:56 +0530 Subject: [PATCH 10/10] Runnable examples for DataSeries converting methods. --- src/DataFrame/DataSeries.class.st | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/DataFrame/DataSeries.class.st b/src/DataFrame/DataSeries.class.st index 0f9dfea6..06921611 100644 --- a/src/DataFrame/DataSeries.class.st +++ b/src/DataFrame/DataSeries.class.st @@ -387,6 +387,12 @@ DataSeries >> eighth [ DataSeries >> encodeOneHot [ "Encode the values of the DataSeries into one-hot vectors." + "(#(a b) asDataSeries encodeOneHot) >>> (#(#(1 0) #(0 1)) asDataSeries)" + + "(#(1 2 3) asDataSeries encodeOneHot) >>> (#(#(1 0 0) #(0 1 0) #(0 0 1)) asDataSeries)" + + "(#(23 0.5 542) asDataSeries encodeOneHot) >>> (#(#(0 1 0) #(1 0 0) #(0 0 1)) asDataSeries)" + | uniqueValues encodingDataSeries oneHotValues | uniqueValues := self removeDuplicates sortIfPossible. encodingDataSeries := self class new.