diff --git a/src/DataFrame-Tests/DataFrameTest.class.st b/src/DataFrame-Tests/DataFrameTest.class.st
index 988050df..20f33daa 100644
--- a/src/DataFrame-Tests/DataFrameTest.class.st
+++ b/src/DataFrame-Tests/DataFrameTest.class.st
@@ -1552,6 +1552,27 @@ DataFrameTest >> testDataTypesWithNil [
 	self assert: df dataTypes equals: expected
 ]
 
+{ #category : #tests }
+DataFrameTest >> testDescribe [
+	"describe must answer one row per column with count, mean, std, min, quartiles, max and dtype; the nil in column B is not counted"
+
+	| dataFrame expected |
+	dataFrame := DataFrame
+		             withRows: #( #( 1 1 ) #( 2 nil ) #( 3 1 ) )
+		             columnNames: #( 'A' 'B' ).
+
+	"Brace arrays are used because inside a literal array SmallInteger would be the Symbol #SmallInteger, not the class"
+	expected := DataFrame withRows: {
+			            { 3. 2. 1. 1. 1. 2. 3. 3. SmallInteger }.
+			            { 2. 1. 0. 1. 1. 1. 1. 1. SmallInteger } }.
+
+	expected rowNames: #( 'A' 'B' ).
+	expected columnNames:
+		#( 'count' 'mean' 'std' 'min' '25%' '50%' '75%' 'max' 'dtype' ).
+
+	self assert: dataFrame describe equals: expected
+]
+
 { #category : #tests }
 DataFrameTest >> testDetect [
 	| actual expected |
diff --git a/src/DataFrame/DataFrame.class.st b/src/DataFrame/DataFrame.class.st
index 83f2744f..dbc5aa20 100644
--- a/src/DataFrame/DataFrame.class.st
+++ b/src/DataFrame/DataFrame.class.st
@@ -941,6 +941,30 @@ DataFrame >> defaultHeadTailSize [
 	^ 5
 ]
 
+{ #category : #statistics }
+DataFrame >> describe [
+	"Answer a new data frame with one row per numerical column of the receiver (rows are named after those columns), holding summary statistics: non-nil count, mean, standard deviation, minimum, the three quartiles, maximum and data type"
+
+	| content |
+	content := self numericalColumns collect: [ :column |
+		           {
+			           column countNonNils.
+			           column average.
+			           column stdev.
+			           column min.
+			           column firstQuartile.
+			           column secondQuartile.
+			           column thirdQuartile.
+			           column max.
+			           column calculateDataType } ].
+
+	"All names are Strings (not a mix of Symbols and Strings) so comparing the result with a string-named data frame does not rely on String/Symbol cross-equality"
+	^ self class
+		  withRows: content
+		  rowNames: self numericalColumnNames
+		  columnNames: #( 'count' 'mean' 'std' 'min' '25%' '50%' '75%' 'max' 'dtype' )
+]
+
 { #category : #accessing }
 DataFrame >> dimensions [
 	"Returns the number of rows and number of columns in a DataFrame"