In [4]:
import datascience as ds
import numpy as np
%matplotlib inline
# Small example table: one row per ice-cream cone, with its flavor and price.
cones = ds.Table().with_columns(
    'Flavor',
    ds.make_array(
        'strawberry', 'chocolate', 'chocolate', 'strawberry', 'chocolate'
    ),
    'Price',
    ds.make_array(3.55, 4.75, 6.55, 5.25, 5.25),
)
cones

Flavor,Price
strawberry,3.55
chocolate,4.75
chocolate,6.55
strawberry,5.25
chocolate,5.25


In [5]:
# Called with only a column label, group() counts the rows for each distinct
# value in that column. An optional second argument is a function that is
# used to aggregate the remaining columns instead of counting.
cones.group('Flavor')

Flavor,count
chocolate,3
strawberry,2


In [7]:
def average(sequence):
    """Return the arithmetic mean of the values in ``sequence``.

    ``sequence`` must support both ``sum()`` and ``len()`` (a list, tuple or
    array of numbers). An empty sequence raises ``ZeroDivisionError``.
    """
    total = sum(sequence)
    count = len(sequence)
    return total / count
# Aggregate 'Price' per flavor twice: once with the built-in sum and once with
# the average() helper. The two tables are printed separated by a blank line.
print(
    cones.group('Flavor', sum),
    cones.group('Flavor', average),
    sep='\n\n',
)


Flavor     | Price sum
chocolate  | 16.55
strawberry | 8.8

Flavor     | Price average
chocolate  | 5.51667
strawberry | 4.4


In [9]:
# Keep only the chocolate rows, then pull out their prices as an array.
cones_choc = (
    cones
    .where('Flavor', ds.are.equal_to('chocolate'))
    .column('Price')
)
cones_choc

array([4.75, 6.55, 5.25])

In [18]:
# Price arrays for each flavor.
cones_choc = (
    cones
    .where('Flavor', ds.are.equal_to('chocolate'))
    .column('Price')
)
cones_strawb = (
    cones
    .where('Flavor', ds.are.equal_to('strawberry'))
    .column('Price')
)

# One row per flavor; the second column holds the whole array of prices
# for that flavor.
grouped_cones = ds.Table().with_columns(
    'Flavor',
    ds.make_array('chocolate', 'strawberry'),
    'Array of All the Prices',
    ds.make_array(cones_choc, cones_strawb),
)
print(grouped_cones, end='\n\n')

# Add the size of each price array as a further column.
lengths = grouped_cones.with_column(
    'Length of the Array',
    ds.make_array(len(cones_choc), len(cones_strawb)),
)
print(lengths)

Flavor     | Array of All the Prices
chocolate  | [4.75 6.55 5.25]
strawberry | [3.55 5.25]

Flavor     | Array of All the Prices | Length of the Array
chocolate  | [4.75 6.55 5.25]        | 3
strawberry | [3.55 5.25]             | 2


In [25]:
# A second example table with an extra categorical column ('Color') so that
# rows can be grouped by more than one label.
more_cones = ds.Table().with_columns(
    'Flavor',
    ds.make_array(
        'strawberry', 'chocolate', 'chocolate',
        'strawberry', 'chocolate', 'bubblegum'
    ),
    'Color',
    ds.make_array(
        'pink', 'light brown', 'dark brown',
        'pink', 'dark brown', 'pink'
    ),
    'Price',
    ds.make_array(3.55, 4.75, 5.25, 5.25, 5.25, 4.75),
)

more_cones


Flavor,Color,Price
strawberry,pink,3.55
chocolate,light brown,4.75
chocolate,dark brown,5.25
strawberry,pink,5.25
chocolate,dark brown,5.25
bubblegum,pink,4.75


In [31]:
# Row counts for progressively finer groupings: by one label, by a pair of
# labels, and by all three. The tables are printed separated by blank lines.
print(
    more_cones.group('Flavor'),
    more_cones.group(['Flavor', 'Color']),
    more_cones.group(['Flavor', 'Color', 'Price']),
    sep='\n\n',
)

Flavor     | count
bubblegum  | 1
chocolate  | 3
strawberry | 2

Flavor     | Color       | count
bubblegum  | pink        | 1
chocolate  | dark brown  | 2
chocolate  | light brown | 1
strawberry | pink        | 2

Flavor     | Color       | Price | count
bubblegum  | pink        | 4.75  | 1
chocolate  | dark brown  | 5.25  | 2
chocolate  | light brown | 4.75  | 1
strawberry | pink        | 3.55  | 1
strawberry | pink        | 5.25  | 1


In [34]:
# Group by the (Flavor, Color) pair and aggregate the remaining column, first
# with the built-in sum and then with average(), which is defined in an
# earlier cell.
flavor_and_color = ['Flavor', 'Color']
print(
    more_cones.group(flavor_and_color, sum),
    more_cones.group(flavor_and_color, average),
    sep='\n\n',
)

Flavor     | Color       | Price sum
bubblegum  | pink        | 4.75
chocolate  | dark brown  | 10.5
chocolate  | light brown | 4.75
strawberry | pink        | 8.8

Flavor     | Color       | Price average
bubblegum  | pink        | 4.75
chocolate  | dark brown  | 5.25
chocolate  | light brown | 4.75
strawberry | pink        | 4.4


In [37]:
# Show the original table, then its pivot.
# In pivot('Flavor', 'Color', ...) the distinct values of the first argument
# ('Flavor') become the column labels and the distinct values of the second
# ('Color') become the rows. Each cell holds the sum of 'Price' for that
# flavor/color combination; combinations that never occur show 0.
print(
    more_cones,
    more_cones.pivot('Flavor', 'Color', values='Price', collect=sum),
    sep='\n\n',
)

Flavor     | Color       | Price
strawberry | pink        | 3.55
chocolate  | light brown | 4.75
chocolate  | dark brown  | 5.25
strawberry | pink        | 5.25
chocolate  | dark brown  | 5.25
bubblegum  | pink        | 4.75

Color       | bubblegum | chocolate | strawberry
dark brown  | 0         | 10.5      | 0
light brown | 0         | 4.75      | 0
pink        | 4.75      | 0         | 8.8


In [42]:
print('2 Related Tables\n\n')

# NOTE: this rebinds `cones` to a new table with different rows (it now
# includes a 'vanilla' cone); cells below use this version.
cones = ds.Table().with_columns(
    'Flavor', ds.make_array('strawberry', 'vanilla', 'chocolate', 'strawberry', 'chocolate'),
    'Price', ds.make_array(3.55, 4.75, 6.55, 5.25, 5.75)
)
# sep='' stops print() from inserting a space between the heading and the
# table; that space would indent the table's header row and misalign it with
# the data rows beneath it.
print('cones\n\n', cones, sep='')

# Star ratings per flavor; the 'Kind' column holds the same flavor names that
# appear in the 'Flavor' column of `cones`.
ratings = ds.Table().with_columns(
    'Kind', ds.make_array('strawberry', 'chocolate', 'vanilla'),
    'Stars', ds.make_array(2.5, 3.5, 4)
)
print('\n\nratings\n\n', ratings, sep='')


2 Related Tables


cones

 Flavor     | Price
strawberry | 3.55
vanilla    | 4.75
chocolate  | 6.55
strawberry | 5.25
chocolate  | 5.75


ratings

 Kind       | Stars
strawberry | 2.5
chocolate  | 3.5
vanilla    | 4


In [43]:
# Match each cone to its rating: rows of `cones` are paired with rows of
# `ratings` where cones' 'Flavor' equals ratings' 'Kind'.
rated = cones.join('Flavor', ratings, 'Kind')
# sep='' avoids the space print() would otherwise put in front of the table,
# which would shift the header row out of line with the data rows.
print('2 Tables joined together\n', rated, sep='')

2 Tables joined together
 Flavor     | Price | Stars
chocolate  | 6.55  | 3.5
chocolate  | 5.75  | 3.5
strawberry | 3.55  | 2.5
strawberry | 5.25  | 2.5
vanilla    | 4.75  | 4


In [44]:
# Add a derived column -- price divided by star rating -- built from values
# that came from the two joined tables, then sort the rows by that new column
# in ascending order.
print(
    rated
    .with_column('$/Star', rated.column('Price') / rated.column('Stars'))
    .sort('$/Star')
)

Flavor     | Price | Stars | $/Star
vanilla    | 4.75  | 4     | 1.1875
strawberry | 3.55  | 2.5   | 1.42
chocolate  | 5.75  | 3.5   | 1.64286
chocolate  | 6.55  | 3.5   | 1.87143
strawberry | 5.25  | 2.5   | 2.1
