In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Arrays

In [2]:
# Build an array holding five height values, then display it.
heights = make_array(67, 60, 71, 63, 65)
heights

array([67, 60, 71, 63, 65])

In [3]:
# Dividing an array by a number divides every element
# (presumably converting inches to feet -- confirm the units).
heights / 12

array([5.58333333, 5.        , 5.91666667, 5.25      , 5.41666667])

In [4]:
# Number of elements in the array.
len(heights)

5

In [5]:
# Python's built-in sum adds up all the elements of the array.
sum(heights)

326

In [6]:
# Mean computed by hand: the total divided by the number of elements.
sum(heights) / len(heights)

65.2

In [7]:
# NumPy's average function gives the same value as total / count.
np.average(heights)

65.2

In [9]:
# Arrays can also hold strings.
strings_array = make_array('ok', 'ya', 'hm')
strings_array

array(['ok', 'ya', 'hm'], dtype='<U2')

In [10]:
# Multiplying an array of strings by a number is not supported and raises an
# error (a kind of TypeError). The error is the point of this cell, so catch
# it and print it: the message is still shown, but the notebook keeps running
# under "Restart & Run All" instead of stopping here.
try:
    strings_array * 2
except TypeError as error:
    print(f'{type(error).__name__}: {error}')

UFuncTypeError: ufunc 'multiply' did not contain a loop with signature matching types (dtype('<U3'), dtype('<U3')) -> dtype('<U3')

In [11]:
# For comparison: multiplying a plain Python string by an integer repeats it.
'ha'*2

'haha'

In [12]:
# item(i) returns the element at index i. Indices start at 0, so index 3
# is the fourth element.
heights.item(3)

63

In [13]:
# Index 0 is the first element.
heights.item(0)

67

In [14]:
# item works the same way on an array of strings.
strings_array.item(0)

'ok'

## Ranges ##

In [15]:
# Consecutive integers written out by hand; np.arange (below) builds the
# same kind of array without listing every value.
make_array(0, 1, 2, 3, 4, 5, 6)

array([0, 1, 2, 3, 4, 5, 6])

In [16]:
# np.arange(n) produces the integers from 0 up to, but not including, n.
np.arange(6)

array([0, 1, 2, 3, 4, 5])

In [17]:
# For 6 to be included, the end value has to be 7.
np.arange(7)

array([0, 1, 2, 3, 4, 5, 6])

In [18]:
# With two arguments: start at 5 and stop before 11.
np.arange(5, 11)

array([ 5,  6,  7,  8,  9, 10])

In [19]:
# A third argument sets the step: count by 3 from 0, stopping before 20.
np.arange(0, 20, 3)

array([ 0,  3,  6,  9, 12, 15, 18])

In [20]:
# Count by 2 from 0; the end is 21 (not 20) so that 20 itself is included.
np.arange(0, 21, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20])

In [21]:
# The step can be a fraction: from 0 up to (not including) 1 in steps of 0.1.
np.arange(0, 1, 0.1)

array([0. , 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])

In [22]:
# Give a range a name so that it can be used in later cells.
a = np.arange(8)

In [23]:
# Display the array: eight elements, the integers 0 through 7.
a

array([0, 1, 2, 3, 4, 5, 6, 7])

In [27]:
# `a` has 8 elements at indices 0 through 7, so asking for index 8 raises an
# IndexError. The error is the point of this cell, so catch it and print it:
# the message is still shown, but the notebook keeps running under
# "Restart & Run All" instead of stopping here.
try:
    a.item(8)
except IndexError as error:
    print(f'{type(error).__name__}: {error}')

IndexError: index 8 is out of bounds for axis 0 with size 8

In [28]:
# A shorter range: the integers 0 through 4.
b = np.arange(5)
b

array([0, 1, 2, 3, 4])

In [29]:
# In an array built by np.arange(5), the element at index 3 is 3.
b.item(3)

3

In [30]:
# An array of ten food names.
food = make_array('pizza', 'spaghetti', 'ice cream', 'donuts', 'crackers', 'tacos', 'nachos', 'chicken', 'sushi', 'apples')
food

array(['pizza', 'spaghetti', 'ice cream', 'donuts', 'crackers', 'tacos',
       'nachos', 'chicken', 'sushi', 'apples'], dtype='<U9')

In [31]:
# Index 7 is the eighth element, because indexing starts at 0.
food.item(7)

'chicken'

## Creating a Table from Scratch ##

In [32]:
# An array of street names, used below as the first column of a table.
streets = make_array('Englewood', 'Knox', 'Green', 'Markham')
streets

array(['Englewood', 'Knox', 'Green', 'Markham'], dtype='<U9')

In [33]:
# Table() with no arguments creates a table that has no columns yet.
Table()

In [34]:
# Start from an empty table and add a 'Streets' column holding the streets array.
southside = Table().with_column('Streets', streets)
southside

Streets
Englewood
Knox
Green
Markham


In [35]:
# with_column returns a table with the extra column. Here the result is only
# displayed; it is not assigned to any name.
southside.with_column('Blocks from campus', np.arange(4))

Streets,Blocks from campus
Englewood,0
Knox,1
Green,2
Markham,3


In [36]:
# southside itself still has only the 'Streets' column.
southside

Streets
Englewood
Knox
Green
Markham


In [37]:
# Assign the result back to southside so that the new column is kept.
southside = southside.with_column('Blocks from campus', np.arange(4))
southside

Streets,Blocks from campus
Englewood,0
Knox,1
Green,2
Markham,3


In [38]:
# The column labels of the table.
southside.labels

('Streets', 'Blocks from campus')

In [39]:
# Number of columns in the table.
southside.num_columns

2

In [40]:
# Number of rows in the table.
southside.num_rows

4

## Reading a Table from a File  ##

In [None]:
# Instructor note: C Block starts here (2/15).
# Read data/du_bois.csv into a table and display it.
du_bois = Table.read_table('data/du_bois.csv')
du_bois

In [None]:
# Which group ("CLASS") spent the highest percentage on rent?
# Instructor note: for future classes, hide this "smile".
# Sort by 'RENT' in descending order so the largest value comes first, and
# display 'RENT' using PercentFormatter.
du_bois.sort('RENT', descending=True).set_format('RENT', PercentFormatter)

In [None]:
# NOTE(review): select presumably returns a table containing just this
# column, whereas column (next cell) returns the values themselves -- confirm.
du_bois.select('STATUS')

In [None]:
# Get the values stored in the 'STATUS' column.
du_bois.column('STATUS')

In [None]:
# Get the values stored in the 'ACTUAL AVERAGE' column.
du_bois.column('ACTUAL AVERAGE')

In [None]:
# Get the values stored in the 'FOOD' column.
du_bois.column('FOOD')

In [None]:
# Multiply the two columns together (presumably element by element, like the
# array arithmetic earlier in the notebook).
du_bois.column('ACTUAL AVERAGE') * du_bois.column('FOOD')

In [None]:
# Compute the product of the 'ACTUAL AVERAGE' and 'FOOD' columns, store it in
# du_bois under the label 'Food $', and display the updated table.
food_dollars = du_bois.column('ACTUAL AVERAGE') * du_bois.column('FOOD')
du_bois = du_bois.with_column('Food $', food_dollars)
du_bois

In [None]:
# Display the 'FOOD' column using PercentFormatter.
# NOTE(review): unlike with_column, this is called on du_bois without
# reassigning; it presumably changes how du_bois itself is displayed -- confirm.
du_bois.set_format('FOOD', PercentFormatter)

In [None]:
# Show only these four columns.
du_bois.select('CLASS', 'ACTUAL AVERAGE', 'FOOD', 'Food $')

In [None]:
# Look at the 'FOOD' values again after the set_format call on du_bois
# (presumably to check whether formatting changed the stored numbers -- confirm).
du_bois.column('FOOD')

## Selecting data in a column ##

In [None]:
# Read data/movies_by_year_with_ticket_price.csv into a table and display it
# with show() (no row limit is passed).
movies = Table.read_table('data/movies_by_year_with_ticket_price.csv')
movies.show()

In [None]:
# Scale 'Total Gross' by 1e6 (presumably the column is in millions of
# dollars -- confirm), then divide by the average ticket price to estimate
# the number of tickets sold.
gross_in_dollars = movies.column('Total Gross') * 1e6
tix_sold = gross_in_dollars / movies.column('Average Ticket Price')

In [None]:
# Add the computed values to the table as a new 'Tickets sold' column.
movies = movies.with_column('Tickets sold', tix_sold)

In [None]:
# Display the table with show, passing 4 (presumably limiting the display to
# the first 4 rows -- confirm).
movies.show(4)

In [None]:
# Display the 'Tickets sold' column using NumberFormatter.
movies.set_format('Tickets sold', NumberFormatter)

In [None]:
# Plot using the 'Year' and 'Tickets sold' columns (presumably 'Year' on the
# horizontal axis and 'Tickets sold' on the vertical axis -- confirm).
movies.plot('Year', 'Tickets sold')

In [None]:
# Keep only the rows whose 'Year' satisfies are.between(2000, 2005).
# NOTE(review): check whether the upper bound 2005 is itself included.
movies.where('Year', are.between(2000, 2005))

In [None]:
# Passing a plain value to where: presumably keeps the rows whose 'Year'
# equals 2002 (the next cell spells this out with are.equal_to) -- confirm.
movies.where('Year', 2002)

In [None]:
# Keep only the rows whose 'Year' satisfies are.equal_to(2002).
movies.where('Year', are.equal_to(2002))

In [None]:
# Keep only the rows whose '#1 Movie' satisfies are.containing('Harry Potter').
movies.where('#1 Movie', are.containing('Harry Potter'))

In [None]:
# np.arange(2, 5) is the array 2, 3, 4; take presumably returns the rows at
# those positions (counting from 0) -- confirm.
movies.take(np.arange(2, 5))