In [None]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

## Review: Arrays

In [None]:
# Build an array holding the four numbers 1, 2, 3 and 4.
my_array = make_array(1, 2, 3, 4)

In [None]:
my_array

In [None]:
# Arithmetic on an array is applied to every element and produces a new array.
my_array * 2

In [None]:
my_array ** 2

In [None]:
my_array + 1

In [None]:
my_array # array is unchanged

In [None]:
len(my_array)

In [None]:
sum(my_array)

In [None]:
# Mean computed by hand: the total of the elements divided by how many there are.
sum(my_array) / len(my_array)

In [None]:
# The same mean, computed with NumPy's average function.
np.average(my_array)

In [None]:
another = make_array(70, 60, 90, 80)

In [None]:
# Both arrays hold four elements, so they are added element by element.
my_array + another

In [None]:
yet_another = make_array(5, 6, 7)

In [None]:
# my_array has four elements and yet_another has three, so they cannot be
# added element by element. NumPy raises a ValueError; catch it so the
# message is shown without stopping "Restart & Run All".
try:
    my_array + yet_another
except ValueError as err:
    print('ValueError:', err)

In [None]:
# Arrays can hold strings as well as numbers.
tunas = make_array('bluefin', 'albacore', 'jim')
tunas

In [None]:
# Multiplication is not defined for an array of strings, so NumPy raises a
# TypeError. Catch it so the message is shown without stopping
# "Restart & Run All"; if the operation ever succeeds, print its result.
try:
    print(tunas * 4)
except TypeError as err:
    print('TypeError:', err)

In [None]:
tunas.item(0) # NOTE: indexing starts at 0!

In [None]:
tunas.item(2)

In [None]:
# tunas has three items, at indices 0, 1 and 2, so index 3 is out of range.
# Catch the IndexError so the message is shown without stopping
# "Restart & Run All".
try:
    tunas.item(3)
except IndexError as err:
    print('IndexError:', err)

## Columns of Tables are Arrays ##

In [None]:
# Load the salary data and rename the column at index 3 to "Salary".
# relabeled (rather than relabel) returns a renamed copy instead of renaming
# in place, matching how the rest of this notebook renames columns.
nba = Table.read_table('nba_salaries_2021.csv').relabeled(3, "Salary")
nba

In [None]:
# Keep only the rows whose 'Team' value is 'Chicago Bulls'.
bulls = nba.where('Team', 'Chicago Bulls')
bulls

In [None]:
# select returns a Table that contains just the 'Salary' column.
bulls.select('Salary')

In [None]:
# column returns the 'Salary' values as an array rather than as a Table.
bulls.column('Salary')

In [None]:
np.average(bulls.column('Salary'))

In [None]:
warriors = nba.where('Team', 'Golden State Warriors')

In [None]:
# Difference between the two teams' average salaries (Bulls minus Warriors).
np.average(bulls.column('Salary')) - np.average(warriors.column('Salary'))

## Ranges ##

In [None]:
make_array(0, 1, 2, 3, 4, 5, 6)

In [None]:
# Integers from 0 up to, but not including, 7.
np.arange(7)

In [None]:
# Start at 5 and stop before 11.
np.arange(5, 11)

In [None]:
# Count up from 0 in steps of 2; the stop value 20 itself is excluded.
np.arange(0, 20, 2)

In [None]:
# Raising the stop value to 21 lets 20 appear in the result.
np.arange(0, 21, 2)

In [None]:
# A non-integer step is allowed; the stop value 1 is excluded here.
# The NumPy docs recommend np.linspace for non-integer steps, because
# floating-point rounding can make the length of the result unpredictable.
np.arange(0, 1, 0.1)

# Ways to Create a Table #

## Creating a Table from Scratch ##

In [None]:
streets = make_array('Michigan', 'Wabash', 'State', 'Dearborn')
streets

In [None]:
Table()

In [None]:
# Start from an empty table and add one column, labelled 'Streets', that holds
# the streets array.
downtown = Table().with_column('Streets', streets)
downtown

In [None]:
# with_column returns a new table. The result is only displayed here, not
# assigned, so `downtown` itself does not gain the column.
downtown.with_column('Blocks from the Bean', np.arange(4))

In [None]:
downtown

In [None]:
# Assign the result back to `downtown` to keep the new column.
downtown = downtown.with_column('Blocks from the Bean', np.arange(4))
downtown

In [None]:
# with_columns takes alternating label / values arguments and adds several
# columns in one call.
Table().with_columns(
    'Streets', streets,
    'Blocks from the Bean', np.arange(4)
)

## Reading a Table from a File  ##

In [None]:
du_bois = Table.read_table('du_bois.csv')
du_bois

In [None]:
du_bois.column('ACTUAL AVERAGE')

In [None]:
du_bois.column('FOOD')

In [None]:
# Multiply the two columns element by element.
# NOTE(review): presumably 'FOOD' is the share of income spent on food, so the
# product is a dollar amount — confirm the units in du_bois.csv.
du_bois.column('ACTUAL AVERAGE') * du_bois.column('FOOD')

In [None]:
# The table returned by with_columns is displayed but not assigned, so
# du_bois itself does not gain a 'Food $' column in this cell.
food_dollars = du_bois.column('ACTUAL AVERAGE') * du_bois.column('FOOD')
du_bois.with_columns('Food $', food_dollars)

In [None]:
# On a fresh run du_bois has no 'Food $' column at this point, because no
# earlier cell assigns a table containing it back to du_bois. Selecting the
# missing label raises an error, so capture the message and display it
# instead of stopping "Restart & Run All". If the column does exist (for
# example when re-running cells out of order), the selected table is shown.
try:
    food_preview = du_bois.select('CLASS', 'ACTUAL AVERAGE', 'FOOD', 'Food $')
except (ValueError, KeyError) as err:
    food_preview = '{}: {}'.format(type(err).__name__, err)
food_preview

In [None]:
# Element-by-element product of the two columns.
food_dollars = du_bois.column('ACTUAL AVERAGE') * du_bois.column('FOOD')

# Assigning the result back to du_bois keeps the new 'Food $' column.
du_bois = du_bois.with_columns('Food $', food_dollars)

du_bois

In [None]:
du_bois.select('CLASS', 'ACTUAL AVERAGE', 'FOOD', 'Food $')

In [None]:
# The column labels of the table.
du_bois.labels

In [None]:
# The number of rows in the table.
du_bois.num_rows

## Table Practice: Welcome Survey

In [None]:
# Load the survey responses, leave out the "Timestamp" column, and preview
# the first 5 rows.
welcome = Table.read_table('welcome_survey_sp21.csv').drop("Timestamp")
welcome.show(5)

In [None]:
# Average of the 'countries' column among the respondents whose
# 'intro_extra' answer is exactly 'Introverted'.
introverts = welcome.where('intro_extra', are.equal_to('Introverted'))
np.average(introverts.column('countries'))

In [None]:
# How many students get at least 8 hours of sleep each night (on average)?
# First way: keep the rows whose 'sleep' value is 8 or more, then count them.
welcome.where('sleep', are.above_or_equal_to(8)).num_rows

In [None]:
# Second way: compare every 'sleep' value to 8 and count the True results.
np.count_nonzero(welcome.column('sleep') >= 8)

In [None]:
# Third way: sum the comparison results; each True adds 1 and each False adds 0.
np.sum(welcome.column('sleep') >= 8)

In [None]:
# Two-column view of the survey: keep 'sleep' and 'intro_extra', then give
# both columns friendlier labels. Written as one chain so two_col is bound
# only once.
two_col = (
    welcome
    .select('sleep', 'intro_extra')
    .relabeled(0, 'Avg Hours of Sleep')
    .relabeled(1, 'Introversion/Extraversion')
)
two_col.show(3)

## Discussion Question: NBA Salaries

In [None]:
# Re-read the salary file and discard the 'Team' column in a single step.
# This rebinds `nba`: earlier cells that filter on 'Team' or read a 'Salary'
# column will not work if they are re-run after this cell.
nba = Table.read_table('nba_salaries_2021.csv').drop('Team')
nba.show(3)

In [None]:
# Question (a)

In [None]:
# Question (b)

## Census ##

In [None]:
full = Table.read_table('nc-est2014-agesex-res.csv')
full

In [None]:
# Keep four columns of the full table and preview the first 4 rows.
partial = full.select('SEX', 'AGE', 'CENSUS2010POP', 'POPESTIMATE2014')
partial.show(4)

In [None]:
# Shorten the two population column labels to just their years. In `partial`
# these are the columns at positions 2 and 3; naming them by label makes the
# renaming explicit.
simple = (
    partial
    .relabeled('CENSUS2010POP', '2010')
    .relabeled('POPESTIMATE2014', '2014')
)
simple.show(4)

In [None]:
# Sort the rows by 'AGE', smallest first (the default order).
simple.sort('AGE')

In [None]:
# The same sort with the largest 'AGE' values first.
simple.sort('AGE', descending=True)

## Visualization ##

In [None]:
# Keep the rows with AGE below 999, then the rows where SEX equals 0, and
# finally leave out the SEX column.
# NOTE(review): presumably AGE 999 marks an all-ages total row and SEX 0 means
# both sexes combined — confirm against the census file's documentation.
no_999 = simple.where('AGE', are.below(999))
everyone = no_999.where('SEX', are.equal_to(0)).drop('SEX')

In [None]:
everyone

In [None]:
# Line plot with 'AGE' on the horizontal axis and the '2010' column on the
# vertical axis.
everyone.plot('AGE', '2010')

In [None]:
# The number of columns in the du_bois table.
# NOTE(review): this cell sits under the Visualization section but inspects
# du_bois from the earlier file-reading section — consider moving it next to
# the labels / num_rows cells.
du_bois.num_columns