In [1]:
from datascience import *
import numpy as np

%matplotlib inline
import matplotlib.pyplot as plots
plots.style.use('fivethirtyeight')

# Data 6: Comparisons and Control

# Boolean operators and Compound Expressions

In [1]:
x = 3
y = 2

In [2]:
x == y

False

## More examples

In [9]:
year = 'junior'
units = 125

In [10]:
year_check = year == 'senior'
year_check

False

In [11]:
units_check = units >= 120	
units_check

True

In [12]:
ready_to_grad = year_check and units_check
ready_to_grad

False

In [13]:
almost_ready = year_check or units_check
almost_ready

True

## Task: Fill in the truth table

| `x` | `y` | `(not x) and y` |
| :---: | :---: | :---: |
| `False` | `False` | ... |
| `False` | `True` | ... |
| `True` | `False` | ... |
| `True` | `True` | ... |

In [4]:
# edit this cell to fill in the table above
x = False
y = False
(not x) and y

False

# Boolean Practice

## Task: Apply-Filter-Drop

Recall the SAT dataset, which shows aggregated (average) SAT scores by state ([source 1](https://commonwealthfoundation.org/2014/12/22/sat-scores-by-state-2014/), [source 2](https://reports.collegeboard.org/sat-suite-program-results/data-archive)).

Again, this data is from 2014, so the total score is out of 2400 (over three sections each out of 800) instead of 1600.

In [15]:
sat = Table.read_table('data/sat2014-lecture.csv')
sat

State,Participation Rate,Critical Reading,Math,Writing
Alabama,6.7,547,538,532
Alaska,54.2,507,503,475
Arizona,36.4,522,525,500
Arkansas,4.2,573,571,554
California,60.3,498,510,496
Colorado,14.3,582,586,567
Connecticut,88.4,507,510,508
Delaware,100.0,456,459,444
District of Columbia,100.0,440,438,431
Florida,72.2,491,485,472


In [16]:
def at_least_one(math, writing, threshold=500):
    """Return whether at least one of the two section scores beats `threshold`.

    math:      Math section score
    writing:   Writing section score
    threshold: score a section must strictly exceed to count as strong
               (defaults to 500, the cutoff used in this lecture)

    The result is True when `math`, `writing`, or both are greater than
    `threshold`, and False otherwise.
    """
    # `or` short-circuits: `writing` is only compared when `math` falls short.
    return math > threshold or writing > threshold

In [22]:
sat.apply(at_least_one, "Math", "Writing")

array([ True,  True,  True,  True,  True,  True,  True, False, False,
       False, False,  True, False,  True, False,  True,  True,  True,
        True, False, False,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True, False, False,  True,  True, False,  True,
        True,  True,  True,  True,  True,  True], dtype=bool)

In [18]:
sat_with_bool_col = sat.with_columns("Strong Math or Writing",
                        sat.apply(at_least_one, "Math", "Writing"))
sat_with_bool_col

State,Participation Rate,Critical Reading,Math,Writing,Strong Math or Writing
Alabama,6.7,547,538,532,True
Alaska,54.2,507,503,475,True
Arizona,36.4,522,525,500,True
Arkansas,4.2,573,571,554,True
California,60.3,498,510,496,True
Colorado,14.3,582,586,567,True
Connecticut,88.4,507,510,508,True
Delaware,100.0,456,459,444,False
District of Columbia,100.0,440,438,431,False
Florida,72.2,491,485,472,False


In [19]:
strong_sat = sat_with_bool_col.where("Strong Math or Writing", True)
strong_sat

State,Participation Rate,Critical Reading,Math,Writing,Strong Math or Writing
Alabama,6.7,547,538,532,True
Alaska,54.2,507,503,475,True
Arizona,36.4,522,525,500,True
Arkansas,4.2,573,571,554,True
California,60.3,498,510,496,True
Colorado,14.3,582,586,567,True
Connecticut,88.4,507,510,508,True
Hawaii,62.6,484,504,472,True
Illinois,4.6,599,616,587,True
Iowa,3.1,605,611,578,True


In [20]:
strong_sat_orig_cols = strong_sat.drop("Strong Math or Writing")
strong_sat_orig_cols

State,Participation Rate,Critical Reading,Math,Writing
Alabama,6.7,547,538,532
Alaska,54.2,507,503,475
Arizona,36.4,522,525,500
Arkansas,4.2,573,571,554
California,60.3,498,510,496
Colorado,14.3,582,586,567
Connecticut,88.4,507,510,508
Hawaii,62.6,484,504,472
Illinois,4.6,599,616,587
Iowa,3.1,605,611,578


### Apply-Filter-Drop, with Method Chaining

In [21]:
def at_least_one(math, writing, threshold=500):
    """Return whether at least one of the two section scores beats `threshold`.

    math:      Math section score
    writing:   Writing section score
    threshold: score a section must strictly exceed to count as strong
               (defaults to 500, the cutoff used in this lecture)

    The result is True when `math`, `writing`, or both are greater than
    `threshold`, and False otherwise.
    """
    # `or` short-circuits: `writing` is only compared when `math` falls short.
    return math > threshold or writing > threshold

(sat
 .with_columns("Strong Math or Writing",
               sat.apply(at_least_one, "Math", "Writing"))
 .where("Strong Math or Writing", True)
 .drop("Strong Math or Writing")
)

State,Participation Rate,Critical Reading,Math,Writing
Alabama,6.7,547,538,532
Alaska,54.2,507,503,475
Arizona,36.4,522,525,500
Arkansas,4.2,573,571,554
California,60.3,498,510,496
Colorado,14.3,582,586,567
Connecticut,88.4,507,510,508
Hawaii,62.6,484,504,472
Illinois,4.6,599,616,587
Iowa,3.1,605,611,578


## Boolean quirk: Short-circuiting

Recall that Python evaluates expressions left to right, after prioritizing parentheses.

When evaluating a boolean expression, Python will “short circuit” (or stop early) when the final result of the boolean expression is known.

* When does each expression below "short-circuit"? How might the `and` or the `or` Boolean operators play a role?

In [None]:
1 / 0

In [None]:
(1 < 2) or (1 / 0)

In [None]:
(2 < 2) and (1 / 0)

## Boolean quirk: `True` counts as `1`, `False` counts as `0`

WWPD (What Will Python Do?)

In [27]:
arr = make_array('Data', 'Science', 'Data')
arr

array(['Data', 'Science', 'Data'],
      dtype='<U7')

In [28]:
arr == 'Data'

array([ True, False,  True], dtype=bool)

In [29]:
sum(arr == 'Data')

2

In [30]:
sum(np.arange(3) >= 1)

2

In [31]:
sum(np.arange(4) <= 1) == 2

True

## Bonus Task

What are the values of `wear_socks` and `wear_jacket` after running the following lines of code?

In [9]:
temp = 67
raining = bool(0)
wear_socks = (not not raining) and (temp < 60)
wear_jacket = (not wear_socks) or (temp > 65)
wear_jacket = wear_jacket and wear_socks

# Control: Conditional statements

In [3]:
def sign(x):
    """Describe the sign (+/-) of a numeric input.

    Returns 'Positive' when x > 0, 'Negative' when x < 0, and
    'Neither positive nor negative' for any other value that can be
    compared with 0 (zero itself, and also NaN, which is neither above
    nor below zero).

    Raises a TypeError if x cannot be compared with a number (e.g. a string).
    """
    if x > 0:
        return 'Positive'
    elif x < 0:
        return 'Negative'
    else:
        # Catch-all branch: guarantees the function always returns a string
        # instead of silently falling off the end and returning None.
        return 'Neither positive nor negative'

In [4]:
sign(1)

'Positive'

In [5]:
sign(-1)

'Negative'

In [6]:
sign(0)

'Neither positive nor negative'

In [7]:
sign('-1')

TypeError: '>' not supported between instances of 'str' and 'int'

### Discussion Question: What is the sign of the following expression?

In [34]:
sign(sum(np.arange(4) <= 1) * -2)

'Negative'

### Discussion: What is the difference between these two conditional statements?

Be careful with indentation!

In [36]:
# Set x here so this cell does not depend on whatever value an earlier
# cell (such as the editable truth-table exercise) last assigned to it.
x = 0

if x > 5:
    print('somewhat big!')
    # Nested: the evenness check only runs when x > 5.
    if x % 2 == 0:
        print('and even too!')
else:
    # Paired with the outer `if`: runs whenever x is not greater than 5.
    print('tiny.')

tiny.


In [37]:
# Set x here so this cell does not depend on whatever value an earlier
# cell (such as the editable truth-table exercise) last assigned to it.
x = 0

if x > 5:
    print('somewhat big!')
# Not nested: this second `if` is checked no matter how the first one went.
if x % 2 == 0:
    print('and even too!')
else:
    # Paired with the evenness check, not with `x > 5`.
    print('tiny.')

and even too!


### Discussion: What is the difference between these two function definitions?

In [38]:
def my_favorite_pizza_topping(slice):
    """Return True if `slice` is 'mushroom', and False otherwise."""
    # Verbose form: the if/else only restates the result of the comparison.
    # Note: inside this function, the parameter name hides Python's built-in `slice`.
    if slice == 'mushroom':
        return True
    else:
        return False

my_favorite_pizza_topping('pepperoni')

False

In [None]:
def my_favorite_pizza_topping(slice):
    """Return whether `slice` is equal to 'mushroom'."""
    # Concise form: the comparison is returned directly, with no if/else.
    return slice == 'mushroom'
    
my_favorite_pizza_topping('pepperoni')

Be concise! The second option is better. More "Pythonic"!

# [if time] Functions with Strings

In [2]:
s = 'JuNiOR12'

# space to try out string methods

In [3]:
'berkeley' in 'uc berkeley'

True

### **Task**: Make exciting greetings by adding the **Greeting** column as below.

| Holiday | Name | Greeting |
| --- | --- | --- |
| Hanukkah | Josh |  HAPPY HANUKKAH JOSH |
| New Year | Tracy | HAPPY NEW YEAR TRACY |
| Birthday | Jaspreet | HAPPY BIRTHDAY JASPREET |

    
See the solution below!

In [4]:
holidays = Table().with_columns(
    'Holiday', make_array('Hanukkah', 'New Year', 'Birthday'),
    'Name', make_array('Josh', 'Tracy', 'Jaspreet')
)
holidays

Holiday,Name
Hanukkah,Josh
New Year,Tracy
Birthday,Jaspreet


In [5]:
# fill in this code

def make_greeting(holiday, name):
    """Exercise placeholder: returns `...` (Ellipsis) until it is filled in.

    Goal (see the task table): build an all-caps greeting such as
    'HAPPY HANUKKAH JOSH' from `holiday` and `name`.
    """
    return ...  # to fill in: the greeting string
    
holidays.with_columns(
    "Greeting",
    holidays.apply(make_greeting, "Holiday", "Name")
)

Holiday,Name,Greeting
Hanukkah,Josh,Ellipsis
New Year,Tracy,Ellipsis
Birthday,Jaspreet,Ellipsis


<details>
  <summary>Click for Solution</summary>

Various options. One reasonable approach:

```
def make_greeting(holiday, name):
    return "HAPPY " + holiday.upper() + " " + name.upper()
```

<br/>
One advanced approach, using `join`:
    
```  
def make_greeting(holiday, name):
    return ' '.join(["happy", holiday, name]).upper()
```
    

</details>

### **Challenge**: Convert phone numbers.

For example, `510-642-3141` should be formatted as `(510) 642-3141`.

_Hint_: Try using `split`. The `split` method returns a list of strings, split at the delimiter. Once that list is converted to an array (e.g., with `np.array`), you can pull out each piece with `.item`, just like with any other array.

See the solution below!

In [6]:
res_halls = Table().with_columns(
    'Residence Hall', 
        make_array('Unit 1', 'Unit 2', 'Unit 3', 'Foothill',
                   'Clark Kerr', 'Blackwell', 'Martinez Commons'),
    'Phone',
        make_array('510-642-3141', '510-642-3143', '510-642-5391', '510-642-9703',
                   '510-642-6290', '510-423-3740', '510-642-8517')
)
res_halls

Residence Hall,Phone
Unit 1,510-642-3141
Unit 2,510-642-3143
Unit 3,510-642-5391
Foothill,510-642-9703
Clark Kerr,510-642-6290
Blackwell,510-423-3740
Martinez Commons,510-642-8517


In [7]:
def format_phone_number(phone):
    """Exercise placeholder: returns `...` (Ellipsis) until it is filled in.

    Goal: turn a phone number like '510-642-3141' into '(510) 642-3141'.
    """
    parts = ...  # to fill in: the pieces of `phone` (hint: use `split`)
    return ...  # to fill in: the reformatted phone number

# to check a single phone number
format_phone_number("510-642-3141") 

Ellipsis

In [8]:
# then, check all numbers
res_halls.with_columns(
    "Formatted",
    res_halls.apply(format_phone_number, "Phone")
)

Residence Hall,Phone,Formatted
Unit 1,510-642-3141,Ellipsis
Unit 2,510-642-3143,Ellipsis
Unit 3,510-642-5391,Ellipsis
Foothill,510-642-9703,Ellipsis
Clark Kerr,510-642-6290,Ellipsis
Blackwell,510-423-3740,Ellipsis
Martinez Commons,510-642-8517,Ellipsis


<details>
  <summary>Click for Solution</summary>

Various options. One reasonable approach:

```
def format_phone_number(phone):
    parts = np.array(phone.split('-'))
    return "(" + parts.item(0) + ") " + parts.item(1) + "-" + parts.item(2)
```
    

</details>