# Mapping Crime in Vancouver

Let's just have some plotting fun!

This is loosely an HtDAP design, but we've skipped the planning stages to keep it short! We're also working from the Module 7 VPD location project for fun :)

So, edit and refactor this into something to map crime! (We've already updated the data definitions and `read` function, but not `main` or `analyze`!)

In [None]:
from cs103 import *
from typing import NamedTuple, List
from enum import Enum
import csv

##################
# Data Definitions

CrimeData = NamedTuple('CrimeData', [('x', float),
                                     ('y', float)])
# interp. data about a single crime in Vancouver with its x and y location.
# Locations are in metres offset from a somewhat arbitrary point on the surface
# of the earth.
# Caution: locations of (0, 0) are sometimes placeholders or intentionally
# inaccurate reports. Fortunately, that doesn't occur in the subset of the
# data we're looking at.
CD1 = CrimeData(0, 0)
CD2 = CrimeData(-3.5, 2.0)
CD3 = CrimeData(490258.683, 5458154.503)  # sample location actually pulled from our data

# template based on compound (2 fields)
# (skeleton only: every ... is a placeholder to be replaced when this template
# is copied to write a real function that consumes a CrimeData)
@typecheck
def fn_for_crime_data(cd: CrimeData) -> ...:
    return ...(cd.x,   # the x location
               cd.y)   # the y location
    

# List[CrimeData]
# interp. a list of crime data
LOCD0 = []  # example with no crimes at all
LOCD1 = [CD1, CD2]  # example with two crimes, built from the CrimeData examples

# template based on arbitrary-sized data and reference rule
# (skeleton only: every ... is a placeholder to be replaced when this template
# is copied; each element is handed to fn_for_crime_data because the list
# refers to the CrimeData type)
@typecheck
def fn_for_locd(locd: List[CrimeData]) -> ...:
    # description of accumulator
    acc = ... # type: ...

    # visit every crime once, folding it into the accumulator
    for cd in locd:
        acc = ...(fn_for_crime_data(cd), acc)

    return ...(acc)


# List[float]
# interp. a list of floats
LOF0 = []  # example with no values
LOF1 = [0, -3.5]  # example with two values (the first is written as the int 0)

# template based on arbitrary-sized data
# (skeleton only: every ... is a placeholder to be replaced when this template
# is copied to write a real function that consumes a List[float])
@typecheck
def fn_for_lof(lof: List[float]) -> ...:
    # description of accumulator
    acc = ... # type: ...

    # visit every float once, folding it into the accumulator
    for f in lof:
        acc = ...(f, acc)

    return ...(acc)

In [None]:
@typecheck
def read(filename: str) -> List[CrimeData]:
    """
    reads information from the specified file and returns a list of crime data

    the file must be in the VPD crime format, and the x and y entries must be valid
    floats.

    the first line of the file is treated as a header and skipped. a file with
    no lines at all, or with only a header, produces an empty list. completely
    blank rows are ignored.
    """
    # Note: in future, we might want to skip (0, 0) entries, but we won't now.

    #return []  #stub
    # Template from HtDAP

    # locd contains the result so far
    locd = [] # type: List[CrimeData]

    # newline='' hands newline handling to the csv module, as its documentation
    # asks for; otherwise newlines embedded in quoted fields can be misread
    with open(filename, newline='') as csvfile:

        reader = csv.reader(csvfile)
        # skip header line; the None default keeps a zero-line file from
        # raising StopIteration
        next(reader, None)

        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line; there is nothing to parse
                continue
            # columns 8 and 9 (counting from 0) supply the x and y locations
            cd = CrimeData(parse_float(row[8]), parse_float(row[9]))
            locd.append(cd)

    return locd



# Tests for read. The expect calls sit between start_testing() and summary(),
# which come from the cs103 library (presumably summary() reports the results).
start_testing()
# the "empty" test file is expected to produce no crime data
expect(read("testfile_empty.csv"), [])
# the "small" test file is expected to produce exactly these two crimes, in order
expect(read("testfile_small.csv"), [CrimeData(0, 0),
                                    CrimeData(-3.5, 2.0)])

summary()


## Scatterplot solution from the worksheet

Our "template" in the viz module is just to copy-and-paste from a sample of the kind of plot we want. That's not so unrealistic as a starting point as long as we understand what we're using!

Here's the scatterplot worked example body as a starting point for our template:

```python
@typecheck
def show_scatterplot(ages: List[int], salaries: List[int], counts: List[int]) -> None:
    """
    display a scatterplot of salaries vs. ages. salaries are given in 1000s
    
    Assumes that the lengths of ages, salaries, and counts are all equal
    """
    #return None #stub
    # Template based on visualization
    
    areas = convert_counts_to_areas(counts)

    # set the labels for the axes
    plt.xlabel('Age')
    plt.ylabel('Salary (in 1000s)')
    plt.title('Salaries by age')

    # range for the axes
    # [x-min, x-max, y-min, y-max]
    plt.axis([0,65,0,105])

    # create the scatterplot, with markers that are red (c='r') and triangular (marker='^')
    plt.scatter(ages,salaries,marker='^', c='r', s=areas)

    # show the plot
    plt.show()
    
    return None
```

In [None]:
@typecheck
def main(filename: str) -> CrimeData:
    """
    Read the crime data stored in the file named filename, then find and
    return the average location of all of its crimes, packaged as a new
    (fictitious) CrimeData.

    When the file contains no data, the result is CrimeData(0, 0).
    """
    #return CrimeData(0, 0)  #stub
    # The name main is deliberately left alone: that is fine as long as the
    # file itself carries a descriptive name.

    # Template from HtDAP, based on function composition: read, then analyze
    crimes = read(filename)
    return analyze(crimes)


@typecheck
def analyze(locd: List[CrimeData]) -> CrimeData:
    """
    Find the average location of all the crimes in locd and return it as a
    new (fictitious) CrimeData.

    When locd is empty, the result is CrimeData(0, 0).
    """
    #return CrimeData(0, 0)  #stub
    # template based on composition

    # Plan:
    # 1) average the x values pulled out of locd
    # 2) average the y values pulled out of locd
    # 3) build a new CrimeData from the two averages
    mean_x = average(get_x_locations(locd))
    mean_y = average(get_y_locations(locd))

    return CrimeData(mean_x, mean_y)

@typecheck
def get_x_locations(locd: List[CrimeData]) -> List[float]:
    """
    return a list holding the x location of every crime in locd, in the same
    order as locd
    """
    #return []  #stub
    # template from List[CrimeData], with the accumulator loop written as a
    # list comprehension
    return [crime.x for crime in locd]

@typecheck
def get_y_locations(locd: List[CrimeData]) -> List[float]:
    """
    return a list holding the y location of every crime in locd, in the same
    order as locd
    """
    #return []  #stub
    # template from List[CrimeData], with the accumulator loop written as a
    # list comprehension
    return [crime.y for crime in locd]

@typecheck
def average(lof: List[float]) -> float:
    """
    return the arithmetic mean of the values in lof

    an empty lof has no mean, so 0.0 is returned for it instead
    """
    #return 0.0  #stub
    # template based on composition

    # Plan:
    # 1) Count the elements in lof
    # 2) With no elements, return 0.0 straight away
    # 3) Otherwise return the sum of lof divided by that count
    #    (the guard in step 2 guarantees the count is not 0)
    count = len(lof)
    if count == 0:
        return 0.0
    return sum(lof) / count

# Each group below tests one function; its expect calls sit between
# start_testing() and summary() from the cs103 library.

# main: end-to-end, reading the test files and averaging their locations
start_testing()
expect(main("testfile_empty.csv"), CrimeData(0, 0))
expect(main("testfile_small.csv"), CrimeData((0 + -3.5) / 2, (0 + 2.0) / 2))
summary()

# analyze: the same averages, computed from in-memory lists
start_testing()
expect(analyze([]), CrimeData(0, 0))
expect(analyze([CrimeData(0, 0), CrimeData(-3.5, 2.0)]), CrimeData((0 + -3.5) / 2, (0 + 2.0) / 2))
summary()

# get_x_locations: only the x fields, in list order
start_testing()
expect(get_x_locations([]), [])
expect(get_x_locations([CrimeData(0, 0), CrimeData(-3.5, 2.0)]), [0, -3.5])
summary()

# get_y_locations: only the y fields, in list order
start_testing()
expect(get_y_locations([]), [])
expect(get_y_locations([CrimeData(0, 0), CrimeData(-3.5, 2.0)]), [0, 2.0])
summary()

# average: the empty list gives 0.0; otherwise sum divided by count
start_testing()
expect(average([]), 0.0)
expect(average([0, 2.0]), 1.0)
summary()




In [None]:
# Here is our overall average location!
# main reads the named CSV file and returns the average x and y of its crimes
# as a single CrimeData.
main("crimedata_subset_bne_theft_of_bike_veh_2018.csv")