### Class exercise: table
![](../../images/numpy_slice_ex.png)
- In the image above, find the 5 different coloured selections using only array slicing notation.

In [22]:
import numpy as np

# 4x5 grid holding the integers 10..29, filled row by row.
a = np.arange(10, 30).reshape((4, 5))
print(a)

# Every coloured region is picked with plain index/slice notation.
yellow = a[0, 0]     # one cell: first row, first column
red = a[0, 1:4]      # first row, columns 1 through 3
teal = a[:, 1::2]    # all rows, every second column starting at column 1
green = a[:-1, 2]    # column 2 of every row except the last
blue = a[::2, -1]    # last column of every second row

print(
    "\nyellow:", yellow,
    "\nred:", red,
    "\nteal:\n", teal,
    "\ngreen:", green,
    "\nblue:", blue,
)

[[10 11 12 13 14]
 [15 16 17 18 19]
 [20 21 22 23 24]
 [25 26 27 28 29]]

yellow: 10 
red: [11 12 13] 
teal:
 [[11 13]
 [16 18]
 [21 23]
 [26 28]] 
green: [12 17 22] 
blue: [14 24]


## 3D
#### `reshape` also works with more than two dimensions. Here is an example in 3D  
`a = np.arange(0, 27).reshape((3, 3, 3))`  
![](../../images/cube.png)

### Class exercise: cube
1. Slice out [12 13 14] from the cube above using only one slice expression, e.g. `a[:,:,:]`.
2. Slice out [3 12 21].
3. Slice out all y-values where x is 2 and z is 0.

In [36]:
# 3x3x3 cube holding the integers 0..26.
a = np.arange(27).reshape((3, 3, 3))
print(a)

slice1 = a[1, 1, :]   # fix the first two axes, keep the whole last axis
slice2 = a[:, 1, 0]   # walk the first axis at position (1, 0) of the other two
slice3 = a[0, :, 2]   # walk the middle axis inside block 0, last position 2

print("\n")
print(slice1, slice2, slice3, sep="\n")

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


[12 13 14]
[ 3 12 21]
[2 5 8]


In [38]:
# Just for fun: write one value into many cells with a single assignment.
b = a.copy()  # work on a copy so the original cube `a` is left untouched
print(b)
print("\n")

# Index 1 on the middle axis selects one row in each of the outer blocks;
# the scalar 99 is broadcast over all of them.
b[:, 1, :] = 99
print(b)

[[[ 0  1  2]
  [ 3  4  5]
  [ 6  7  8]]

 [[ 9 10 11]
  [12 13 14]
  [15 16 17]]

 [[18 19 20]
  [21 22 23]
  [24 25 26]]]


[[[ 0  1  2]
  [99 99 99]
  [ 6  7  8]]

 [[ 9 10 11]
  [99 99 99]
  [15 16 17]]

 [[18 19 20]
  [99 99 99]
  [24 25 26]]]


In [34]:
# Build a 3x3x3 integer cube in which every cell holds 99.
c = np.full((3, 3, 3), 99, dtype=int)
print(c)

[[[99 99 99]
  [99 99 99]
  [99 99 99]]

 [[99 99 99]
  [99 99 99]
  [99 99 99]]

 [[99 99 99]
  [99 99 99]
  [99 99 99]]]


### Class exercise: masking
- For the dataset: `data = np.arange(1,101).reshape(10,10)`
  - apply a mask that will return only the even numbers
  - using `np.where()`, return only the numbers that end with 6

In [47]:
# 10x10 grid holding the integers 1..100.
data = np.arange(1, 101).reshape((10, 10))

# Boolean mask: True wherever the value is divisible by two.
evens = (data % 2) == 0
print(evens)
print(data[evens], "\n")

# np.where called with only a condition returns one index array per axis
# for the cells where that condition holds.
ends_in_six = (data % 10) == 6
sixes = np.where(ends_in_six)
print(sixes)
print(data[sixes])

[[False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]
 [False  True False  True False  True False  True False  True]]
[  2   4   6   8  10  12  14  16  18  20  22  24  26  28  30  32  34  36
  38  40  42  44  46  48  50  52  54  56  58  60  62  64  66  68  70  72
  74  76  78  80  82  84  86  88  90  92  94  96  98 100] 

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([5, 5, 5, 5, 5, 5, 5, 5, 5, 5]))
[ 6 16 26 36 46 56 66 76 86 96]


## Exercise numpy and csv

1. load the csv file: befkbhalderstatkode.csv into a numpy ndarray
2. How many German children aged 0 were there in Copenhagen in 2015?
3. create a function that can take any combination of the 4 parameters (AAR, BYDEL, ALDER, STATKODE) and return population data
4. create a new function like previous so that it can sum values for all ages if age is not provided to the function
5. further add functionality to sum values if citizenship or area was not provided to function.
6. create a new function that can also give average values per year if the year was not provided.
7. create a function, that given year and nationality can return which area had the most of these nationals by that year. Test it by finding out which area had the most Moroccan people in both 1992 and 2015
8. Find the area(s) where the fewest foreigners lived in Copenhagen in 1992 and 2015 respectively
9. Find out which age was most common among French people in 2015

In [13]:
import numpy as np
# 1: read the CSV into an unsigned-integer ndarray, skipping the header row.
file_path = "../../data/befkbhalderstatkode.csv"
data = np.genfromtxt(file_path, delimiter=",", dtype=np.uint, skip_header=1)

# 2: German (code 5180) children of age 0 in 2015.
# Columns used: 0 = year, 2 = age, 3 = country code, 4 = head count.
is_2015 = data[:, 0] == 2015
is_age_zero = data[:, 2] == 0
is_german = data[:, 3] == 5180
germans_age_0_2015 = data[is_2015 & is_age_zero & is_german, 4].sum()
print("Germans age 0:", germans_age_0_2015)

#3
def get_pop(year, neighbourhood, age, country):
    """Sum the head counts of the rows matching all four filters exactly.

    Reads the module-level ``data`` table, comparing column 0 with ``year``,
    column 1 with ``neighbourhood``, column 2 with ``age`` and column 3 with
    ``country``. Column 4 of the matching rows is summed and returned.
    """
    wanted = (
        (data[:, 0] == year)
        & (data[:, 1] == neighbourhood)
        & (data[:, 2] == age)
        & (data[:, 3] == country)
    )
    return data[wanted, 4].sum()
# Sentence template shared by the population reports printed in this cell.
pop_msg = "In year {}, there were {} citizen of country code {}, age {}, in neighbourhood {}."
exact_match_total = get_pop(2015, 1, 25, 5130)  # every filter given explicitly
print(pop_msg.format(2015, exact_match_total, 5130, 25, 1))

#4
def get_pop_new(y, n, c, a=None):
    """Sum head counts for a year, neighbourhood and country, optionally by age.

    Reads the module-level ``data`` table: column 0 is compared with ``y``
    (year), column 1 with ``n`` (neighbourhood), column 3 with ``c`` (country
    code) and, when given, column 2 with ``a`` (age). Column 4 of the matching
    rows is summed.

    Args:
        y: Year to match.
        n: Neighbourhood code to match.
        c: Country code to match.
        a: Age to match. ``None`` (the default) sums over every age.

    Returns:
        The sum of column 4 over the matching rows.
    """
    mask = (data[:, 0] == y) & (data[:, 1] == n) & (data[:, 3] == c)
    # Compare against None explicitly: age 0 is a legitimate filter value, and
    # a plain truthiness check would treat it as "no age given" and sum all ages.
    if a is not None:
        mask &= data[:, 2] == a
    return data[mask, 4].sum()
any_age_total = get_pop_new(2015, 1, 5130)  # age left out, so all ages are summed
print(pop_msg.format(2015, any_age_total, 5130, "any", 1))

#5
def get_pop_new_new(y, n=None, c=None, a=None):
    """Sum head counts for a year, optionally narrowed by other filters.

    Reads the module-level ``data`` table. Column 0 is always compared with
    ``y``; columns 1, 3 and 2 are compared with ``n``, ``c`` and ``a``
    respectively, but only for the arguments that are not ``None``. Column 4
    of the matching rows is summed.

    Args:
        y: Year to match.
        n: Neighbourhood code, or ``None`` to include every neighbourhood.
        c: Country code, or ``None`` to include every country.
        a: Age, or ``None`` to include every age.

    Returns:
        The sum of column 4 over the matching rows.
    """
    mask = data[:, 0] == y
    # Test each optional filter against None, not for truthiness: 0 is a
    # legitimate value (e.g. age 0) and must not be read as "not provided".
    for column, wanted in ((1, n), (3, c), (2, a)):
        if wanted is not None:
            mask &= data[:, column] == wanted
    return data[mask, 4].sum()
total_2015 = get_pop_new_new(2015)  # only the year is fixed
print(pop_msg.format(2015, total_2015, "any", "any", "any"))

#6
def get_pop_per_year(y, mask):
    """Sum column 4 of ``data`` over the rows of year ``y`` that pass ``mask``.

    ``mask`` is AND-ed with the year comparison on column 0, so it may be a
    boolean array with one entry per row or the plain scalar ``True``.
    """
    in_year = data[:, 0] == y
    return data[in_year & mask, 4].sum()

def get_pop_new_new_new(y=None, n=None, c=None, a=None):
    """Sum head counts for any combination of filters, averaging over years.

    Reads the module-level ``data`` table. Columns 1, 3 and 2 are compared
    with ``n``, ``c`` and ``a`` respectively for every argument that is not
    ``None``; column 4 of the matching rows holds the counts.

    Args:
        y: Year to match, or ``None`` to average the yearly totals.
        n: Neighbourhood code, or ``None`` to include every neighbourhood.
        c: Country code, or ``None`` to include every country.
        a: Age, or ``None`` to include every age.

    Returns:
        With ``y`` given: the sum of column 4 over the matching rows.
        Without ``y``: the mean, over the distinct years found in column 0,
        of that per-year sum.
    """
    # Start from an all-True boolean *array*. A bare Python ``True`` used as
    # an index would add an axis instead of selecting every row.
    mask = np.full(len(data), True, dtype=bool)
    # Test each optional filter against None, not for truthiness: 0 is a
    # legitimate value (e.g. age 0) and must not be read as "not provided".
    for column, wanted in ((1, n), (3, c), (2, a)):
        if wanted is not None:
            mask &= data[:, column] == wanted

    if y is not None:
        return data[mask & (data[:, 0] == y), 4].sum()

    yearly_totals = [
        data[mask & (data[:, 0] == year), 4].sum()
        for year in np.unique(data[:, 0])
    ]
    return np.mean(yearly_totals)
yearly_average = get_pop_new_new_new()  # no filters: mean of the per-year totals
print(pop_msg.format("average", yearly_average, "any", "any", "any"))
# this solution annoys me. To avoid the overall mask becoming True, I instead iterate all years.

#7
def pop_per_neighb(neighb, mask):
    """Sum column 4 of ``data`` over the rows of one neighbourhood.

    A row counts when column 1 equals ``neighb`` and it also passes ``mask``,
    which is AND-ed with the neighbourhood comparison.
    """
    in_neighbourhood = data[:, 1] == neighb
    return data[in_neighbourhood & mask, 4].sum()

def biggest_neighbourhood_per_nationality(country, year):
    """Find the neighbourhood with the highest head count for a country and year.

    Rows of ``data`` are selected where column 3 equals ``country`` and
    column 0 equals ``year``. ``pop_per_neighb`` then totals them for each
    distinct neighbourhood code found in column 1.

    Returns:
        A two-element array ``[neighbourhood, total]`` for the neighbourhood
        with the largest total. On a tie, the first one in ``np.unique``
        order is returned.
    """
    selection = (data[:, 0] == year) & (data[:, 3] == country)
    rows = []
    for code in np.unique(data[:, 1]):
        rows.append([code, pop_per_neighb(code, selection)])
    totals = np.array(rows)
    # Column 1 holds the totals; argmax gives the row with the largest one.
    winner = totals[:, 1].argmax()
    return totals[winner]

country = 5244
template = "There were most people from country code {} in neighbourhood {} in {}, {} people."

# Same report for both years: 2015 first, then 1992.
for year in (2015, 1992):
    neighb, pop = biggest_neighbourhood_per_nationality(country, year)
    print(template.format(country, neighb, year, pop))
# I really don't like this solution.
# I don't like using a loop to aggregate by year. I don't like this way of getting the max value either.
# The argmax solution is probably better now.

Germans age 0: 35
In year 2015, there were 15 citizen of country code 5130, age 25, in neighbourhood 1.
In year 2015, there were 346 citizen of country code 5130, age any, in neighbourhood 1.
In year 2015, there were 580295 citizen of country code any, age any, in neighbourhood any.
In year average, there were 507058.1666666667 citizen of country code any, age any, in neighbourhood any.
There were most people from country code 5244 in neighbourhood 8 in 2015, 332 people.
There were most people from country code 5244 in neighbourhood 4 in 1992, 482 people.
