In [None]:
import numpy as np
import math
import matplotlib.pyplot as plt
%matplotlib inline

### Numpy basics

1\. Find the row, column and overall means for the following matrix:

```python
m = np.arange(12).reshape((3,4))
```

In [None]:
# Build the 3x4 matrix holding the integers 0..11 and report its means.
m = np.arange(12).reshape((3, 4))
print("The matrix is: \n", m, "\n")

# axis=1 averages across the columns (one value per row);
# axis=0 averages across the rows (one value per column);
# with no axis the mean is taken over every entry.
row_mean = m.mean(axis=1)
column_mean = m.mean(axis=0)
overall_mean = m.mean()

print("Row means is:", row_mean)
print("Column means is:", column_mean)
print("Matrix mean is:", overall_mean)

2\. Find the outer product of the following two vectors

```python
u = np.array([1,3,5,7])
v = np.array([2,4,6,8])
```

Do this in the following ways:

   * Using the function outer in numpy
   * Using a nested for loop or list comprehension
   * Using numpy broadcasting operations


In [None]:
# Outer product of u and v, computed three equivalent ways.
u = np.array([1, 3, 5, 7])
v = np.array([2, 4, 6, 8])

# 1) Dedicated library routine.
outernp = np.outer(u, v)
print("Outer product using outer: \n", outernp, "\n")

# 2) Nested list comprehension: one output row per element of u.
outerlc = np.array([[a * b for b in v] for a in u])
print("Outer product using lc: \n", outerlc, "\n")

# 3) Broadcasting: a (4, 1) column times a (1, 4) row gives a (4, 4) result.
outerbroad = u[:, np.newaxis] * v[np.newaxis, :]
print("Outer product using numpy broadcasting: \n", outerbroad, "\n")

3\. Create a 10 by 6 matrix of random uniform numbers. Set all rows with any entry less than 0.1 to be zero

Hint: Use the following numpy functions - np.random.random, np.any as well as Boolean indexing and the axis argument.

In [None]:
# Draw a 10x6 matrix of uniform samples from [0, 1) in a single call.
random_mat = np.random.random((10, 6))

# Flag each row that contains at least one entry below 0.1 (np.any along
# axis=1 reduces over the columns), then zero those rows in their entirety.
rows_to_zero = np.any(random_mat < 0.1, axis=1)
random_mat[rows_to_zero] = 0
print(random_mat)

4\. Use np.linspace to create an array of 100 numbers between 0 and 2π (inclusive).

  * Extract every 10th element using slice notation
  * Reverse the array using slice notation
  * Extract elements where the absolute difference between the sine and cosine functions evaluated at that element is less than 0.1
  * Make a plot showing the sin and cos functions and indicate where they are close

In [None]:
# 100 evenly spaced angles from 0 to 2*pi; np.linspace includes both endpoints.
vec = np.linspace(0, 2*math.pi, num=100)
print("Original vector:\n",vec,"\n")

# Every 10th element, starting from the first.
vec1 = vec[::10]
print("Extracted vector:\n",vec1,"\n")

# A negative step walks the array backwards.
vec2 = vec[::-1]
print("Reversed vector:\n",vec2,"\n")

# Boolean mask selecting the angles where sine and cosine differ by less than 0.1;
# indexing with it keeps the result a NumPy array instead of a Python list.
close_mask = np.abs(np.sin(vec) - np.cos(vec)) < 0.1
vec3 = vec[close_mask]
print("Values with abs less than 0.1 vector:\n",vec3,"\n")

# Evaluate both functions on the whole grid at once.
vecsin = np.sin(vec)
veccos = np.cos(vec)

# Grid points where the two curves are within 0.1 of each other.
close = np.abs(vecsin - veccos) < 0.1

plt.plot(vec, vecsin, label='sine')
plt.plot(vec, veccos, label='cosine')
# Mark the near-coincidence points so they are visible on the figure.
plt.plot(vec[close], vecsin[close], 'ko', label='|sin - cos| < 0.1')
plt.xlabel('angle [rad]')
plt.legend(loc='lower left')
plt.show()

5\. Create a matrix that shows the 10 by 10 multiplication table.

 * Find the trace of the matrix
 * Extract the anti-diagonal (this should be ```array([10, 18, 24, 28, 30, 30, 28, 24, 18, 10])```)
 * Extract the diagonal offset by 1 upwards (this should be ```array([ 2,  6, 12, 20, 30, 42, 56, 72, 90])```)

In [None]:
# Factors 1..10; a column of them times a row of them gives the 10x10 table.
u = np.arange(1, 11)
table = u.reshape(-1, 1) * u.reshape(1, -1)
print("The multiplication matrix is: \n", table, "\n")

# Sum of the main diagonal.
trace = table.trace()
print("The trace is:", trace, "\n")

# Reversing the column order turns the anti-diagonal into the main diagonal.
antidiagonal = table[:, ::-1].diagonal()
print("The antidiagonal is:\n", antidiagonal, "\n")

# offset=1 selects the diagonal one step above the main one.
diagonal_off1 = table.diagonal(offset=1)
print("The diagonal offset by 1 upwards is:\n", diagonal_off1, "\n")

6\. Use broadcasting to create a grid of distances

Route 66 crosses the following cities in the US: Chicago, Springfield, Saint-Louis, Tulsa, Oklahoma City, Amarillo, Santa Fe, Albuquerque, Flagstaff, Los Angeles
The corresponding positions in miles are: 0, 198, 303, 736, 871, 1175, 1475, 1544, 1913, 2448

  * Construct a 2D grid of distances among each city along Route 66
  * Convert that to km (those savages...)

In [None]:
# Positions in miles of the ten cities along Route 66, in the order listed in
# the exercise text (the eighth entry is 1544 there, not 1554).
dist = np.array([0, 198, 303, 736, 871, 1175, 1475, 1544, 1913, 2448])

# Broadcasting a column against a row gives every pairwise difference;
# the absolute value turns the signed differences into distances.
grid_miles = np.abs(dist[:, None] - dist[None, :])
print("The grid in miles is:\n",grid_miles,"\n")

# Miles to kilometres.
grid_km = grid_miles*1.60934
# NOTE: set_printoptions is global and also affects arrays printed by later cells.
np.set_printoptions(precision=1)
print("The grid in km is:\n",grid_km)


7\. Prime numbers sieve: compute the prime numbers in the 0-N (N=99 to start with) range with a sieve (mask).
  * Construct a shape (100,) boolean array, the mask
  * Identify the multiples of each number starting from 2 and set accordingly the corresponding mask element
  * Apply the mask to obtain an array of ordered prime numbers
  * Check the performance (timeit); how does it scale with N?
  * Implement the optimization suggested in the [sieve of Eratosthenes](https://en.wikipedia.org/wiki/Sieve_of_Eratosthenes)

In [None]:
# Size of the sieve: primes are searched among the integers 0 .. N-1.
N = 100

def primes_eratosthenes(N):
    """Return every prime number strictly below ``N``, in increasing order.

    Implements the sieve of Eratosthenes on a boolean mask of length ``N`` in
    which ``True`` marks a number known not to be prime.

    Parameters
    ----------
    N : int
        Exclusive upper bound of the search range ``0 .. N-1``.

    Returns
    -------
    numpy.ma.MaskedArray
        The primes below ``N``; empty when ``N <= 2``.
    """
    mask = np.zeros(N, dtype=bool)
    # 0 and 1 are not prime; the slice is a no-op for arrays shorter than 2.
    mask[:2] = True
    numbers = np.arange(N)
    # The candidate floor(sqrt(N)) itself must be sieved too, otherwise its
    # square (e.g. 9 for N=10, 25 for N=26) would be left unmarked.
    for i in range(2, int(math.sqrt(N)) + 1):
        if not mask[i]:
            # Smaller multiples of i were already struck out by smaller primes,
            # so start at i*i; the slice is empty when i*i >= N.
            mask[i * i::i] = True
    y = np.ma.array(numbers, mask=mask)
    return y[~y.mask]


print("The first", N, "prime numbers are:\n",primes_eratosthenes(N))

for n in [10**i for i in np.arange(5)]:
    %timeit primes_eratosthenes(n)

In [None]:
# 1D random walk: many independent walkers each take unit steps left or right.
import random  # not used below any more; retained in case later cells rely on it

steps = 200
walkers = 1000

# One row per walker, one column per step; each entry is -1 or +1.
# Drawn in a single vectorised call instead of one Python call per entry.
walk = np.random.choice([-1, 1], size=(walkers, steps))
# Running sum along each row gives every walker's position after each step.
dist = np.cumsum(walk, axis=1)

# Average of the squared positions over all walkers, one value per step.
squared_dist = np.square(dist)
mean_dist = np.mean(squared_dist, axis=0)

plt.plot(range(steps), mean_dist)
plt.xlabel('Steps')
# The plotted quantity is the mean of the squared distances, so label it as such.
plt.ylabel('Mean squared distance')
plt.show()

9\. Analyze a data file 
  * Download the population of hares, lynxes and carrots at the beginning of the last century.
    ```python
    ! wget https://www.dropbox.com/s/3vigxoqayo389uc/populations.txt
    ```

  * Check the content by looking within the file
  * Load the data (use an appropriate numpy method) into a 2D array
  * Create arrays out of the columns, the arrays being (in order): *year*, *hares*, *lynxes*, *carrots* 
  * Plot the 3 populations over the years
  * Compute the main statistical properties of the dataset (mean, std, correlations, etc.)
  * Which species has the highest population each year?

Do you feel there is some evident correlation here? [Studies](https://www.enr.gov.nt.ca/en/services/lynx/lynx-snowshoe-hare-cycle) tend to believe so.

In [None]:
!rm populations.txt

!wget https://www.dropbox.com/s/3vigxoqayo389uc/populations.txt

In [None]:
# Load the whitespace-separated table into a 2D array; transposing it lets the
# four columns be unpacked into one array each.
data = np.loadtxt('populations.txt')
years, hares, lynxes, carrots = data.T # trick: columns to variables

# Plot the three populations over time; the axes are narrowed to leave room
# on the right for the legend placed outside the plotting area.
plt.axes([0.2, 0.1, 0.5, 0.8])
plt.plot(years, hares, years, lynxes, years, carrots)
plt.legend(('Hare', 'Lynx', 'Carrot'), loc=(1.05, 0.5))

# Summary statistics per population, looking each NumPy function up by name.
dic = {'Hares':hares, 'Lynxes':lynxes, 'Carrots':carrots}
for key in dic:
    print(key)
    for function in ['sum','min','max','mean','median','std']:
        print (function, getattr(np, function)(dic[key]))
    print('\n')

# For each population, a boolean mask of the years in which it is strictly
# larger than both of the others.
mask_hares = np.logical_and(hares > lynxes, hares > carrots)
mask_lynxes = np.logical_and(lynxes > hares, lynxes > carrots)
mask_carrots = np.logical_and(carrots > lynxes, carrots > hares)

# One marker per year, coloured by whichever population is the largest:
# red = hares, blue = lynxes, green = carrots.
plt.figure()
plt.plot( years[mask_hares],hares[mask_hares], 'ro',  years[mask_lynxes],lynxes[mask_lynxes],
         'bo', years[mask_carrots],carrots[mask_carrots],'go')
plt.show()