<a href="https://colab.research.google.com/github/BaseKan/aiday_training_resources/blob/main/Cython/main_solutions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [1]:
%load_ext Cython
import numpy as np
import pandas as pd
from math import sin,tan,cos

# Opdracht: np.vectorize naar Cython

In [None]:
def complicated_calculation(x,y):
  """Evaluate a piecewise trigonometric expression for one scalar pair.

  The branches are tested in order and the first match wins:

  * ``x > 0.5*y`` and ``y < 0.3``  ->  ``sin(x - y)``
  * ``x < 0.5*y``                  ->  ``tan(x + y)``
  * ``x > 0.2*y``                  ->  ``sin(x) * sin(y)``
  * otherwise                      ->  ``cos(x / (0.1 + abs(y)))``

  Args:
    x: A real scalar.
    y: A real scalar.

  Returns:
    The value of the selected branch as a Python ``float``.
  """
  if x > 0.5*y and y < 0.3:
    res = sin(x - y)
  elif x < 0.5*y:
    res = tan(x + y)
  elif x > 0.2*y:
    # Use math.sin for both factors: the inputs are scalars, so np.sin only
    # added overhead and made this one branch return np.float64 instead of float.
    res = sin(x)*sin(y)
  else:
    # The denominator is at least 0.1, so this never divides by zero.
    res = cos(x/(0.1 + abs(y)))
  return res

In [None]:
def get_results_fast(x,y):
  """Apply ``complicated_calculation`` element-wise to ``x`` and ``y``.

  Args:
    x: Array-like of real numbers.
    y: Array-like of real numbers, broadcastable against ``x``.

  Returns:
    A float64 NumPy array holding the result for every element pair.
  """
  # Pinning otypes keeps the float64 output dtype without probing the first
  # element, and lets the call succeed on empty inputs (np.vectorize raises
  # on size-0 inputs when otypes is not set).
  return np.vectorize(complicated_calculation, otypes=[float])(x,y)

In [3]:
# Benchmark inputs: two independent standard-normal samples of equal length.
# A seeded generator makes every Restart & Run All time the same data.
N_SAMPLES = int(1e6)
rng = np.random.default_rng(42)
x = rng.standard_normal(N_SAMPLES)
y = rng.standard_normal(N_SAMPLES)

In [None]:
%timeit res_fast = get_results_fast(x, y)

In [2]:
%%cython --annotate
cimport cython
import numpy as np
cimport numpy as np
from libc.math cimport sin, cos, tan, fabs

@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
cdef double complicated_calculation(double x, double y):
  # C-level scalar kernel: picks one of four trigonometric expressions based on
  # how x compares to fixed multiples of y. Conditions are tried top to bottom
  # and the first one that holds decides the result.
  cdef double half_y = 0.5*y

  if x > half_y and y < 0.3:
    return sin(x-y)
  if x < half_y:
    return tan(x+y)
  if x > 0.2*y:
    return sin(x)*sin(y)
  # Fallback branch; 0.1 + |y| keeps the divisor away from zero.
  return cos(x/(0.1+fabs(y)))

@cython.boundscheck(False)
@cython.wraparound(False)
def c_get_results_fast(double[:] x, double[:] y):
  """Apply the compiled ``complicated_calculation`` to every pair ``(x[i], y[i])``.

  Args:
    x: One-dimensional float64 buffer.
    y: One-dimensional float64 buffer with the same length as ``x``.

  Returns:
    A ``double[:]`` typed memoryview of the results; wrap it in
    ``np.asarray(...)`` to get a NumPy array.

  Raises:
    ValueError: If ``x`` and ``y`` differ in length.
  """
  # Py_ssize_t is the index type of memoryviews; a C int could overflow on
  # very large buffers.
  cdef Py_ssize_t n = x.shape[0]
  cdef Py_ssize_t i
  cdef double[:] res

  # Bounds checking is disabled for this function, so a shorter y would be
  # read past its end. Reject mismatched inputs before entering the loop.
  if y.shape[0] != n:
    raise ValueError("x and y must have the same length")

  # Every slot is written in the loop below, so zero-filling is unnecessary.
  res = np.empty(n, dtype=np.dtype('d'))
  for i in range(n):
    res[i] = complicated_calculation(x[i],y[i])

  return res

In [4]:
%timeit res_fast = c_get_results_fast(x, y)

10 loops, best of 5: 47.7 ms per loop


# Opdracht: temperaturen classificeren met Cython

In [None]:
# Download the weatherAUS archive from the docs.google.com download link.
# Step 1: request the download URL (following redirects), store the session
# cookies in cookies.txt, and let sed pull the value of the `confirm=` token
# out of the response into confirm.txt.
!curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id=151gCztjHR_D2uIoebxfi52DZWGLabOQd' | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt    
# Step 2: repeat the request with the saved cookies and the extracted token
# appended as `confirm=...`, writing the response to weatherAUS.zip.
!curl -L -b cookies.txt -o 'weatherAUS.zip' 'https://docs.google.com/uc?export=download&id=151gCztjHR_D2uIoebxfi52DZWGLabOQd&confirm='$(<confirm.txt)
# Extract the archive into the working directory.
!unzip weatherAUS.zip
# Clean up the helper files and the archive; only the extracted data remains.
!rm -f confirm.txt cookies.txt weatherAUS.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   388    0   388    0     0    202      0 --:--:--  0:00:01 --:--:--   202
100 3781k    0 3781k    0     0  1456k      0 --:--:--  0:00:02 --:--:-- 18.5M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   408    0   408    0     0    211      0 --:--:--  0:00:01 --:--:--   211
  0     0    0     0    0     0      0      0 --:--:--  0:00:02 --:--:--     0
  0     0    0     0    0     0      0      0 --:--:--  0:00:02 --:--:--     0
100 3781k    0 3781k    0     0  1509k      0 --:--:--  0:00:02 --:--:-- 1509k
Archive:  weatherAUS.zip
  inflating: weatherAUS.csv          


In [None]:
# Read the extracted weather data into a DataFrame.
df = pd.read_csv(filepath_or_buffer='weatherAUS.csv')

In [None]:
def _fill_with_column_mean(column):
    """Fill NaNs of a float64 column with its mean; return other columns as-is."""
    if column.dtype == 'float64':
        return column.fillna(column.mean())
    return column


# Impute the float columns, parse the dates, then order the rows
# chronologically with a fresh 0..n-1 index.
df = (
    df.apply(_fill_with_column_mean, axis=0)
      .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
      .sort_values('Date')
      .reset_index(drop=True)
)
df.head()

Unnamed: 0,Date,Location,MinTemp,MaxTemp,Rainfall,Evaporation,Sunshine,WindGustDir,WindGustSpeed,WindDir9am,WindDir3pm,WindSpeed9am,WindSpeed3pm,Humidity9am,Humidity3pm,Pressure9am,Pressure3pm,Cloud9am,Cloud3pm,Temp9am,Temp3pm,RainToday,RainTomorrow
0,2007-11-01,Canberra,8.0,24.3,0.0,3.4,6.3,NW,30.0,SW,NW,6.0,20.0,68.0,29.0,1019.7,1015.0,7.0,7.0,14.4,23.6,No,Yes
1,2007-11-02,Canberra,14.0,26.9,3.6,4.4,9.7,ENE,39.0,E,W,4.0,17.0,80.0,36.0,1012.4,1008.4,5.0,3.0,17.5,25.7,Yes,Yes
2,2007-11-03,Canberra,13.7,23.4,3.6,5.8,3.3,NW,85.0,N,NNE,6.0,6.0,82.0,69.0,1009.5,1007.2,8.0,7.0,15.4,20.2,Yes,Yes
3,2007-11-04,Canberra,13.3,15.5,39.8,7.2,9.1,NW,54.0,WNW,W,30.0,24.0,62.0,56.0,1005.5,1007.0,2.0,7.0,13.5,14.1,Yes,Yes
4,2007-11-05,Canberra,7.6,16.1,2.8,5.6,10.6,SSE,50.0,SSE,ESE,20.0,28.0,68.0,49.0,1018.3,1018.5,7.0,7.0,11.1,15.4,Yes,No


In [None]:
# Per-row average temperature: mean of the MinTemp and MaxTemp columns.
df['AvgTemp'] = df.loc[:, ['MinTemp', 'MaxTemp']].mean(axis='columns')

In [None]:
%%cython --annotate
import numpy as np
def c_classify_temperature(double[:] avg_temp):
  """Label each temperature as "cold", "average" or "hot" by quartile.

  Values below the 25% quantile of ``avg_temp`` are "cold", values below the
  75% quantile are "average", and everything else (including NaN, which fails
  both comparisons) is "hot".

  Args:
    avg_temp: One-dimensional float64 buffer of temperatures.

  Returns:
    An ``object[:]`` typed memoryview with one label string per input value;
    wrap it in ``np.array(...)`` to get a NumPy object array. Empty input
    yields an empty memoryview.
  """
  # Py_ssize_t is the index type of memoryviews; typing the loop index keeps
  # the loop at C speed.
  cdef Py_ssize_t n = avg_temp.shape[0]
  cdef Py_ssize_t i
  cdef double cold, hot
  # object[:] is the memoryview element type for arbitrary Python objects.
  cdef object[:] res = np.empty(n, dtype=object)

  # Quantiles of an empty array are undefined (NumPy warns or raises depending
  # on the version), so return the empty result before computing them.
  if n == 0:
    return res

  # Compute both thresholds in one np.quantile call instead of two passes.
  quartiles = np.quantile(avg_temp, [0.25, 0.75])
  cold = quartiles[0]
  hot = quartiles[1]

  for i in range(n):
    if avg_temp[i] < cold:
      res[i] = "cold"
    elif avg_temp[i] < hot:
      res[i] = "average"
    else:
      res[i] = "hot"

  return res
  

In [None]:
%%timeit
np.array(c_classify_temperature(df.AvgTemp.to_numpy(dtype=np.dtype('d'))))

100 loops, best of 5: 6.34 ms per loop
