<a href="https://colab.research.google.com/github/BaseKan/optimisation_workshop/blob/main/solutions.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup

In [None]:
import asyncio
import csv
import functools
import os
import sys

import numpy as np
import pandas as pd
from matplotlib.pyplot import imshow
from PIL import Image, ImageColor
# Raise the csv module's per-field size limit to the largest value available
# (presumably because some text fields of the book dataset exceed the default
# limit -- TODO confirm). The call returns the previous limit, which is what
# the cell displays.
csv.field_size_limit(sys.maxsize)

131072

In [None]:
!pip install memory_profiler

Collecting memory_profiler
  Downloading https://files.pythonhosted.org/packages/8f/fd/d92b3295657f8837e0177e7b48b32d6651436f0293af42b76d134c3bb489/memory_profiler-0.58.0.tar.gz
Building wheels for collected packages: memory-profiler
  Building wheel for memory-profiler (setup.py) ... [?25l[?25hdone
  Created wheel for memory-profiler: filename=memory_profiler-0.58.0-cp37-none-any.whl size=30180 sha256=9078be640f6dcbae96e0ca85c1637e9ed7317add354075a64fb1f9020d8eac7c
  Stored in directory: /root/.cache/pip/wheels/02/e4/0b/aaab481fc5dd2a4ea59e78bc7231bb6aae7635ca7ee79f8ae5
Successfully built memory-profiler
Installing collected packages: memory-profiler
Successfully installed memory-profiler-0.58.0


In [None]:
import memory_profiler
import time

def time_mem_decorator(func):
    """Decorate ``func`` so every call reports its duration and memory delta.

    The wrapper samples ``memory_profiler.memory_usage()`` and a clock right
    before and right after calling ``func``, prints the two differences, and
    then returns whatever ``func`` returned.

    Args:
        func: The callable to measure.

    Returns:
        A wrapper that accepts the same arguments as ``func`` and carries its
        name and docstring.
    """
    @functools.wraps(func)  # keep the measured function's __name__ and __doc__
    def out(*args, **kwargs):
        mem_before = memory_profiler.memory_usage()
        # perf_counter() is monotonic and high-resolution, so the measured
        # interval cannot be distorted by system clock adjustments.
        started = time.perf_counter()

        result = func(*args, **kwargs)

        time_diff = time.perf_counter() - started
        mem_after = memory_profiler.memory_usage()
        # memory_usage() returns a list; compare the first entry of each sample.
        mem_diff = mem_after[0] - mem_before[0]
        print(f"It took {time_diff} Secs and {mem_diff} Mb to execute this function.")
        return result
    return out

# Vectorisatie

In [None]:
@time_mem_decorator
def get_results_fast(x, y):
    """Apply ``complicated_calculation`` element-wise to ``x`` and ``y``.

    The scalar function is wrapped with ``np.vectorize`` so it can be called
    on whole arrays. Note that NumPy documents ``np.vectorize`` as a
    convenience wrapper (essentially a loop), not a performance feature.

    Args:
        x: First input array.
        y: Second input array.

    Returns:
        The result of calling the vectorized function on ``x`` and ``y``.
    """
    vectorized_calculation = np.vectorize(complicated_calculation)
    return vectorized_calculation(x, y)

In [None]:
# Benchmark inputs: two independent vectors of one million standard-normal
# draws each (x is drawn first, then y).
x, y = (np.random.randn(1_000_000) for _ in range(2))

In [None]:
# Run the timed computation; the decorator on get_results_fast prints the
# elapsed time and memory delta as a side effect.
res_fast = get_results_fast(x, y)

It took 7.43865966796875e-05 Secs and 0.0 Mb to execute this function.


# Generators

In [None]:
# Step 1: request the file once, saving the session cookies to cookies.txt and
# extracting the value of the `confirm=` parameter from the response into
# confirm.txt (presumably Google Drive's large-file confirmation token).
!curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id=1DhyJdebnB6zwV5Jce1TgTO8PwfNtwn7P' | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt
# Step 2: repeat the request with the saved cookies and the extracted token to
# download the archive itself.
!curl -L -b cookies.txt -o 'en-books-dataset.zip' 'https://docs.google.com/uc?export=download&id=1DhyJdebnB6zwV5Jce1TgTO8PwfNtwn7P&confirm='$(<confirm.txt)
# Step 3: unpack the archive, then delete the temporary files and the archive.
!unzip en-books-dataset.zip
!rm -f confirm.txt cookies.txt en-books-dataset.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3284    0  3284    0     0  11728      0 --:--:-- --:--:-- --:--:-- 11728
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   408    0   408    0     0   2385      0 --:--:-- --:--:-- --:--:--  2372
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100  481M    0  481M    0     0  35.5M      0 --:--:--  0:00:13 --:--:-- 51.2M
Archive:  en-books-dataset.zip
  inflating: en-books-dataset.csv    


In [None]:
def fast_csv_reader(filename):
    """Lazily yield the rows of a CSV file, one parsed row at a time.

    Because this is a generator, only the row currently being consumed is
    held by the reader; the file is closed once iteration finishes.

    Args:
        filename: Path of the CSV file to read.

    Yields:
        Each row as a list of strings, starting with the first line of the
        file.
    """
    # newline='' leaves line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are returned exactly as written in the file.
    with open(filename, 'r', newline='') as csv_file:
        yield from csv.reader(csv_file)

In [None]:
# Peek at the first row (the CSV header) without reading the rest of the file.
print(next(fast_csv_reader('en-books-dataset.csv')))

['title', 'url', 'abstract', 'body_text', 'body_html']


In [None]:
@time_mem_decorator
def fast_row_count(filename):
    """Count the data rows of a CSV file by streaming it row by row.

    Args:
        filename: Path of the CSV file to count.

    Returns:
        A sentence stating the number of rows, not counting the header row.
    """
    total_rows = sum(1 for _ in fast_csv_reader(filename))
    # The reader also yields the header row, so leave it out of the count.
    # Clamp at zero so a completely empty file does not report -1 rows.
    data_rows = max(total_rows - 1, 0)
    return f"There are {data_rows} rows in the csv file."

In [None]:
# Count the rows of the downloaded dataset; the decorator prints the elapsed
# time and memory delta, and the returned sentence is shown as the cell output.
fast_row_count('en-books-dataset.csv')

It took 47.35005974769592 Secs and 39.2421875 Mb to execute this function.


'There are 82258 rows in the csv file.'

# Slimme queries

In [None]:
# Step 1: request the file once, saving the session cookies to cookies.txt and
# extracting the value of the `confirm=` parameter from the response into
# confirm.txt (presumably Google Drive's large-file confirmation token).
!curl -L -c cookies.txt 'https://docs.google.com/uc?export=download&id=1s08G81pKwd_K0SjMva4tJ0rdvjAdLpzL' | sed -rn 's/.*confirm=([0-9A-Za-z_]+).*/\1/p' > confirm.txt    
# Step 2: repeat the request with the saved cookies and the extracted token to
# download the archive itself.
!curl -L -b cookies.txt -o 'csv-data.zip' 'https://docs.google.com/uc?export=download&id=1s08G81pKwd_K0SjMva4tJ0rdvjAdLpzL&confirm='$(<confirm.txt)
# Step 3: unpack the archive, then delete the temporary files and the archive.
!unzip csv-data.zip
!rm -f confirm.txt cookies.txt csv-data.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  3284    0  3284    0     0  17751      0 --:--:-- --:--:-- --:--:-- 17751
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100   408    0   408    0     0   1658      0 --:--:-- --:--:-- --:--:--  1658
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0
100 32.5M    0 32.5M    0     0  11.4M      0 --:--:--  0:00:02 --:--:-- 17.8M
Archive:  csv-data.zip
  inflating: wine_metrics.csv        
  inflating: netflix_titles.csv      
  inflating: wine_meta.csv           
  inflating: wine_magazine.csv       


In [None]:
def load_wine_datasets():
    """Read the two wine CSV files from the current working directory.

    Both files are loaded with their first column as the index.

    Returns:
        A ``(wine_meta, wine_metrics)`` tuple of DataFrames.
    """
    wine_meta = pd.read_csv('wine_meta.csv', index_col=0)
    wine_metrics = pd.read_csv('wine_metrics.csv', index_col=0)
    return wine_meta, wine_metrics

In [None]:
@time_mem_decorator
def calculate_average_prices_for_best_wines_per_region(wine_datasets = load_wine_datasets()):
    wine_meta = wine_datasets[0]
    wine_metrics = wine_datasets[1]
    wine_metrics = wine_metrics[wine_metrics.points >= 90]
    wines = wine_meta.join(wine_metrics)
    return wines[['region_1','price']].groupby('region_1').agg('mean')

  if self.run_code(code, result):


In [None]:
# Run the query with the datasets that were loaded as the default argument;
# the decorator prints the elapsed time and memory delta.
calculate_average_prices_for_best_wines_per_region()

It took 0.06484222412109375 Secs and 0.0078125 Mb to execute this function.


Unnamed: 0_level_0,price
region_1,Unnamed: 1_level_1
Abruzzo,29.285714
Adelaida District,53.750000
Adelaide,27.888889
Adelaide Hills,25.743590
Adelaide Plains,
...,...
Yolo County,20.666667
York Mountain,51.666667
Yorkville Highlands,49.200000
Yountville,62.020000
