In [1]:
%%writefile generate_data.py

import numpy as np
import pandas as pd

STATE = 1337            # seed for the global NumPy random stream
COUNT = 500             # number of rows in every generated dataset
EMISSIONS_COUNT = 120   # half-width (in rows) of the distorted window in data-2.csv
MIN = 0                 # not referenced by this script
MAX = 1000              # not referenced by this script

# Every draw below comes from the global np.random stream, so seeding it once
# makes all three CSV files reproducible from run to run.
np.random.seed(STATE)


def make_dataset(a_scale, a_mean, b_scale, b_mean, a_coef, b_coef, intercept,
                 count=COUNT):
    """Build one synthetic regression dataset with columns 'a', 'b' and 'y'.

    'a' and 'b' are normal samples (``scale * randn + mean``) and
    ``y = a_coef * a + b_coef * b + 10 * randn + intercept``.
    The three random vectors are drawn in the order a, b, noise from the
    global np.random stream; all values are rounded to whole numbers.

    Returns:
        pandas.DataFrame with ``count`` rows.
    """
    a = a_scale * np.random.randn(count) + a_mean
    b = b_scale * np.random.randn(count) + b_mean
    y = a_coef * a + b_coef * b + 10 * np.random.randn(count) + intercept
    return pd.DataFrame({'a': a, 'b': b, 'y': y}).round(0)


def double_middle_b(frame, half_width=EMISSIONS_COUNT):
    """Return a copy of ``frame`` whose central rows have column 'b' doubled.

    The affected rows are the positional window
    ``[len/2 - half_width, len/2 + half_width)``. The input frame is left
    untouched.
    """
    middle = len(frame) / 2
    start = max(0, int(middle - half_width))  # clamp so a wide window cannot wrap around
    stop = int(middle + half_width)
    distorted = frame.copy()
    # A single positional .iloc assignment replaces the chained
    # frame['b'][start:stop] *= 2, which goes through a temporary Series and
    # does not update the frame when pandas Copy-on-Write is active.
    b_position = distorted.columns.get_loc('b')
    distorted.iloc[start:stop, b_position] *= 2
    return distorted


# data-1.csv: baseline dataset.
make_dataset(20, 100, 30, 200, 2, 8, 30).to_csv('data-1.csv', index=False)

# data-2.csv: same distribution as data-1, with 'b' doubled in the central rows.
double_middle_b(make_dataset(20, 100, 30, 200, 2, 8, 30)).to_csv('data-2.csv', index=False)

# data-3.csv: different feature distributions and different coefficients.
make_dataset(15, 105, 33, 295, 2.4, 7.6, 20).to_csv('data-3.csv', index=False)

Writing generate_data.py


In [2]:
%%writefile create_model.py

import pickle
import pandas as pd
from sklearn.linear_model import Ridge

# Fit a ridge regression of 'y' on the features 'a' and 'b' from data-1.csv.
data = pd.read_csv('data-1.csv')
clf = Ridge()
clf.fit(data[['a', 'b']], data['y'])

# Write the fitted model through a context manager so the file handle is
# flushed and closed deterministically instead of being left to the GC.
with open('model.pckl', 'wb') as model_file:
    pickle.dump(clf, model_file)

Writing create_model.py


In [3]:
%%writefile test_model.py

import pickle
import pandas as pd
from sklearn.metrics import r2_score

# Minimum R^2 score a dataset must exceed for its test to pass.
BORDER = .85

def get_model_score(data_path: str, model_path='model.pckl'):
    """Score the pickled model on a CSV dataset.

    Args:
        data_path: CSV file containing the columns 'a', 'b' and 'y'.
        model_path: pickle file holding an object with a ``predict`` method.

    Returns:
        The R^2 score of the model's predictions for 'y' computed from
        the columns 'a' and 'b'.
    """
    # NOTE: pickle.load can execute arbitrary code; only point this at
    # model files produced by a trusted source.
    # The context manager closes the handle as soon as the model is loaded.
    with open(model_path, 'rb') as model_file:
        model = pickle.load(model_file)
    data = pd.read_csv(data_path)
    predicted = model.predict(data[['a', 'b']])
    return r2_score(data['y'], predicted)

def test_model_1():
    """The model's R^2 on data-1.csv must be above BORDER."""
    score = get_model_score('data-1.csv')
    assert score > BORDER

def test_model_2():
    """The model's R^2 on data-2.csv must be above BORDER."""
    score = get_model_score('data-2.csv')
    assert score > BORDER

def test_model_3():
    """The model's R^2 on data-3.csv must be above BORDER."""
    score = get_model_score('data-3.csv')
    assert score > BORDER

Writing test_model.py


In [6]:
# List the working directory to check which scripts, CSV files and model file are present.
!"ls"

create_model.py  data-2.csv  generate_data.py  __pycache__  test_model.py
data-1.csv	 data-3.csv  model.pckl        sample_data


In [None]:
# Run the pipeline end to end: write the CSV datasets, fit and pickle the
# model, then invoke pytest. Order matters: create_model.py reads data-1.csv,
# and the tests load model.pckl together with the three CSV files.
%run -i "generate_data.py"
%run -i "create_model.py"
!"pytest"