In [2]:
# Stretch the notebook's main container so cells use the full browser width.

from IPython.display import HTML, display

display(
    HTML("<style>.container { width:100% !important; }</style>")
)

In [116]:
# import packages

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os
from pathlib import Path
import pickle
import requests

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics

In [138]:
class advanced_exercise:
    """Fit a linear regression predicting sales from advertising costs.

    The workflow (run through ``__main__``) reads a CSV, splits it into train
    and test sets, fits a ``LinearRegression``, saves a predicted-vs-actual
    scatter plot and the pickled model, and returns the R^2 score on the test
    set.
    """

    def __init__(self, data_path, fig_path, model_path, random_state=None):
        """Store the locations and options used by the exercise.

        :param data_path: path to data
        :param fig_path: path to store figures
        :param model_path: path to store model
        :param random_state: optional seed forwarded to ``train_test_split``.
            The default ``None`` leaves the split unseeded, so the split (and
            therefore the score) can differ between runs; pass an int to make
            the result reproducible.
        """
        self.data_path = data_path
        self.fig_path = fig_path
        self.model_path = model_path
        self.random_state = random_state

    def _load_data(self) -> pd.DataFrame:
        """Read the CSV, rename the feature columns and drop the first column."""
        # NOTE(review): quotechar="," together with the doubled-quote header
        # names looks like a workaround for how the source CSV is quoted --
        # confirm against the actual file before changing either.
        column_names = {
            '""TV""': "tv_advertising_costs",
            '""Radio""': "radio_advertising_costs",
            '""Newspaper""': "newspaper_advertising_costs",
            '""Sales"""': "sales",
        }
        raw = pd.read_csv(self.data_path, quotechar=",").rename(columns=column_names)
        # The first column is a dangling column; return a new frame without it
        # instead of mutating in place.
        return raw.drop(columns=raw.columns[0])

    def _save_figure(self, y_test, y_pred) -> None:
        """Scatter predicted against test sales values and save to ``fig_path``."""
        fig, ax = plt.subplots()
        ax.scatter(y_test, y_pred)
        ax.set_xlabel("Test values of sales")
        ax.set_ylabel("Predicted values of sales")
        ax.set_title("Predicted vs test sales values of simple linear regr. model")
        fig.savefig(self.fig_path)
        # The figure is intentionally left open: closing it here would likely
        # stop a notebook's inline backend from rendering it after the cell.

    def _save_model(self, model) -> None:
        """Pickle ``model`` to ``model_path``, closing the file afterwards."""
        with open(self.model_path, "wb") as model_file:
            pickle.dump(model, model_file)

    def __main__(self) -> float:
        """Run the full exercise: load, split, train, plot, save, score.

        Reads in the data, creates train and test sets (80/20), fits a linear
        regression on all advertising-cost features and predicts sales on the
        test set. The scatter plot is written to ``fig_path`` and the fitted
        model is pickled to ``model_path``.

        :returns: R^2 score of the predictions on the test set, rounded to
            two decimals
        """
        data = self._load_data()

        X = data.drop(columns="sales")
        y = data["sales"]

        # create train-test sets; random_state=None keeps the split unseeded
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2, random_state=self.random_state
        )

        # training model and predicting on test data
        regr = LinearRegression()
        regr.fit(X_train, y_train)
        y_pred = regr.predict(X_test)

        self._save_figure(y_test, y_pred)
        self._save_model(regr)

        return round(metrics.r2_score(y_test, y_pred), 2)  # performance score