In [2]:
# this cell reads the harness/boot measurements from a CSV file
import pandas

# parse the CSV into a DataFrame that the cells below refer to as `dataset`
dataset = pandas.read_csv(filepath_or_buffer='Harness_Size_Data.csv')

# last expression of the cell: show the first five rows as a quick preview
dataset.head(n=5)

Unnamed: 0,boot_size,harness_size,sex,age_years
0,39,58,male,12.0
1,38,58,male,9.6
2,37,52,female,8.6
3,39,58,male,10.2
4,38,57,male,7.8


In [7]:
# this cell plots the original dataset
import plotly.graph_objects as go

# one blue marker per row: harness size on the x axis, boot size on the y axis
fig = go.Figure(
    data=[
        go.Scatter(
            x=dataset.harness_size,
            y=dataset.boot_size,
            mode="markers",
            marker={"color": "blue", "size": 12},
        )
    ]
)

# title and axis labels; this call is the cell's last expression
fig.update_layout(
    title="Original Dataset",
    xaxis_title="harness size",
    yaxis_title="boot size",
)

In [3]:
# this cell creates and fits the OLS model
import statsmodels.formula.api as smf

# formula stating that boot size is explained by harness size
formula = "boot_size ~ harness_size"

# build the model specification (nothing is fitted yet)
model = smf.ols(formula=formula, data=dataset)

# fit the model to the data
trained_model = model.fit()

# Report the fitted line. `params` is a Series labelled "Intercept" and
# "harness_size", so look the values up by label: integer keys such as
# params[1] rely on positional fallback, which pandas has deprecated for
# label-indexed Series.
print("The following model parameters have been found:\n" +
        f"Line slope: {trained_model.params['harness_size']}\n" +
        f"Line Intercept: {trained_model.params['Intercept']}")

The following model parameters have been found:
Line slope: 0.5859254167382717
Line Intercept: 5.719109812682551


In [8]:
# this cell plots the dataset together with an OLS trendline fitted to it
import plotly.express as px

# Plot straight from `dataset`, selecting columns by name. The unrelated
# `px.data.tips()` sample frame that used to be passed as the data frame is
# gone: the plotted values never came from it.
fig = px.scatter(
    dataset,
    x="harness_size",
    y="boot_size",
    trendline="ols",
)

# human-readable title and axis labels
fig.update_layout(
    title="Dots show original dataset and line represents model",
    xaxis_title="harness size",
    yaxis_title="boot size"
)
fig.show()

In [10]:
# this cell saves the trained model to disk
import joblib

# location of the serialized model; read back by the loading cell below
model_filename = './Dog_Boot_Size_Model'

# write the fitted model object to that path
joblib.dump(value=trained_model, filename=model_filename)

print('Model saved!')


Model saved!


In [12]:
# this cell loads the model from disk and uses it
def load_model_and_predict(harness_size):
    '''
    Load the saved model and predict a boot size for one dog.

    The model is read with joblib from the path held in the
    notebook-level `model_filename`, and its parameters are printed
    before the prediction is made.

    harness_size: The dog harness size, in cm

    Returns the element at index 0 of the model's prediction for
    that single harness size.
    '''
    # restore the model from disk and report its parameters
    model = joblib.load(model_filename)
    print("Model is loaded with parameters:")
    print(model.params)

    # one-row input, keyed by the predictor name the model was given
    features = {"harness_size": [harness_size]}

    # only one row was supplied, so index 0 is the prediction for it
    predictions = model.predict(features)
    return predictions[0]

# run the saved model for a dog with a harness size of 45
predicted_boot_size = load_model_and_predict(harness_size=45)

# show the resulting prediction
print("Predicted dog boot size: ", predicted_boot_size)

Model is loaded with parameters:
Intercept       5.719110
harness_size    0.585925
dtype: float64
