In [1]:
# Import required libraries
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

## Load and Preview the Sales Data

In [2]:
# Pull the sales CSV from its remote location into a DataFrame
file_path = "https://static.bc-edx.com/ai/ail-v-1-0/m12/lesson_1/datasets/sales.csv"
df_sales = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows
df_sales.head(n=5)

Unnamed: 0,ads,sales
0,21,8350
1,180,22820
2,50,12950
3,195,21105
4,96,15008


## Prepare the Data to Fit the Linear Regression Model

In [3]:
# Build the feature matrix X from the `ads` column. Selecting with a list of
# column names keeps the data two-dimensional, so the result is already a
# single-column array and no explicit reshape is needed.
X = df_sales[["ads"]].values

# Preview the first five rows of X
X[:5]

array([[ 21],
       [180],
       [ 50],
       [195],
       [ 96]])

In [4]:
# Select the `sales` column as the dependent variable y (a pandas Series)
y = df_sales.loc[:, "sales"]

## Build the Linear Regression Model

In [5]:
# Instantiate scikit-learn's linear regression estimator
# (fit_intercept=True is the library default, spelled out here for clarity)
model = LinearRegression(fit_intercept=True)

In [6]:
# Train the regression using the ads feature matrix and the sales target
model.fit(X=X, y=y)

## Make Predictions

In [7]:
# Run the fitted model over the same X set it was trained on
predicted_y_values = model.predict(X=X)

In [8]:
# Build a new DataFrame holding the original columns plus the model's
# predictions; `assign` returns a new object, so df_sales is left untouched
df_sales_predicted = df_sales.assign(sales_predicted=predicted_y_values)

# Preview the first five rows
df_sales_predicted.head(n=5)

Unnamed: 0,ads,sales,sales_predicted
0,21,8350,9473.125608
1,180,22820,22407.614054
2,50,12950,11832.246142
3,195,21105,23627.848813
4,96,15008,15574.299403


## Linear Regression Model Assessment

In [9]:
# Import the regression metric functions (mean squared error and R-squared) from scikit-learn
from sklearn.metrics import mean_squared_error, r2_score

In [10]:
# Compute the metrics for the linear regression model.
# The MSE is kept at full precision so that the RMSE is derived from the exact
# value rather than from an already-rounded one; rounding is applied only to
# the values that are reported.
mse_unrounded = mean_squared_error(y, predicted_y_values)

# `model.score` is evaluated on the same X and y used for fitting, and r2 is
# computed from the predictions made on that same X.
score = round(model.score(X, y), 5)
r2 = round(r2_score(y, predicted_y_values), 5)
mse = round(mse_unrounded, 4)
rmse = round(np.sqrt(mse_unrounded), 4)

# Print relevant metrics.
print(f"The score is {score}.")
print(f"The r2 is {r2}.")
print(f"The mean squared error is {mse}.")
print(f"The root mean squared error is {rmse}.")

The score is 0.922.
The r2 is 0.922.
The mean squared error is 1922652.7854.
The root mean squared error is 1386.5976.
