In [1]:
# Dependencies
import numpy as np
import pandas as pd

In [2]:
# Read in csv
data = pd.read_csv('resources/data.csv')
data.head(100)

Unnamed: 0,date,year,month,online_food_turnover,online_nonfood_turnover,online_total_turnover,total_revenue_turnover
0,Jul-13,2013,Jul,139.5,331.4,470.9,22042.6
1,Aug-13,2013,Aug,146.7,329.6,476.3,22204.7
2,Sep-13,2013,Sep,139.7,322.7,462.4,22356.5
3,Oct-13,2013,Oct,127.9,351.6,479.5,22431.6
4,Nov-13,2013,Nov,138.5,388.1,526.6,22630.9
...,...,...,...,...,...,...,...
87,Oct-20,2020,Oct,866.5,2215.9,3082.4,29601.6
88,Nov-20,2020,Nov,813.1,2223.3,3036.4,31571.4
89,Dec-20,2020,Dec,864.1,2107.2,2971.3,30450.9
90,Jan-21,2021,Jan,903.9,2117.1,3021.0,30538.5


In [3]:
# Create new column for index so that regression can be done
data['index'] = range(1, len(data) + 1)
data.head(100)

Unnamed: 0,date,year,month,online_food_turnover,online_nonfood_turnover,online_total_turnover,total_revenue_turnover,index
0,Jul-13,2013,Jul,139.5,331.4,470.9,22042.6,1
1,Aug-13,2013,Aug,146.7,329.6,476.3,22204.7,2
2,Sep-13,2013,Sep,139.7,322.7,462.4,22356.5,3
3,Oct-13,2013,Oct,127.9,351.6,479.5,22431.6,4
4,Nov-13,2013,Nov,138.5,388.1,526.6,22630.9,5
...,...,...,...,...,...,...,...,...
87,Oct-20,2020,Oct,866.5,2215.9,3082.4,29601.6,88
88,Nov-20,2020,Nov,813.1,2223.3,3036.4,31571.4,89
89,Dec-20,2020,Dec,864.1,2107.2,2971.3,30450.9,90
90,Jan-21,2021,Jan,903.9,2117.1,3021.0,30538.5,91


In [4]:
# Assign the data to X and y
# Note: Sklearn requires a two-dimensional array of values
# so we use reshape to create this

X = data.index.values.reshape(-1, 1)
y = data.online_total_turnover.values.reshape(-1, 1)

print("Shape: ", X.shape, y.shape)
X

Shape:  (92, 1) (92, 1)


array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19],
       [20],
       [21],
       [22],
       [23],
       [24],
       [25],
       [26],
       [27],
       [28],
       [29],
       [30],
       [31],
       [32],
       [33],
       [34],
       [35],
       [36],
       [37],
       [38],
       [39],
       [40],
       [41],
       [42],
       [43],
       [44],
       [45],
       [46],
       [47],
       [48],
       [49],
       [50],
       [51],
       [52],
       [53],
       [54],
       [55],
       [56],
       [57],
       [58],
       [59],
       [60],
       [61],
       [62],
       [63],
       [64],
       [65],
       [66],
       [67],
       [68],
       [69],
       [70],
       [71],
       [72],
       [73],
       [74],
       [75],
       [76],

In [5]:
# Create the model and fit the model to the data
from sklearn.linear_model import LinearRegression

model = LinearRegression()

In [6]:
# Fit the model to the data. 
# Note: This is the training step where you fit the line to the data.

model.fit(X, y)

LinearRegression()

In [7]:
# Print the coefficient and the intercept for the model
print('Weight coefficients: ', model.coef_)
print('y-axis intercept: ', model.intercept_)

Weight coefficients:  [[25.80915571]]
y-axis intercept:  [138.07363254]


In [8]:
# Note: we have to transform our min and max values 
# so they are in the format: array([[ 1.17]])
# This is the required format for `model.predict()`
x_min = np.array([[X.min()]])
x_max = np.array([[X.max()]])
print(f"Min X Value: {x_min}")
print(f"Max X Value: {x_max}")

Min X Value: [[0]]
Max X Value: [[91]]


In [22]:
# Calculate the y_min and y_max using model.predict and x_min and x_max
y_min = model.predict(x_min)
y_max_p = model.predict( [[93],[94],[95],[96]])
y_max_p

array([[2538.32511367],
       [2564.13426938],
       [2589.94342509],
       [2615.7525808 ]])

In [10]:
print(f"Min y Value: {y_min}")
print(f"Max y Value: {y_max}")

Min y Value: [[138.07363254]]
Max y Value: [[2486.70680224]]
