In [1]:
import pandas as pd
import graphing
import numpy as np
!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/graphing.py
!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/m0b_optimizer.py
!wget https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/Data/seattleWeather_1948-2017.csv

#Load file
df = pd.read_csv('seattleWeather_1948-2017.csv', parse_dates=['date'])

#Only keeping temps for January
df = df[[d.month == 1 for d in df.date]].copy()


--2021-09-29 17:17:48--  https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/graphing.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 21511 (21K) [text/plain]
Saving to: ‘graphing.py.1’


2021-09-29 17:17:48 (28.3 MB/s) - ‘graphing.py.1’ saved [21511/21511]

--2021-09-29 17:17:48--  https://raw.githubusercontent.com/MicrosoftDocs/mslearn-introduction-to-machine-learning/main/m0b_optimizer.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1287 (1.3K) [text/plain]
Saving to: ‘m0b_optimiz

In [2]:
# Show the January-only frame; the notebook display truncates it to the first and last rows
df

Unnamed: 0,date,amount_of_precipitation,max_temperature,min_temperature,rain
0,1948-01-01,0.47,51,42,True
1,1948-01-02,0.59,45,36,True
2,1948-01-03,0.42,45,35,True
3,1948-01-04,0.31,45,34,True
4,1948-01-05,0.17,45,32,True
...,...,...,...,...,...
25229,2017-01-27,0.00,54,37,False
25230,2017-01-28,0.00,52,37,False
25231,2017-01-29,0.03,48,37,True
25232,2017-01-30,0.02,45,40,True


In [3]:
#Visualize data
#graphing.scatter_2D(df, label_x="date", label_y="min_temperature", title="January Temperatures (°F)")

In [4]:
# Offset date into fractional years relative to 1982. Uses the vectorised
# datetime accessors (year + day-of-year / 365.25) rather than looping over
# every Timestamp in Python.
df["years_since_1982"] = (df["date"].dt.year + df["date"].dt.dayofyear / 365.25) - 1982

# Scale and offset temp to make small range: subtract the mean and divide by
# the standard deviation as computed by np.std
df["normalised_temperature"] = (df["min_temperature"] - np.mean(df["min_temperature"])) / np.std(df["min_temperature"])

In [5]:
#graphing.scatter_2D(df, label_x="years_since_1982", label_y="normalised_temperature", title="January Temperatures (Normalised)")

In [6]:
class tmModel:
    '''
    Straight-line model: temperature = date * slope + intercept
    '''

    def __init__(self):
        # A line is described by two parameters, both starting at zero:
        # the intercept moves the line up and down...
        self.intercept = 0
        # ...and the slope sets how steeply it rises or falls
        self.slope = 0

    def predict(self, date):
        '''
        Estimates the temperature from the date
        '''
        scaled_date = date * self.slope
        return scaled_date + self.intercept


# Module-level instance that train_once reads and updates
model = tmModel()
print("Model made!")

Model made!


In [7]:
# Report the untrained parameters; note the order printed is intercept first, then slope
print(f"Model params before training: {model.intercept}, {model.slope}")
print("Model visualised before training:")
# The plotting call is commented out, so nothing is drawn after the heading above
#graphing.scatter_2D(df, "years_since_1982", "normalised_temperature", trendline=model.predict)

Model params before training: 0, 0
Model visualised before training:


In [8]:
def cost_function(actual_temperatures, estimated_temperatures):
    '''
    Calculate the difference between actual and estimated temps.
    Returns the difference, and the sum of squared differences (the cost)

    actual_temperatures: One or more temps recorded in the past
    estimated_temperatures: Temps estimated by the model

    Returns:

        difference: estimated minus actual, same shape as the inputs
        cost: a single number; smaller means the estimates are closer
    '''

    difference = estimated_temperatures - actual_temperatures

    # np.sum rather than the builtin sum(): the builtin raises TypeError when
    # a single scalar temperature is passed (a float is not iterable), whereas
    # np.sum accepts scalars, arrays and Series alike and stays vectorised.
    cost = np.sum(difference ** 2)

    return difference, cost

In [9]:
# m0b_optimizer.py is fetched by the wget call in the first cell, so this
# import only resolves once that download has happened
from m0b_optimizer import MyOptimizer

# Create the optimizer that train_once asks for parameter updates

optimizer = MyOptimizer()

In [10]:
def train_once(model_inputs, true_temperatures, last_cost:float):
    '''
    Runs training one time

    Reads and updates the module-level `model`, and asks the module-level
    `optimizer` for the parameter updates.

    model_inputs: one or more dates to provide the model (dates)
    true_temperatures: Corresponding temps that happen on those dates
    last_cost: The cost returned by the previous call. Pass math.inf on the
               first call so that the first update is always applied

    Returns:

        A Boolean, as to whether training should continue
        The cost calculated (small numbers are better)

    '''
    # Estimate temps for all data
    estimated_temperatures = model.predict(model_inputs)

    # Calculate model effectiveness
    difference, cost = cost_function(true_temperatures, estimated_temperatures)

    # Decide to keep training or not: stop as soon as a pass fails to lower
    # the cost. Written as `not (cost < last_cost)` instead of
    # `cost >= last_cost` so that a NaN cost (a diverged model) also stops
    # training; every comparison with NaN is False, which previously meant
    # the caller's loop would never terminate.
    if not (cost < last_cost):
        return False, cost

    # Further training is still paying off: apply the optimizer's updates
    intercept_update, slope_update = optimizer.get_parameter_updates(model_inputs, cost, difference)

    model.slope += slope_update
    model.intercept += intercept_update

    return True, cost

print("Training method ready")

Training method ready


In [11]:
import math

# Parameters are printed as intercept first, then slope
print(f"Model parameters before training:\t\t{model.intercept:.8f},\t{model.slope:.8f}")

# One training pass; math.inf as last_cost means any finite cost counts as an
# improvement, so the update is applied. Note this changes `model`, so
# re-running the cell takes a further step from the updated parameters.
continue_loop, cost = train_once(model_inputs=df["years_since_1982"],
                                true_temperatures = df["normalised_temperature"],
                                last_cost = math.inf)

print(f"Model params after 1 iteration:\t{model.intercept:.8f},\t{model.slope:.8f}")

Model parameters before training:		0.00000000,	0.00000000
Model params after 1 iteration:	0.00000000,	0.01006832


In [12]:
#loop time

# Keep calling train_once until it reports that the cost stopped improving

print("Training beginning...")

i = 0
continue_loop = True
last_cost = math.inf

while continue_loop:
    # Each pass feeds the previous cost back in so train_once can compare
    continue_loop, last_cost = train_once(
        model_inputs=df["years_since_1982"],
        true_temperatures=df["normalised_temperature"],
        last_cost=last_cost,
    )

    # Report progress on every 400th pass, starting with pass 0
    if not i % 400:
        print("Iteration:", i)
    i += 1

print("Training Completed!")
print(f"Model parameters after training:\t{model.intercept:.8f},\t{model.slope:.8f}")
#graphing.scatter_2D(df, "years_since_1982", "normalised_temperature", trendline=model.predict)


Training beginning...
Iteration: 0
Iteration: 400
Iteration: 800
Iteration: 1200
Iteration: 1600
Iteration: 2000
Iteration: 2400
Iteration: 2800
Iteration: 3200
Iteration: 3600
Iteration: 4000
Training Completed!
Model parameters after training:	-0.00648846,	0.01193327
