In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn.model_selection import train_test_split

In [2]:
datafile = "weather_data.csv"

In [3]:
def get_data(filename):
    df = pd.read_csv(filename)
    X_ = df[["Humidity","Visibility (km)"]]
    Y_ = df[["Temperature (C)"]]
    
    # Spliting data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(X_, Y_, test_size=0.25, random_state=42)

   
    
    return X_train, X_test, y_train, y_test



In [4]:
X_train, X_test, y_train, y_test = get_data(datafile)
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(72321, 2) (24108, 2) (72321, 1) (24108, 1)


In [5]:
def data_iter(batch_size, X, y):
    num_examples = len(X)
    indices = list(range(num_examples))
    # The examples are read at random, in no particular order
    random.shuffle(indices)
    for i in range(0, num_examples, batch_size):
        #write your code here to retrun batch wise X,y
        yield X[i:min(i + batch_size, num_examples)], y[i:min(i + batch_size, num_examples)] 

In [6]:
#define your batch size
batch_size = 10000

for X, y in data_iter(batch_size, X_test, y_test):
    print(X, '\n', y)
    break

       Humidity  Visibility (km)
25072      0.82            0.643
76967      0.96            0.040
22153      0.90            0.983
38552      0.85            0.056
82925      0.49            0.966
...         ...              ...
16332      0.64            0.700
56181      0.75            0.940
7720       0.53            0.643
40267      0.69            0.620
27622      0.87            1.000

[10000 rows x 2 columns] 
        Temperature (C)
25072         0.563251
76967         0.407837
22153         0.677077
38552         0.276862
82925         0.822798
...                ...
16332         0.494077
56181         0.352084
7720          0.684782
40267         0.422169
27622         0.678403

[10000 rows x 1 columns]


## create and initialize model parameter

In [7]:

def create_model_parameter(mu, sigma, row, column):
    w = np.random.normal(mu, sigma, size=(row, column))
    w = np.transpose(w)
    w.reshape((2,1))
    b = 0
    return w,b

In [8]:
def model(X, w, b):  
    """The linear regression model."""
    # write your code here
    return np.dot(X, w) + b

In [9]:
def squared_loss(y_hat, y):  #@save
    """Squared loss."""
    #write your code here for loss function
    return np.mean(np.square(y_hat - y), axis=0)

In [10]:
def gradient(X, y, y_hat, loss):
    #compute gradeint of loss function with respect to params
    grads = np.array([[(2/len(X))*np.dot((y_hat - y).T, X ).T], [(2/len(X))*np.sum(y_hat - y)]], dtype=object)
    return grads
    

In [11]:
def sgd(w, b, grads, lr, batch_size):  #@save
    """Minibatch stochastic gradient descent."""
    #write your code for updating your parameter using gradient descent algorithm
    #Example: theta = theta - (lr * grad)/batch_size
    w[0][0] = w[0][0] - (lr * grads[0][0][0])/batch_size
    w[1][0] = w[1][0] - (lr * grads[0][0][1])/batch_size
    b = b - (lr * grads[1])/batch_size
    return w , b
    

In [15]:
def train(lr,num_epochs,X,y, w ,b):
    # write your own code and modify the below code as needed
    loss = np.zeros(num_epochs)
    for epoch in range(num_epochs):
        i=1
        batches = data_iter(batch_size, X, y)
        print(batches)
        for X, y in batches:
           y_hat = model(X, w, b)
           loss[epoch] = squared_loss(y_hat, y)
           grads = gradient(X, y, y_hat, loss)
           w, b = sgd(w, b, grads, lr, batch_size)
           print('batch:' , i , " samlpes:", i*batch_size)
           i += 1
        print("Epoch: ", epoch+1, '\nLoss: ', loss[epoch])
           
        # train_l = squared_loss(net(features, w, b), labels)
        # print(f'epoch {epoch + 1}, loss {float(tf.reduce_mean(train_l)):f}')
    
    return epoch, loss, w, b

In [13]:
def draw_loss(num_epochs,loss):
    plt.plot(num_epochs,loss) 
    plt.show()

In [None]:
def test(X):
    # write your own code 
    #predict temperature for the given humidity and visibility

In [16]:
LEARNING_RATE = 0.02
EPOCHS = 5
# for X, y in data_iter(batch_size, X_test, y_test):
#     print("weights", '\n', params[0],'\n', "Bias", '\n', params[1], '\n')
#     y_hat = model(X, params[0], params[1])
#     print('\n','Predictions', '\n', y_hat, '\n')
#     loss = squared_loss(y_hat, y)
#     print("loss", '\n', loss,'\n', "actual", '\n', y)
#     grads = gradient(X, y, y_hat, loss, params)
#     print(grads)
#     break
w, b = create_model_parameter(np.mean(X_train, axis=0), np.std(X_train), 1, 2)
print(w,b)
epoch, loss, w, b = train(LEARNING_RATE, EPOCHS, X_train, y_train, w, b)


[[1.17301681]
 [0.25868631]] 0
<generator object data_iter at 0x0000023433A59AF0>
batch: 1  samlpes: 10000
batch: 2  samlpes: 20000
batch: 3  samlpes: 30000
batch: 4  samlpes: 40000
batch: 5  samlpes: 50000
batch: 6  samlpes: 60000
batch: 7  samlpes: 70000
batch: 8  samlpes: 80000
Epoch:  1 
Loss:  0.30619710195433136
<generator object data_iter at 0x0000023433A59A80>
batch: 1  samlpes: 10000
Epoch:  2 
Loss:  0.30619377138175546
<generator object data_iter at 0x0000023433A59AF0>
batch: 1  samlpes: 10000
Epoch:  3 
Loss:  0.3061904408611232
<generator object data_iter at 0x0000023433A59A80>


KeyboardInterrupt: 

In [None]:
print(np.array(range(epoch+1))+1)
print(loss)

In [None]:
draw_loss(np.array(range(epoch+1))+1, loss)

In [None]:
batches = data_iter(batch_size, X, y)
print(batches)
batches.close