- Create functions in python to calculate the accuracy, precision, recall, and f1-score for two numpy arrays, a predicted value and a true value. You may use the following as a test for your functions:

- x_predict = np.array(['red', 'blue', 'red', 'red', 'blue', 'blue', 'blue', 'red'])
- x_true = np.array(['red', 'red', 'red', 'blue', 'blue', 'blue', 'red', 'red'])

In [58]:
import pandas as pd
import numpy as np
from sklearn.metrics import confusion_matrix

x_predict = np.array(['red', 'blue', 'red', 'red', 'blue', 'blue', 'blue', 'red'])
x_true = np.array(['red', 'red', 'red', 'blue', 'blue', 'blue', 'red', 'red'])

print(confusion_matrix(x_predict, x_true))

[[2 2]
 [1 3]]


In [32]:
def accuracy(x_predict, x_true):
    sum = 0

    for i in range(len(x_predict)):
        if x_predict[i] == x_true[i]:
            sum += 1
    return sum/len(x_predict)

print(accuracy(x_predict, x_true))

0.625


In [37]:
def precision(x_predict, x_true, trueVal):
    sum = 0
    total = 0

    for i in range(len(x_predict)):
        if x_predict[i] == trueVal:
            total += 1
        if (x_predict[i] == x_true[i] and x_predict[i] == trueVal):
            sum += 1
       
    return sum/total

print(precision(x_predict, x_true, 'red'))

0.75


In [42]:
def recall(x_predict, x_true, trueVal):
    sum = 0
    total = 0

    for i in range(len(x_predict)):
        if (x_true[i] == trueVal and x_predict[i] != trueVal):
            total += 1
        if (x_predict[i] == x_true[i] and x_predict[i] == trueVal):
            sum += 1
    
    return sum/(sum + total)

print(recall(x_predict, x_true, 'red'))

0.6


In [45]:
def f1score(x_predict, x_true, trueVal):
    p = precision(x_predict, x_true, trueVal)
    r = recall(x_predict, x_true, trueVal)

    return (2 * p * r) / (p + r)

print(f1score(x_predict, x_true, 'red'))

0.6666666666666665


In [49]:
def readTxt(filepath):
    with open(filepath, 'r') as file:
        # Read the contents of the file
        content = file.read()
 
        # Split the contents into lines
        return content.splitlines()
 
# Now 'lines' contains the lines of the file as elements of a list
predicted = readTxt('predicted_values.txt')
actual = readTxt('true_values.txt')

In [53]:
print(accuracy(predicted, actual))
print(precision(predicted, actual, 'cold'))
print(recall(predicted, actual, 'cold'))
print(f1score(predicted, actual, 'cold'))

0.92
0.8620689655172413
1.0
0.9259259259259259


In [56]:
print(precision(predicted, actual, 'hot'))
print(recall(predicted, actual, 'hot'))
print(f1score(predicted, actual, 'hot'))

1.0
0.84
0.9130434782608696


In [57]:
y_true = np.array([3, 7, 11, 15, 21])
y_predict = np.array([2.5, 7.2, 11.0, 15.4, 20.6])

In [59]:
def mae(actual, calculated):
    sum = 0

    for i in range(len(actual)):
        sum += abs(calculated[i] - actual[i])

    return sum/len(actual)

print(mae(y_predict, y_true))

0.2999999999999998


In [60]:
def mse(actual, calculated):
    sum = 0

    for i in range(len(actual)):
        sum += pow((calculated[i] - actual[i]), 2)

    return sum/len(actual)

print(mse(y_predict, y_true))

0.12199999999999984


In [61]:
def sse(actual, calculated):
    sum = 0

    for i in range(len(actual)):
        sum += pow((calculated[i] - actual[i]), 2)

    return sum

print(sse(y_predict, y_true))

0.6099999999999992


In [62]:
def rmse(actual, calculated):
    sum = 0

    for i in range(len(actual)):
        sum += pow((calculated[i] - actual[i]), 2)

    return np.sqrt(sum/len(actual))

print(rmse(y_predict, y_true))

0.3492849839314594


In [67]:
def rmsle(actual, calculated):
    sum = 0

    for i in range(len(actual)):
        sum += np.log(((1 + calculated[i]) / (1 + actual[i])))

    sum = sum / len(actual)
    return np.sqrt(pow(sum/len(actual), 2))

print(rmsle(y_predict, y_true))

0.004099812244479047


In [68]:
X = np.loadtxt('regression_data.csv', delimiter=',', skiprows=1)
x = X[:,0]       # The x values for training
y = X[:,1]       # The y values for training

from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()
lin_reg.fit(x.reshape(-1,1), y.reshape(-1,1))

y_pred = lin_reg.predict(x.reshape(-1,1))

In [72]:
print(mae(y, y_pred))
print(sse(y, y_pred))
print(mse(y, y_pred))
print(rmse(y, y_pred))

[19.33471565]
[98833.74975002]
[494.16874875]
[22.22990663]
