In [4]:
import numpy as np

def closed_form(X, Y, lambda_factor):
    """
    Computes the closed form solution of linear regression with L2 regularization

    Args:
        X - (n, d + 1) NumPy array (n datapoints each with d features plus the bias feature in the
        first dimension)
        Y - (n, ) NumPy array containing the labels (a number from 0-9) for each
            data point
        lambda_factor - the regularization constant (scalar)
    Returns:
        theta - (d + 1, ) NumPy array containing the weights of linear regression. Note that theta[0]
        represents the y-axis intercept of the model and therefore X[0] = 1
    """
    iden = np.identity(X.shape[1])
    X_transpose = np.transpose(X)
    
#     import pdb; pdb.set_trace()
    temp  = np.matmul(np.linalg.inv(np.matmul(X_transpose, X) + lambda_factor*iden), X_transpose)
    
    return  np.matmul(temp, Y)

def main():
    X = np.array([[0.60227841, 0.74436559],
                  [0.53481431, 0.78738648],
                  [0.93396077, 0.59395608],
                  [0.45269031, 0.29321725],
                  [0.58479268, 0.48238035],
                  [0.40425954, 0.34427643],
                  [0.45220547, 0.9552927 ],
                  [0.81885683, 0.54244323],
                  [0.81001823, 0.40121082],
                  [0.63021481, 0.18217416],
                  [0.70110435, 0.15351914],
                  [0.58198765, 0.64942741],
                  [0.39737362, 0.77465253],
                  [0.197242,   0.27217859],
                  [0.77380844, 0.70840771],
                  [0.44428966, 0.14544632],
                  [0.26894811, 0.37065758],
                  [0.69747595, 0.55191086]])
    
    Y = np.array([0.71520912,
                  0.82348074,
                  0.62664771,
                  0.88708053,
                  0.62268969,
                  0.51701204,
                  0.56037225,
                  0.36652699,
                  0.26138652,
                  0.70599571,
                  0.93097065,
                  0.8384873,
                  0.64422265,
                  0.40349807,
                  0.7130443,
                  0.42234453,
                  0.29523068,
                  0.67227889])
    
    lambda_factor = 0.13999552731957732
    
    theta = closed_form(X, Y, lambda_factor)
    print(f'Theta: {theta}')

In [6]:
if __name__=="__main__":
    main()

Theta: [0.6515883  0.39162426]


In [11]:
def tokezine(text):
    token = text.split('\n')
#     token = [i.strip() for i in token]

    for i in token:
        if i == '':
            del i

    token = tokezine(token)
    token = remove_tab(token)
    token = [float(i) for i in token]
    
    return token

def main():
    text = """[0.51430936 0.92470543 0.98510953 0.61202715]
 [0.58396705 0.44613534 0.66779527 0.09668949]
 [0.80024258 0.23906493 0.31191937 0.95320372]
 [0.55387276 0.98925849 0.99923472 0.84743737]
 [0.82736896 0.96563909 0.13410083 0.98980752]
 [0.87868919 0.76570555 0.68343685 0.82489578]
 [0.71566387 0.63583229 0.58943712 0.82083885]
 [0.45182944 0.07400979 0.45568278 0.78347724]
 [0.44838283 0.09761183 0.48903315 0.18072152]
 [0.18204222 0.1986643  0.96881528 0.88145775]
 [0.73381456 0.66719631 0.37179778 0.99774641]
 [0.0350274  0.20374521 0.38481612 0.33226898]
 [0.65956888 0.13678739 0.52880769 0.72403686]
 [0.20019439 0.40817217 0.74837708 0.71260825]
 [0.28828821 0.87427699 0.89226254 0.86436389]
 [0.78814079 0.95381412 0.61114895 0.38874517]
 [0.14465708 0.78509964 0.26870203 0.3583575 ]
 [0.77173285 0.83553497 0.80579013 0.90558239]
 [0.25618548 0.06113822 0.60277871 0.5395311 ]"""
    token = tokezine(text)
    
    return token
    
if __name__=="__main__":
    token = main()
    print(token)

AttributeError: 'list' object has no attribute 'split'

In [58]:
def tokezine(text):
    token = text.split('\n')
    return token

text = """[0.51430936 0.92470543 0.98510953 0.61202715]
[0.58396705 0.44613534 0.66779527 0.09668949]
[0.80024258 0.23906493 0.31191937 0.95320372]
[0.55387276 0.98925849 0.99923472 0.84743737]
[0.82736896 0.96563909 0.13410083 0.98980752]
[0.87868919 0.76570555 0.68343685 0.82489578]
[0.71566387 0.63583229 0.58943712 0.82083885]
[0.45182944 0.07400979 0.45568278 0.78347724]
[0.44838283 0.09761183 0.48903315 0.18072152]
[0.18204222 0.19866430 0.96881528 0.88145775]
[0.73381456 0.66719631 0.37179778 0.99774641]
[0.03502740 0.20374521 0.38481612 0.33226898]
[0.65956888 0.13678739 0.52880769 0.72403686]
[0.20019439 0.40817217 0.74837708 0.71260825]
[0.28828821 0.87427699 0.89226254 0.86436389]
[0.78814079 0.95381412 0.61114895 0.38874517]
[0.14465708 0.78509964 0.26870203 0.35835750]
[0.77173285 0.83553497 0.80579013 0.90558239]
[0.25618548 0.06113822 0.60277871 0.53953110]"""

temp = tokezine(text)
# temp = [i.replace(' ', ', ') for i in temp]
temp = [i.replace('  ', '') for i in temp]
temp = [i.replace('[', '') for i in temp]
temp = [i.replace(']', '') for i in temp]
temp = [i.split(' ') for i in temp]
for i in range(len(temp)):
    for j in range(len(temp[i])):
        temp[i][j] = float(temp[i][j])
train_x = np.array(temp)
train_x

array([[0.51430936, 0.92470543, 0.98510953, 0.61202715],
       [0.58396705, 0.44613534, 0.66779527, 0.09668949],
       [0.80024258, 0.23906493, 0.31191937, 0.95320372],
       [0.55387276, 0.98925849, 0.99923472, 0.84743737],
       [0.82736896, 0.96563909, 0.13410083, 0.98980752],
       [0.87868919, 0.76570555, 0.68343685, 0.82489578],
       [0.71566387, 0.63583229, 0.58943712, 0.82083885],
       [0.45182944, 0.07400979, 0.45568278, 0.78347724],
       [0.44838283, 0.09761183, 0.48903315, 0.18072152],
       [0.18204222, 0.1986643 , 0.96881528, 0.88145775],
       [0.73381456, 0.66719631, 0.37179778, 0.99774641],
       [0.0350274 , 0.20374521, 0.38481612, 0.33226898],
       [0.65956888, 0.13678739, 0.52880769, 0.72403686],
       [0.20019439, 0.40817217, 0.74837708, 0.71260825],
       [0.28828821, 0.87427699, 0.89226254, 0.86436389],
       [0.78814079, 0.95381412, 0.61114895, 0.38874517],
       [0.14465708, 0.78509964, 0.26870203, 0.3583575 ],
       [0.77173285, 0.83553497,

In [64]:
train_y = "1 1 1 0 0 1 1 0 0 0 1 1 0 1 0 0 0 0 0"
temp = train_y.split(' ')
temp = [int(i) for i in temp]
train_y = np.array(temp)
train_y

array([1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0])

In [68]:
text = """[[0.42631019 0.90820080 0.51895703 0.25101084]
[0.54798766 0.38790428 0.63678979 0.26362676]
[0.33752975 0.10926614 0.16274226 0.49040325]
[0.93006607 0.03197319 0.90033765 0.99838176]
[0.19299287 0.05604450 0.51279381 0.04177533]
[0.35941983 0.71008495 0.72273757 0.73749200]
[0.78301067 0.47418480 0.19036644 0.53177781]
[0.14631623 0.51798260 0.00220434 0.07109351]
[0.97461135 0.54790723 0.29042913 0.89126737]
[0.71354786 0.43566224 0.20258786 0.09081258]
[0.76136292 0.44890751 0.47702086 0.40464075]
[0.30777316 0.75410085 0.73738859 0.63311266]]"""

temp = tokezine(text)
# temp = [i.replace(' ', ', ') for i in temp]
temp = [i.replace('  ', '') for i in temp]
temp = [i.replace('[', '') for i in temp]
temp = [i.replace(']', '') for i in temp]
temp = [i.split(' ') for i in temp]
for i in range(len(temp)):
    for j in range(len(temp[i])):
        temp[i][j] = float(temp[i][j])
test_x = np.array(temp)
test_x

array([[0.42631019, 0.9082008 , 0.51895703, 0.25101084],
       [0.54798766, 0.38790428, 0.63678979, 0.26362676],
       [0.33752975, 0.10926614, 0.16274226, 0.49040325],
       [0.93006607, 0.03197319, 0.90033765, 0.99838176],
       [0.19299287, 0.0560445 , 0.51279381, 0.04177533],
       [0.35941983, 0.71008495, 0.72273757, 0.737492  ],
       [0.78301067, 0.4741848 , 0.19036644, 0.53177781],
       [0.14631623, 0.5179826 , 0.00220434, 0.07109351],
       [0.97461135, 0.54790723, 0.29042913, 0.89126737],
       [0.71354786, 0.43566224, 0.20258786, 0.09081258],
       [0.76136292, 0.44890751, 0.47702086, 0.40464075],
       [0.30777316, 0.75410085, 0.73738859, 0.63311266]])

In [70]:
from sklearn.svm import LinearSVC


def one_vs_rest_svm(train_x, train_y, test_x):
    """
    Trains a linear SVM for binary classification

    Args:
        train_x - (n, d) NumPy array (n datapoints each with d features)
        train_y - (n, ) NumPy array containing the labels (0 or 1) for each training data point
        test_x - (m, d) NumPy array (m datapoints each with d features)
    Returns:
        pred_test_y - (m,) NumPy array containing the labels (0 or 1) for each test data point
    """
    clf = LinearSVC(random_state=0, C=.1)  # Initiate method
    clf.fit(train_x, train_y)

    return clf.predict(test_x)

if __name__=="__main__":
    print(one_vs_rest_svm(train_x, train_y, test_x))

[0 0 0 0 0 0 0 0 0 0 0 0]
