sample.py

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json

# Generate N noisy observations of one period of sin(2*pi*x), plot them
# against the noise-free curve, and save them to lr_samples.json.
N = 100

# Noise-free reference curve, drawn on np.linspace's default number of points.
x = np.linspace(0, 1)
norm = np.sin(2 * np.pi * x)

# Evenly spaced sample positions; each observation is the sine value plus
# Gaussian noise with mean 0 and standard deviation 0.3.
sample_x = np.linspace(0, 1, N)
noise = np.random.normal(0, 0.3, size=sample_x.shape)
y_noisy = np.sin(2 * np.pi * sample_x) + noise

# Draw on the current axes explicitly instead of through pyplot's shortcuts.
axes = plt.gca()
axes.plot(x, norm, label='Original graph')
axes.scatter(sample_x, y_noisy, label='Noise samples')
axes.set_xlabel('x')
axes.set_ylabel('y')
axes.set_xlim(0, 1)
axes.legend()
plt.show()

# Store the samples as plain lists so they can be serialized as JSON.
payload = {'x': sample_x.tolist(), 'y': y_noisy.tolist()}
with open('lr_samples.json', 'w') as f:
    json.dump(payload, f)


linear_regression.py

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Fit ordinary least-squares polynomial models of increasing degree to the
# samples stored in lr_samples.json and plot every fitted curve together
# with the samples.
with open('lr_samples.json', 'r') as f:
    data = json.load(f)

# scikit-learn expects a 2-D feature matrix, hence the single-column reshape.
x = np.array(data['x']).reshape(-1, 1)
y = np.array(data['y'])

# Dense grid on [0, 1] on which each fitted model is evaluated for plotting.
x_graph = np.linspace(0, 1, 300).reshape(-1, 1)

axes = plt.gca()
for deg in (1, 2, 5, 9, 15):
    # Expand x into the columns 1, x, x**2, ..., x**deg.
    expander = PolynomialFeatures(deg)
    design = expander.fit_transform(x)

    # Linear regression on the expanded features is a polynomial fit in x.
    regressor = LinearRegression()
    regressor.fit(design, y)

    # Evaluate the fitted polynomial on the plotting grid.
    curve = regressor.predict(expander.transform(x_graph))
    axes.plot(x_graph, curve, label=f'{deg}-order model')

axes.scatter(x, y, label='Noise samples')
axes.set_xlim(0, 1)
axes.set_xlabel('x')
axes.set_ylabel('y')
axes.legend()
plt.show()

linear_regression_outlier.py

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

# Repeat the polynomial-regression experiment after appending three outliers
# to the samples stored in lr_samples.json, to show how the unregularized
# fits react to them.
with open('lr_samples.json', 'r') as f:
    data = json.load(f)

x = np.array(data['x'])
y = np.array(data['y'])

# Append three outliers with y = 1.5 at x = 0.72, 0.76 and 0.78, then turn x
# into the single-column 2-D matrix scikit-learn expects.
x = np.append(x, np.array([0.72, 0.76, 0.78])).reshape(-1, 1)
y = np.append(y, np.array([1.5] * 3))

# Use the same 300-point plotting grid as linear_regression.py; the default
# 50 points of np.linspace draw the high-degree curves as coarse polylines.
x_graph = np.linspace(0, 1, 300).reshape(-1, 1)

for deg in [1, 2, 5, 9, 15]:

    model = LinearRegression()
    env = PolynomialFeatures(deg)

    x_poly = env.fit_transform(x)  # columns: 1, x, x**2, ..., x**deg
    model.fit(x_poly, y)  # least-squares fit on the expanded features
    model_line = model.predict(env.transform(x_graph))  # fitted curve on the grid
    plt.plot(x_graph, model_line, label=f'{deg}-order model')

plt.scatter(x, y, label='Noise samples')
plt.xlim(0, 1)
# Clamp the y-range so the plotted curves cannot stretch the axis.
plt.ylim(-3, 3)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()

linear_regression_ridge_lasso.py

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import json
from sklearn.linear_model import Ridge, Lasso
from sklearn.preprocessing import PolynomialFeatures

# Compare Lasso (L1) and Ridge (L2) regularization on a degree-15 polynomial
# fit of the samples from lr_samples.json plus three appended outliers.
with open('lr_samples.json', 'r') as f:
    data = json.load(f)

x = np.array(data['x'])
y = np.array(data['y'])

# Append three outliers with y = 1.5 at x = 0.72, 0.76 and 0.78, then turn x
# into the single-column 2-D matrix scikit-learn expects.
x = np.append(x, np.array([0.72, 0.76, 0.78])).reshape(-1, 1)
y = np.append(y, np.array([1.5] * 3))

# Use the same 300-point plotting grid as linear_regression.py; the default
# 50 points of np.linspace draw a degree-15 curve as a coarse polyline.
x_graph = np.linspace(0, 1, 300).reshape(-1, 1)

# The feature expansion does not depend on the regularization strength, so
# it is computed once for both the training points and the plotting grid.
env = PolynomialFeatures(15)
x_poly = env.fit_transform(x)
x_graph_poly = env.transform(x_graph)

# `alpha` is the regularization strength (shown as lambda in the labels).
for alpha in [0.0001, 0.001, 0.01]:

    # NOTE(review): with penalties this small, Lasso may stop at its default
    # max_iter and emit a ConvergenceWarning - raise max_iter if that shows up.
    model1 = Lasso(alpha=alpha)

    model1.fit(x_poly, y)
    model_line1 = model1.predict(x_graph_poly)
    plt.plot(x_graph, model_line1, label=f'λ-{alpha} lasso model')

    model2 = Ridge(alpha=alpha)

    model2.fit(x_poly, y)
    model_line2 = model2.predict(x_graph_poly)
    # Dashed line distinguishes the ridge curve from the lasso one.
    plt.plot(x_graph, model_line2, linestyle='--', label=f'λ-{alpha} ridge model')

    # Print the learned coefficients and intercepts of both models.
    print(f'model1 λ-{alpha}:', model1.coef_, model1.intercept_)
    print(f'model2 λ-{alpha}:', model2.coef_, model2.intercept_)


plt.scatter(x, y, label='Noise samples')
plt.xlim(0, 1)
# Clamp the y-range so the plotted curves cannot stretch the axis.
plt.ylim(-3, 3)
plt.legend()
plt.xlabel('x')
plt.ylabel('y')
plt.show()

classification.py

In [None]:
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_openml

# Multinomial logistic regression on MNIST, run as two experiments:
#   1. test accuracy as a function of the train/test split ratio;
#   2. test accuracy as a function of the solver's max_iter, on a fixed
#      80/20 split.

# The code below indexes with .iloc, so request pandas objects explicitly
# instead of relying on the default of fetch_openml.
dataset = fetch_openml('mnist_784', as_frame=True)
x = dataset["data"]
y = dataset["target"].astype(int)

# Shuffle features and labels with the same seeded permutation so every run
# produces the same splits.
np.random.seed(123456)
random_index = np.random.permutation(len(x))
x = x.iloc[random_index]
y = y.iloc[random_index]

# Experiment 1: vary the training fraction. The split variables use their own
# names here so they cannot overwrite the fixed split of experiment 2.
ratio_accuracy = []
for factor in [0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95, 0.99]:
    ratio_cut = int(len(x) * factor)

    x_ratio_train = x.iloc[:ratio_cut]
    y_ratio_train = y.iloc[:ratio_cut]
    x_ratio_test = x.iloc[ratio_cut:]
    y_ratio_test = y.iloc[ratio_cut:]

    # NOTE(review): recent scikit-learn releases deprecate `multi_class`;
    # confirm against the installed version before dropping the argument.
    model = LogisticRegression(multi_class='multinomial')
    model.fit(x_ratio_train, y_ratio_train)

    y_pred = model.predict(x_ratio_test)
    ratio_accuracy.append(accuracy_score(y_ratio_test, y_pred))

print(ratio_accuracy)

# Experiment 2: fixed 80/20 split. It is built here, after experiment 1, so
# the max_iter sweep really runs on 80% of the data and not on whatever split
# the ratio loop happened to finish with.
cut = int(len(x) * 0.8)

x_train = x.iloc[:cut]
y_train = y.iloc[:cut]
x_test = x.iloc[cut:]
y_test = y.iloc[cut:]

iter_accuracy = []

# `max_iter` rather than `iter`, which would shadow the builtin.
for max_iter in [10, 50, 100, 200, 300, 400, 500, 1000]:

    model = LogisticRegression(multi_class='multinomial', max_iter=max_iter)
    model.fit(x_train, y_train)

    y_pred = model.predict(x_test)
    iter_accuracy.append(accuracy_score(y_test, y_pred))

print(iter_accuracy)
