# Лэптопы

In [None]:
import numpy as np
import pandas as pd

In [None]:
def sigmoid(x):
  """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

  For very negative ``x`` the intermediate ``exp(-x)`` overflows to ``inf``.
  The final result ``1 / (1 + inf) == 0.0`` is still the correct value, so the
  overflow is benign; it is silenced locally so callers do not get a spurious
  RuntimeWarning (or a FloatingPointError under ``np.errstate(over='raise')``).

  Args:
    x: Scalar or NumPy array.

  Returns:
    The sigmoid of ``x``, with the same shape as ``x``.
  """
  with np.errstate(over='ignore'):
    return 1 / (1 + np.exp(-x))


def sigmoid_dx(x):
  """Return ``x * (1 - x)``, the sigmoid derivative written via its output.

  The argument is expected to be an already-activated value
  ``s = sigmoid(z)`` (the MLP passes its hidden-layer output here), not the
  pre-activation ``z``.
  """
  activation = x
  complement = 1 - activation
  return activation * complement

In [None]:
# Конструктор - (done)
# Функция предикта - (done)
# Функция обучения (back-prop) - (done)
# Функция активации - (done)
# Метрики - (done)

class MLP:
  """Multilayer perceptron with one sigmoid hidden layer and a linear output.

  All weights and biases are drawn from a standard normal distribution when
  the object is constructed. ``predict`` runs the forward pass and caches the
  layer activations on the instance; ``backprop`` consumes that cache to take
  one gradient-descent step.

  Attributes:
    input_size, hidden_size, output_size: Layer widths.
    learning_rate: Step size applied to every parameter update.
    epochs: Stored copy of ``n_iters``; the class itself does not read it,
      it is there for the external training loop.
    weights_0_1, bias_0_1: Input -> hidden parameters.
    weights_1_2, bias_1_2: Hidden -> output parameters.
  """

  def __init__(self, input_layer_size, hidden_layer_size, output_layer_size, learning_rate=0.01, n_iters=1000):
    """Store the hyper-parameters and randomly initialise both layers."""
    self.input_size = input_layer_size
    self.hidden_size = hidden_layer_size
    self.output_size = output_layer_size
    self.learning_rate = learning_rate
    self.epochs = n_iters

    self.weights_0_1 = np.random.randn(self.input_size, self.hidden_size)
    self.bias_0_1 = np.random.randn(self.hidden_size)

    self.weights_1_2 = np.random.randn(self.hidden_size, self.output_size)
    self.bias_1_2 = np.random.randn(self.output_size)

  def predict(self, X):
    """Run the forward pass for a batch of rows.

    Args:
      X: Array of shape ``(n_samples, input_size)``.

    Returns:
      Array of shape ``(n_samples, output_size)``. The hidden activations and
      the output are also cached on ``self`` for a following ``backprop``.
    """
    self.hidden_input = np.dot(X, self.weights_0_1) + self.bias_0_1
    self.hidden_output = sigmoid(self.hidden_input)
    self.output = np.dot(self.hidden_output, self.weights_1_2) + self.bias_1_2

    return self.output

  def backprop(self, X, y):
    """Take one gradient-descent step using the cached forward pass.

    Must be called right after ``predict(X)`` with the same ``X``, because it
    reads ``self.output`` and ``self.hidden_output`` instead of recomputing
    them. Gradients are summed (not averaged) over the rows of the batch.

    Args:
      X: The inputs that were passed to the preceding ``predict`` call.
      y: Targets with one value per output element; accepted either as
        ``(n_samples,)`` or as ``(n_samples, output_size)``.

    Raises:
      ValueError: If ``y`` does not hold exactly as many values as the cached
        output.
    """
    # Align y with the cached output. Without this a 1-D y of shape (n,)
    # broadcasts against the (n, 1) output into an (n, n) error matrix, which
    # only happens to work when n == 1.
    y = np.asarray(y, dtype=float).reshape(self.output.shape)

    # The output layer is linear, so its delta is the raw error (the gradient
    # of half the squared error with respect to the output).
    output_error = self.output - y
    output_delta = output_error
    # Propagate through the pre-update output weights, then through the
    # sigmoid, whose derivative is expressed via the cached activation.
    hidden_error = np.dot(output_delta, self.weights_1_2.T)
    hidden_delta = hidden_error * sigmoid_dx(self.hidden_output)

    self.weights_1_2 -= self.learning_rate * np.dot(self.hidden_output.T, output_delta)
    self.bias_1_2 -= self.learning_rate * np.sum(output_delta, axis=0)
    self.weights_0_1 -= self.learning_rate * np.dot(X.T, hidden_delta)
    self.bias_0_1 -= self.learning_rate * np.sum(hidden_delta, axis=0)

In [None]:
def MSE(y_true, y_pred):
  """Mean squared error: the average of the squared element-wise residuals.

  The two arguments are subtracted with NumPy broadcasting, so their shapes
  must be broadcast-compatible.
  """
  residuals = y_true - y_pred
  squared = residuals ** 2
  return np.mean(squared)

def MAE(y_true, y_pred):
  """Mean absolute error: the average magnitude of the element-wise residuals.

  The two arguments are subtracted with NumPy broadcasting, so their shapes
  must be broadcast-compatible.
  """
  residuals = y_true - y_pred
  return np.mean(np.absolute(residuals))

def root_mean_squared_error(y_true, y_pred):
    """Root mean squared error: the square root of ``MSE(y_true, y_pred)``."""
    return np.sqrt(MSE(y_true, y_pred))

In [None]:
# Load the laptop dataset from CSV (the file name indicates it has already
# been preprocessed). The bare `df` on the last line makes the notebook render
# the frame as the cell output.
df = pd.read_csv('/content/laptops_preprocessed.csv')
df

Unnamed: 0,Brand,Processor_Speed,RAM_Size,Storage_Capacity,Screen_Size,Weight,Price
0,1,0.932352,0.428571,0.344086,0.028967,0.214214,0.353939
1,0,0.563441,0.000000,1.000000,0.050097,0.421198,0.923946
2,4,0.695649,0.000000,0.000000,0.140771,0.009532,0.028917
3,0,0.922682,0.428571,0.344086,0.212308,0.860589,0.355609
4,0,0.706292,1.000000,1.000000,0.666055,0.733374,0.976500
...,...,...,...,...,...,...,...
995,3,0.736646,0.000000,1.000000,0.263656,0.388553,0.923387
996,2,0.510253,0.142857,0.000000,0.279101,0.585153,0.023242
997,2,0.679140,0.000000,0.344086,0.276918,0.465543,0.320142
998,1,0.037235,0.142857,0.000000,0.034061,0.621053,0.033587


In [None]:
from sklearn.model_selection import train_test_split

# 'Price' is the regression target; every other column is a feature.
y = df['Price']
X = df.drop(columns='Price')

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=48239
)

# Report the shape of each part (one per line), then the raw training features.
print(X_train.shape, X_test.shape, sep='\n')
print(y_train.shape, y_test.shape, sep='\n')

print(X_train.to_numpy())

(800, 6)
(200, 6)
(800,)
(200,)
[[3.         0.45151365 1.         0.34408602 0.4785813  0.33368794]
 [0.         0.5232619  0.         0.34408602 0.19113153 0.45256328]
 [2.         0.13249915 0.42857143 1.         0.7182593  0.62653078]
 ...
 [1.         0.26939778 0.14285714 1.         0.33164486 0.03319022]
 [2.         0.35182607 1.         0.34408602 0.21080244 0.77279338]
 [3.         0.78918468 0.14285714 0.         0.22978621 0.70145181]]


In [None]:
mlp = MLP(input_layer_size=X_train.shape[1], hidden_layer_size=3, output_layer_size=1, n_iters=1000)
batch_size = 40

# Convert to NumPy once; the training frames do not change between epochs.
X_train_values = X_train.values
y_train_values = y_train.values
n_train = X_train_values.shape[0]

for epoch in range(mlp.epochs):
  # Visit the samples in a fresh random order every epoch.
  indices = np.random.permutation(n_train)
  X_train_shuffled = X_train_values[indices]
  y_train_shuffled = y_train_values[indices]

  # Sum of per-sample losses, so the log reflects the whole epoch rather than
  # whichever sample happened to come last.
  epoch_loss = 0.0

  for i in range(0, n_train, batch_size):
    X_batch = X_train_shuffled[i:i+batch_size]
    y_batch = y_train_shuffled[i:i+batch_size]

    # NOTE: the weights are updated after every single sample; batch_size only
    # controls how the shuffled data is chunked, not the size of an update.
    for j in range(X_batch.shape[0]):
      X_sample = X_batch[j:j+1]
      y_sample = y_batch[j:j+1]

      output = mlp.predict(X_sample)
      # Loss is measured before this sample's weight update.
      epoch_loss += MSE(y_sample, output)
      mlp.backprop(X_sample, y_sample)

  # Mean per-sample MSE over the epoch (max() guards an empty training set).
  loss = epoch_loss / max(n_train, 1)

  if epoch % 100 == 0 or epoch == mlp.epochs - 1:
    print(f'Epoch {epoch + 1}, Loss: {loss}')

Epoch 1, Loss: 0.013554912777157525
Epoch 101, Loss: 0.00039015044014034005
Epoch 201, Loss: 2.839517285957328e-07
Epoch 301, Loss: 0.0006792313214730104
Epoch 401, Loss: 5.035705169800732e-05
Epoch 501, Loss: 4.175806833686289e-05
Epoch 601, Loss: 3.6913948249236575e-06
Epoch 701, Loss: 1.6374078740738914e-05
Epoch 801, Loss: 8.241776578008765e-05
Epoch 901, Loss: 2.3126931701060787e-05
Epoch 1000, Loss: 0.00018562156083669213


In [None]:
# One vectorised forward pass over the whole test set. predict() returns a
# column of shape (n_samples, 1); flatten it so it lines up with y_test.
predictions = mlp.predict(X_test.values).flatten()

mse = MSE(y_test.values, predictions)
mae = MAE(y_test.values, predictions)
rmse = root_mean_squared_error(y_test.values, predictions)
print(f'Test MSE loss: {mse}')
print(f'Test MAE loss: {mae}')
print(f'Test RMSE loss: {rmse}')

# Show up to ten predictions next to the ground truth (fewer if the test set is smaller).
for sample_index in range(min(10, len(predictions))):
  print(f'Index: {sample_index}, predicted: {predictions[sample_index]:.2f}, actual value: {y_test.iloc[sample_index]:.2f}')

Test MSE loss: 8.16699218392481e-05
Test MAE loss: 0.006834501138340743
Test RMSE loss: 0.009037141242630222
Index: 0, predicted: 0.97, actual value: 0.98
Index: 1, predicted: 0.03, actual value: 0.03
Index: 2, predicted: 0.03, actual value: 0.03
Index: 3, predicted: 0.32, actual value: 0.33
Index: 4, predicted: 0.02, actual value: 0.02
Index: 5, predicted: 0.02, actual value: 0.02
Index: 6, predicted: 0.92, actual value: 0.92
Index: 7, predicted: 0.94, actual value: 0.94
Index: 8, predicted: 0.96, actual value: 0.97
Index: 9, predicted: 0.32, actual value: 0.32


# Грибы

In [None]:
class MLPClassifier:
    """Multilayer perceptron classifier: one tanh hidden layer, softmax output.

    All weights and biases are drawn from a standard normal distribution on
    construction. ``forward`` caches the hidden activations on the instance
    and ``backward`` uses them for one gradient-descent step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Randomly initialise the input->hidden and hidden->output layers."""
        self.weights_input_hidden = np.random.randn(input_size, hidden_size)
        self.bias_input_hidden = np.random.randn(hidden_size)
        self.weights_hidden_output = np.random.randn(hidden_size, output_size)
        self.bias_hidden_output = np.random.randn(output_size)

    def softmax(self, z):
        """Return the row-wise softmax of a 2-D array of logits.

        The row maximum is subtracted before exponentiating. Softmax is
        invariant to a per-row shift, so the result is unchanged, but
        ``np.exp`` can no longer overflow to ``inf`` and produce NaN.
        """
        shifted = z - np.max(z, axis=1, keepdims=True)
        exp_z = np.exp(shifted)
        return exp_z / np.sum(exp_z, axis=1, keepdims=True)

    def forward(self, inputs):
        """Compute class probabilities for a batch of rows.

        Args:
            inputs: Array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, output_size)`` whose rows sum to 1.
            The hidden activations are cached in ``self.hidden_outputs``.
        """
        hidden_inputs = np.dot(inputs, self.weights_input_hidden) + self.bias_input_hidden
        self.hidden_outputs = np.tanh(hidden_inputs)
        final_inputs = np.dot(self.hidden_outputs, self.weights_hidden_output) + self.bias_hidden_output
        final_outputs = self.softmax(final_inputs)
        return final_outputs

    def backward(self, inputs, targets, output_probs, learning_rate):
        """Take one gradient-descent step using the cached forward pass.

        Must follow ``forward(inputs)`` on the same ``inputs``, because it
        reads ``self.hidden_outputs``. Gradients are summed over the batch.

        Args:
            inputs: The rows passed to the preceding ``forward`` call.
            targets: One-hot targets with the same shape as ``output_probs``.
            output_probs: The probabilities returned by ``forward``.
            learning_rate: Step size for every parameter update.
        """
        # probs - targets is the gradient of softmax + cross-entropy with
        # respect to the pre-softmax activations.
        d_loss_d_output = output_probs - targets
        d_loss_d_weights_hidden_output = np.dot(self.hidden_outputs.T, d_loss_d_output)
        d_loss_d_bias_hidden_output = np.sum(d_loss_d_output, axis=0)
        # tanh'(a) = 1 - tanh(a)^2, expressed via the cached activations.
        d_loss_d_hidden = np.dot(d_loss_d_output, self.weights_hidden_output.T) * (1 - np.square(self.hidden_outputs))
        d_loss_d_weights_input_hidden = np.dot(inputs.T, d_loss_d_hidden)
        d_loss_d_bias_input_hidden = np.sum(d_loss_d_hidden, axis=0)

        self.weights_hidden_output -= learning_rate * d_loss_d_weights_hidden_output
        self.bias_hidden_output -= learning_rate * d_loss_d_bias_hidden_output
        self.weights_input_hidden -= learning_rate * d_loss_d_weights_input_hidden
        self.bias_input_hidden -= learning_rate * d_loss_d_bias_input_hidden

    def train(self, inputs, targets, learning_rate, epochs, batch_size):
        """Train with per-sample stochastic gradient descent.

        Every epoch the samples are reshuffled and visited one at a time, with
        a weight update after each sample. ``batch_size`` only controls how
        the shuffled data is chunked, not the size of an update.

        Args:
            inputs: Features, ``(n_samples, input_size)``; a pandas DataFrame
                or anything ``np.asarray`` accepts.
            targets: One-hot targets, ``(n_samples, output_size)``; same
                accepted types as ``inputs``.
            learning_rate: Step size passed to ``backward``.
            epochs: Number of passes over the data.
            batch_size: Chunk size used when walking the shuffled data.

        Prints the mean per-sample MSE between targets and predicted
        probabilities every fifth epoch and on the last one. MSE is used for
        reporting only; the updates follow the softmax/cross-entropy gradient.
        """
        # Convert once so that plain NumPy arrays work as well as DataFrames.
        inputs_array = np.asarray(inputs, dtype=float)
        targets_array = np.asarray(targets, dtype=float)
        n_samples = inputs_array.shape[0]

        for epoch in range(epochs):
            indices = np.random.permutation(n_samples)
            inputs_shuffled = inputs_array[indices]
            targets_shuffled = targets_array[indices]

            # Sum of per-sample losses, so the log reflects the whole epoch
            # rather than whichever sample happened to come last.
            epoch_loss = 0.0

            for i in range(0, n_samples, batch_size):
                inputs_batch = inputs_shuffled[i:i+batch_size]
                targets_batch = targets_shuffled[i:i+batch_size]
                for j in range(inputs_batch.shape[0]):
                    inputs_sample = inputs_batch[j:j+1]
                    targets_sample = targets_batch[j:j+1]

                    output_probs = self.forward(inputs_sample)
                    self.backward(inputs_sample, targets_sample, output_probs, learning_rate)
                    # output_probs predates the update just applied.
                    epoch_loss += MSE(targets_sample, output_probs)

            # max() guards an empty training set.
            loss = epoch_loss / max(n_samples, 1)
            if epoch % 5 == 0 or epoch == epochs - 1:
                print(f'Epoch {epoch + 1}, Loss: {loss}')

In [None]:
!pip install ucimlrepo



In [None]:
from ucimlrepo import fetch_ucirepo

# Fetch dataset number 73 from the UCI repository (the printed metadata below
# identifies it as "Mushroom").
mushroom = fetch_ucirepo(id=73)

# Features and targets (as pandas dataframes). NOTE: this rebinds X and y,
# which held the laptop data earlier in the notebook.
X = mushroom.data.features
y = mushroom.data.targets

# Dataset-level metadata
print(mushroom.metadata)

# Per-variable information
print(mushroom.variables)


{'uci_id': 73, 'name': 'Mushroom', 'repository_url': 'https://archive.ics.uci.edu/dataset/73/mushroom', 'data_url': 'https://archive.ics.uci.edu/static/public/73/data.csv', 'abstract': 'From Audobon Society Field Guide; mushrooms described in terms of physical characteristics; classification: poisonous or edible', 'area': 'Biology', 'tasks': ['Classification'], 'characteristics': ['Multivariate'], 'num_instances': 8124, 'num_features': 22, 'feature_types': ['Categorical'], 'demographics': [], 'target_col': ['poisonous'], 'index_col': None, 'has_missing_values': 'yes', 'missing_values_symbol': 'NaN', 'year_of_dataset_creation': 1981, 'last_updated': 'Thu Aug 10 2023', 'dataset_doi': '10.24432/C5959T', 'creators': [], 'intro_paper': None, 'additional_info': {'summary': "This data set includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms in the Agaricus and Lepiota Family (pp. 500-525).  Each species is identified as definitely edible, definitely po

In [None]:
# Count the missing values in each feature column.
X.isna().sum()

cap-shape                      0
cap-surface                    0
cap-color                      0
bruises                        0
odor                           0
gill-attachment                0
gill-spacing                   0
gill-size                      0
gill-color                     0
stalk-shape                    0
stalk-root                  2480
stalk-surface-above-ring       0
stalk-surface-below-ring       0
stalk-color-above-ring         0
stalk-color-below-ring         0
veil-type                      0
veil-color                     0
ring-number                    0
ring-type                      0
spore-print-color              0
population                     0
habitat                        0
dtype: int64

In [None]:
# The features frame handed back by the loader can be a slice of a larger
# frame; assigning a column on it triggers pandas' SettingWithCopyWarning.
# Take an explicit copy so the assignment modifies data this notebook owns.
X = X.copy()
# Replace missing 'stalk-root' values with the placeholder label 'n'.
X['stalk-root'] = X['stalk-root'].fillna('n')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X['stalk-root'] = X['stalk-root'].fillna('n')


In [None]:
# Count feature rows that exactly duplicate an earlier row.
X.duplicated().sum()

0

In [None]:
# One-hot encode the features and the target as float indicator columns.
# The bare `X` on the last line renders the encoded features as the cell output.
X = pd.get_dummies(X, dtype='float')
y = pd.get_dummies(y, dtype='float')
X

Unnamed: 0,cap-shape_b,cap-shape_c,cap-shape_f,cap-shape_k,cap-shape_s,cap-shape_x,cap-surface_f,cap-surface_g,cap-surface_s,cap-surface_y,...,population_s,population_v,population_y,habitat_d,habitat_g,habitat_l,habitat_m,habitat_p,habitat_u,habitat_w
0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,...,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
4,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8119,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
8120,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
8121,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
8122,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [None]:
# One input per encoded feature column, half as many hidden units (rounded
# down), and one output per target column.
mushrooms_mlp = MLPClassifier(
    input_size=X.shape[1],
    hidden_size=X.shape[1] // 2,
    output_size=y.shape[1],
)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=7273715
)

# Report the shape of each part (one per line), then the raw training features.
print(X_train.shape, X_test.shape, sep='\n')
print(y_train.shape, y_test.shape, sep='\n')

print(X_train.to_numpy())

(6499, 117)
(1625, 117)
(6499, 2)
(1625, 2)
[[0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [None]:
# Train the mushroom classifier on the training split; train() prints the
# loss every fifth epoch and on the final epoch.
mushrooms_mlp.train(X_train, y_train, learning_rate=0.005, epochs=50, batch_size=32)

Epoch 1, Loss: 0.0004676726718804476
Epoch 6, Loss: 4.256813246658771e-05
Epoch 11, Loss: 2.307762475704378e-13
Epoch 16, Loss: 1.6344345229119446e-05
Epoch 21, Loss: 1.757773332607807e-17
Epoch 26, Loss: 2.5436476172723884e-11
Epoch 31, Loss: 1.0317892521931305e-10
Epoch 36, Loss: 1.1353000875335431e-13
Epoch 41, Loss: 8.742607391976636e-15
Epoch 46, Loss: 6.24853146766054e-17
Epoch 50, Loss: 1.3403484357384608e-13


In [None]:
def accuracy_score(y_true, y_pred):
    """Fraction of rows whose predicted class equals the true class.

    Both arguments are 2-D arrays with one column per class (one-hot targets,
    per-class scores); the class of a row is the index of its largest entry.
    """
    true_labels = np.argmax(y_true, axis=1)
    predicted_labels = np.argmax(y_pred, axis=1)
    hits = np.sum(true_labels == predicted_labels)
    return hits / len(y_true)

def precision_score(y_true, y_pred):
    """Precision of class 1 for a binary (two-column) problem.

    Both arguments are 2-D arrays with one column per class; the class of a
    row is the index of its largest entry.

    Returns:
        ``TP / (TP + FP)``, or ``0.0`` when no row is predicted as class 1
        (previously this divided 0 by 0 and produced NaN with a warning).
    """
    true_labels = np.argmax(y_true, axis=1)
    predicted_labels = np.argmax(y_pred, axis=1)

    predicted_positive = predicted_labels == 1
    true_positives = np.sum(predicted_positive & (true_labels == 1))
    false_positives = np.sum(predicted_positive & (true_labels == 0))

    denominator = true_positives + false_positives
    if denominator == 0:
        return 0.0
    return true_positives / denominator

def recall_score(y_true, y_pred):
    """Recall of class 1 for a binary (two-column) problem.

    Both arguments are 2-D arrays with one column per class; the class of a
    row is the index of its largest entry.

    Returns:
        ``TP / (TP + FN)``, or ``0.0`` when no row truly belongs to class 1
        (previously this divided 0 by 0 and produced NaN with a warning).
    """
    true_labels = np.argmax(y_true, axis=1)
    predicted_labels = np.argmax(y_pred, axis=1)

    actual_positive = true_labels == 1
    true_positives = np.sum(actual_positive & (predicted_labels == 1))
    false_negatives = np.sum(actual_positive & (predicted_labels == 0))

    denominator = true_positives + false_negatives
    if denominator == 0:
        return 0.0
    return true_positives / denominator

def f1_score(y_true, y_pred):
    """F1 score of class 1 for a binary (two-column) problem.

    Both arguments are 2-D arrays with one column per class; the class of a
    row is the index of its largest entry.

    The score is computed directly from the confusion counts as
    ``2*TP / (2*TP + FP + FN)``, which is algebraically the harmonic mean of
    precision and recall, ``2*P*R / (P + R)``. Working from the counts keeps
    the result defined when precision or recall alone would be 0/0.

    Returns:
        The F1 score, or ``0.0`` when there are no true positives, false
        positives or false negatives at all.
    """
    true_labels = np.argmax(y_true, axis=1)
    predicted_labels = np.argmax(y_pred, axis=1)

    true_positives = np.sum((true_labels == 1) & (predicted_labels == 1))
    false_positives = np.sum((true_labels == 0) & (predicted_labels == 1))
    false_negatives = np.sum((true_labels == 1) & (predicted_labels == 0))

    denominator = 2 * true_positives + false_positives + false_negatives
    if denominator == 0:
        return 0.0
    return 2 * true_positives / denominator


In [None]:
# One vectorised forward pass over the whole test set; forward() returns one
# row of class probabilities per input row, the layout the metrics expect.
predictions = mushrooms_mlp.forward(X_test.values)

accuracy = accuracy_score(y_test.values, predictions)
precision = precision_score(y_test.values, predictions)
recall = recall_score(y_test.values, predictions)
f1 = f1_score(y_test.values, predictions)

print(f'Test accuracy: {accuracy:.3f}')
print(f'Test precision: {precision:.3f}')
print(f'Test recall: {recall:.3f}')
print(f'Test f1 score: {f1:.3f}')

Test accuracy: 1.000
Test precision: 1.000
Test recall: 1.000
Test f1 score: 1.000
