In [None]:
from sklearn import datasets

# Load the iris dataset bundled with scikit-learn and display the names
# of its feature columns as the cell output.
iris = datasets.load_iris()
iris.feature_names

In [None]:
import pandas as pd

# Assemble the iris measurements into a DataFrame: the four measurement
# columns, the integer class id, and the species name looked up from it.
df = pd.DataFrame(
    iris.data[:, :4],
    columns=[
        'sepal length (cm)',
        'sepal width (cm)',
        'petal length (cm)',
        'petal width (cm)',
    ],
).assign(target=iris.target)
# Translate each integer class id into its species name.
df['name'] = df['target'].map(lambda class_id: iris.target_names[class_id])
df

In [None]:
from matplotlib import pyplot as plt

# Grid of histograms: one row per feature, one column per species.
# The grid size is derived from the data instead of being hard-coded.
n_rows = len(iris.feature_names)
n_cols = len(iris.target_names)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 24), squeeze=False)
for row, feature_name in enumerate(iris.feature_names):
    for col, target_name in enumerate(iris.target_names):
        ax = axes[row, col]
        ax.hist(df.loc[df.name == target_name, feature_name])
        ax.set_title(target_name)
        # A histogram has the measured value on x and the frequency on y;
        # the feature name already carries the unit ("... (cm)").
        ax.set_xlabel(feature_name)
        ax.set_ylabel('count')
# Lay the figure out once, after every subplot exists.
fig.tight_layout()

In [None]:
# Pairwise correlation matrix of the four measurement columns.
df.loc[:, [
    'sepal length (cm)',
    'sepal width (cm)',
    'petal length (cm)',
    'petal width (cm)',
]].corr()

In [None]:
from sklearn.model_selection import train_test_split

# Features: all measurement columns; label: the integer class id.
X = df.loc[:, iris.feature_names]
y = df.loc[:, 'target']
# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=86,
    test_size=0.3,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees. The seed is fixed so that the accuracy and
# feature importances reported below are the same on every run.
clf_rf = RandomForestClassifier(n_estimators=100, random_state=86)

# Fit on the training split and predict classes for the held-out rows.
clf_rf.fit(X_train, y_train)
y_pred = clf_rf.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the forest's predictions on the held-out test split.
accuracy_score(y_test, y_pred)

In [None]:
# Pair each importance from the fitted forest with its feature name and
# sort the result (sort_values defaults to ascending order).
feature_imp = pd.Series(clf_rf.feature_importances_, index=iris.feature_names).sort_values()

In [None]:
# Show the sorted feature importances as plain text.
print(feature_imp)

In [None]:
from sklearn.model_selection import train_test_split

# Repeat the split using three of the four measurements
# ('sepal width (cm)' is left out).
X = df.loc[:, [
    'sepal length (cm)',
    'petal length (cm)',
    'petal width (cm)',
]]
y = df.loc[:, 'target']
# Same test fraction and seed as the full-feature split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=86,
    test_size=0.3,
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees on the reduced feature set. The seed is fixed
# so that a change in accuracy reflects the features used, not the
# randomness of tree construction.
clf_rf = RandomForestClassifier(n_estimators=100, random_state=86)

# Fit on the training split and predict classes for the held-out rows.
clf_rf.fit(X_train, y_train)
y_pred = clf_rf.predict(X_test)

In [None]:
from sklearn.metrics import accuracy_score

# Accuracy of the most recently fitted forest on the held-out test split.
accuracy_score(y_test, y_pred)

In [None]:
from keras.datasets import fashion_mnist
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras.models import Sequential
import keras

In [None]:
# Load Fashion-MNIST; this rebinds X_train/X_test/y_train/y_test, which
# previously held the iris split.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

In [None]:
# Divide both image sets by 255.
# NOTE: the names are rebound to the scaled arrays, so running this cell
# twice divides by 255 twice.
X_train, X_test = X_train / 255, X_test / 255

In [None]:
# One-hot encode the labels of both splits into 10 classes.
y_train_cat, y_test_cat = (
    keras.utils.to_categorical(class_ids, 10)
    for class_ids in (y_train, y_test)
)

In [None]:
# Small fully connected classifier: flatten each image, one hidden ReLU
# layer of 128 units, and a 10-way softmax output.
model = keras.Sequential([
    # The images are fed as 28x28 arrays with no channel axis, so the
    # declared input shape matches the data exactly.
    Flatten(input_shape=(28, 28)),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax')
])
# summary() prints the table itself and returns None; wrapping it in
# print() only appended a stray "None" to the output.
model.summary()

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded), and accuracy as the reported metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
# Train for 10 epochs with batches of 32, holding out 20% of the training
# data for validation.
model.fit(X_train, y_train_cat, batch_size=32, epochs=10, validation_split=0.2)

In [None]:
# Evaluate the trained network on the test images and one-hot test labels.
model.evaluate(X_test, y_test_cat)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Classify one test image and display it next to the network's output.
n = 0
# Add a leading batch axis so the single image is passed as a batch of one.
x = X_test[n][np.newaxis, ...]
res = model.predict(x)
print(res)
# Index of the largest output, i.e. the predicted class.
print(res.argmax())
plt.imshow(X_test[n], cmap=plt.cm.binary)
plt.show()

In [None]:
# Predict every test image and reduce each row of outputs to the index
# of its largest value (the predicted class).
pred = model.predict(X_test).argmax(axis=1)

In [None]:
# Shape of the predicted-class array.
print(pred.shape)

# First 20 predicted classes followed by the first 20 true labels,
# for a side-by-side eyeball comparison.
print(pred[:20])
print(y_test[:20])

In [None]:
# Boolean mask: True where the predicted class equals the true label.
mask = pred == y_test
print(mask[:10])

# Keep only the misclassified samples: their images and their TRUE labels.
# (y_false previously sliced X_test, which duplicated the images instead
# of selecting the labels.)
x_false = X_test[~mask]
y_false = y_test[~mask]

print(x_false.shape)

In [None]:
# Show up to five misclassified images with their labels. The labels are
# selected with the same inverted mask as x_false so that the i-th printed
# pair belongs to the i-th displayed image (indexing y_test/pred directly
# would report the first five test samples instead).
wrong_true = y_test[~mask]
wrong_pred = pred[~mask]
# NOTE(review): the first printed caption accompanies the true label and
# the second the network's prediction -- confirm the caption wording.
for i in range(min(5, len(x_false))):
  print("Значение сети: "+str(wrong_true[i]),'\n', 'Предсказанное значение', wrong_pred[i])
  plt.imshow(x_false[i], cmap=plt.cm.binary)
  plt.show()

In [None]:
from sklearn.datasets import make_blobs, make_circles
from sklearn.preprocessing import  StandardScaler
from matplotlib import pyplot as plt

In [None]:
# Generate 500 two-dimensional points around three fixed centers with a
# standard deviation of 0.3; the seed makes the sample repeatable.
# This rebinds X, which previously held the iris feature frame.
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels = make_blobs(n_samples=500, centers=centers, cluster_std=0.3, random_state=42)

In [None]:
# Scatter the blob points, colored by the label returned by make_blobs.
plt.scatter(X[:, 0], X[:, 1], c=labels)

In [None]:
from sklearn.cluster import KMeans, DBSCAN

In [None]:
# K-Means with three clusters. The seed fixes the centroid initialisation
# so cluster assignments (and plot colors) are the same on every run, and
# n_init is set explicitly because its default differs between
# scikit-learn versions.
km_blobs = KMeans(n_clusters=3, n_init=10, random_state=42)
km_blobs.fit(X)

In [None]:
# Cluster index assigned to each blob point by the fitted K-Means model.
km_labels = km_blobs.labels_

In [None]:
# Scatter the blob points again, now colored by their K-Means cluster.
plt.scatter(X[:, 0], X[:, 1], c=km_labels)

In [None]:
# Generate 750 points with make_circles (noise=0.04, factor=0.5, seeded)
# and scatter them colored by the label returned by the generator.
X_circles, cir_labels = make_circles(n_samples=750, noise=0.04, random_state=42, factor=0.5)
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=cir_labels)

In [None]:
# K-Means on the circle data. make_circles produces two groups, so two
# clusters are requested explicitly (the bare KMeans() default is 8); the
# seed and n_init are fixed for run-to-run reproducibility.
km_circle = KMeans(n_clusters=2, n_init=10, random_state=42)
km_circle.fit(X_circles)

In [None]:
# Scatter the circle points colored by the cluster K-Means assigned to each.
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=km_circle.labels_)

In [None]:
# DBSCAN on the same circle data, with eps=0.3 and min_samples=10.
# NOTE(review): the points are clustered unscaled although StandardScaler
# is imported above -- confirm eps=0.3 is intended for raw coordinates.
db_circ = DBSCAN(eps=0.3, min_samples=10)
db_circ.fit(X_circles)

In [None]:
# Scatter the circle points colored by the label DBSCAN assigned to each.
plt.scatter(X_circles[:, 0], X_circles[:, 1], c=db_circ.labels_)



In [None]:
import pandas as pd
import numpy as np

1 — возраст (признак)
2 — пол (признак); категориальная переменная -> закодировать
3 — индекс массы тела (признак)
4 — количество детей (признак); категориальная переменная -> закодировать
5 — курит / не курит (признак); категориальная переменная -> закодировать
6 — регион проживания (признак); категориальная переменная -> закодировать
7 — траты на медицинское обслуживание (целевая переменная: то, что мы хотим предсказать)

In [None]:
# Read the dataset from a CSV file and preview its first rows.
# This rebinds df, which previously held the iris frame.
df = pd.read_csv('../data/attestation.csv')
df.head()

In [None]:
# Target is the 'charges' column; the features are every other column.
y = df.loc[:, 'charges']
X = df.drop(columns=['charges'])

In [None]:
# Encode sex as 0/1 (female -> 0, male -> 1). The mapping reads from the
# untouched source frame, so re-running the cell gives the same result,
# and an explicit map avoids relying on replace() silently converting an
# object column to integers.
X['sex'] = df['sex'].map({'female': 0, 'male': 1})
X

In [None]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode 'region': add one 0/1 column per category, named after
# the category, then remove the original text column.
enc = OneHotEncoder().fit(df[['region']])
transformed = enc.transform(df[['region']]).toarray()
for col_idx, category in enumerate(enc.categories_[0]):
    X[category] = transformed[:, col_idx]
# errors='ignore' keeps the cell re-runnable: on a second run 'region' has
# already been dropped and a plain drop would raise KeyError.
X = X.drop(columns='region', errors='ignore')

In [None]:
# Encode smoker as 0/1 with the conventional polarity (yes -> 1, no -> 0),
# so that the flag is 1 when the person smokes. A linear model with an
# intercept makes the same predictions under either polarity; only the
# sign of this feature's coefficient changes. Mapping from the untouched
# source frame keeps the cell idempotent on re-run.
X['smoker'] = df['smoker'].map({'yes': 1, 'no': 0})
X

In [None]:
# One-hot encode 'children' (treated as categorical): add one 0/1 column
# per distinct value, then remove the original column. Column names are
# converted to str so that all feature names in X are strings.
enc = OneHotEncoder().fit(df[['children']])
transformed = enc.transform(df[['children']]).toarray()
for col_idx, category in enumerate(enc.categories_[0]):
    X[str(category)] = transformed[:, col_idx]
# errors='ignore' keeps the cell re-runnable: on a second run 'children'
# has already been dropped and a plain drop would raise KeyError.
X = X.drop(columns='children', errors='ignore')

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

In [None]:
# Hold out 20% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Fit an ordinary linear regression on the training split.
# NOTE: this rebinds `model`, which earlier in the notebook referred to
# the Keras network.
model = LinearRegression()
model.fit(X_train, y_train)

In [39]:
# Predict on the held-out rows and report the mean squared error
# between the true and predicted targets.
y_pred = model.predict(X_test)
mean_squared_error(y_test, y_pred)

33756429.70235282

In [40]:
# R^2 score of the same predictions on the held-out rows.
r2_score(y_test, y_pred)

0.7825655038982295