In [None]:
# Pull in the interpreter module and the scientific stack used throughout the
# notebook, then report every version so the environment behind the results
# is on record.
import sys
import numpy as np
import pandas as pd
import matplotlib
import seaborn as sns
import sklearn as sk

print(f'Python: {sys.version}')
print(f'numpy: {np.__version__}')
print(f'pandas: {pd.__version__}')
print(f'matplotlib: {matplotlib.__version__}')
print(f'seaborn: {sns.__version__}')
print(f'sklearn: {sk.__version__}')

In [None]:
# Load the dataset named 'iris' through seaborn's load_dataset helper.
# Later cells treat the result as a pandas DataFrame with a 'species' column.
iris = sns.load_dataset('iris')

In [None]:
# Check which container type load_dataset handed back.
type(iris)

In [None]:
# Bare last expression: the notebook renders the table with its rich repr.
iris

In [None]:
# (number of rows, number of columns) of the table.
iris.shape

In [None]:
# Print the frame's structural summary (columns, dtypes, non-null counts).
iris.info()

In [None]:
# Summary statistics for the numeric measurement columns.
iris.describe()

In [None]:
# How many rows carry each species label -- shows whether the classes are balanced.
iris['species'].value_counts()

In [None]:
import matplotlib.pyplot as plt

# One box plot per numeric column, with the boxes grouped by species.
# DataFrame.boxplot builds its own figure (sized by `figsize`), so no
# plt.figure() call precedes it: that call only produced an extra, empty
# figure in the cell output.
iris.boxplot(by="species", figsize=(15, 10))
plt.show()

In [None]:
# Scatter-plot matrix of every pair of measurements, coloured by species.
# The trailing semicolon keeps the PairGrid repr out of the cell output,
# matching how the lmplot cell further down is written.
sns.pairplot(iris, hue='species');

In [None]:
# Same pair plot, with KDE contours drawn over the lower-triangle panels.
# map_lower hands back the grid it was called on, so the two steps chain
# and `g` ends up bound to that grid.
g = sns.pairplot(iris, hue='species').map_lower(sns.kdeplot)

In [None]:
# Separate the label column from the measurements: the classifier is trained
# on everything except 'species' and asked to predict 'species'.
iris_target = iris['species']
iris_feature = iris.drop(columns='species')

# Echo both pieces, each under its own heading.
for heading, part in (('iris_feature', iris_feature), ('iris_target', iris_target)):
    print(heading)
    print(part)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out one third of the rows for testing; the fixed random_state makes
# the split repeatable from run to run.
(
    iris_feature_train,
    iris_feature_test,
    iris_target_train,
    iris_target_test,
) = train_test_split(iris_feature, iris_target, test_size=1/3, random_state=12345)

# Report the size of each of the four pieces.
for part in (iris_feature_train, iris_feature_test, iris_target_train, iris_target_test):
    print(part.shape)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Train a k-nearest-neighbours classifier (library defaults) on the training
# split. fit() returns the estimator itself, so construction and fitting chain.
knn = KNeighborsClassifier().fit(iris_feature_train, iris_target_train)

# Predicted species for every held-out row.
iris_target_predicted = knn.predict(iris_feature_test)

In [None]:
# One line per test row: actual species, then predicted species.
# Rows where the two disagree are flagged with a leading "* ".
for actual, guess in zip(iris_target_test.tolist(), iris_target_predicted.tolist()):
    flag = "* " if actual != guess else ""
    print(f'{flag}{actual} {guess}')

In [None]:
from sklearn.metrics import confusion_matrix

# Raw confusion matrix for the test split: rows are the actual classes,
# columns the predicted ones.
confusion_matrix(y_true=iris_target_test, y_pred=iris_target_predicted)

In [None]:
# Confusion matrix with readable row (actual) and column (predicted) labels.
# The labels come from the fitted classifier instead of a hand-typed list:
# the old list misspelled 'versicolor' as 'veriscolor'. Passing the same
# labels to confusion_matrix pins the row/column order and the matrix size
# even if a class happens to be absent from the test split.
species_labels = knn.classes_
print(pd.DataFrame(
    confusion_matrix(iris_target_test, iris_target_predicted, labels=species_labels),
    index=species_labels,
    columns=species_labels,
))

In [None]:
from sklearn import metrics

# Fraction of test rows classified correctly, printed as a percentage
# with one decimal place.
accuracy = metrics.accuracy_score(iris_target_test, iris_target_predicted)
print(f'{100*accuracy:.1f}%')

In [None]:
# Classify one hand-made flower. It is wrapped in a one-row DataFrame that
# uses the training column names; [0] unwraps the single prediction.
sample = pd.DataFrame(
    [[5.1, 2.1, 1.1, 0.2]],
    columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
)
knn.predict(sample)[0]

In [None]:
from sklearn.model_selection import StratifiedKFold, cross_val_score

# Ten shuffled, stratified folds over the full data set; the seed fixes
# which rows land in which fold.
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=54321)

# Accuracy of the KNN classifier on each of the ten folds.
cross_val_score(knn, iris_feature, iris_target, scoring='accuracy', cv=kfold)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score

# Compare several classifiers by mean 10-fold cross-validated accuracy.
# The splitter does not depend on the model, so it is built once outside the
# loop; with an integer random_state it yields the same folds on every use,
# so each model is scored on identical splits.
kfold = StratifiedKFold(n_splits=10, random_state=11111, shuffle=True)

for name, model in [
    ('Logistic Regression', LogisticRegression(solver='liblinear')),
    ('Linear Discriminant Analysis', LinearDiscriminantAnalysis()),
    ('K-Neighbors Classifier', KNeighborsClassifier()),
    # Tree building is randomised; seed it so the reported score is repeatable.
    ('Decision Tree Classifier', DecisionTreeClassifier(random_state=11111)),
    ('Gaussian Naive Bayes', GaussianNB()),
    ('C-Support Vector Classification', SVC()),
]:
    cv_results = cross_val_score(model, iris_feature, iris_target, cv=kfold, scoring='accuracy')
    print(f'{name}: {100*cv_results.mean():.2f}%')

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score

# Simple linear regression: predict petal width from petal length alone.
# Double brackets keep the predictor as a one-column frame rather than a Series.
iris_lr_predictor = iris[['petal_length']]
iris_lr_to_predict = iris['petal_width']
iris_lr_predictor_train, iris_lr_predictor_test, iris_lr_to_predict_train, iris_lr_to_predict_test = train_test_split(iris_lr_predictor, iris_lr_to_predict, test_size=1/3, random_state=12345)

# Fit on the underlying arrays (.values); the later cells also call predict
# with plain arrays.
lr = LinearRegression()
lr.fit(iris_lr_predictor_train.values, iris_lr_to_predict_train.values)
print('Coefficients:', lr.coef_)  # label spelling corrected (was 'Coeficients')
print('Intercept:', lr.intercept_)

In [None]:
# Training points with the fitted regression line drawn across x = 0..8.
plt.scatter(iris_lr_predictor_train, iris_lr_to_predict_train)
xfit = np.array([0, 8])
# predict expects a 2-D (n_samples, n_features) array, hence the added axis.
yfit = lr.predict(xfit[:, np.newaxis])
plt.plot(xfit, yfit)
# Label the axes so the figure can be read on its own, and finish with
# plt.show() so the cell does not end on the list of Line2D objects.
plt.xlabel('petal_length')
plt.ylabel('petal_width')
plt.show()

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Fit quality of the single-predictor model on the rows it was trained on.
iris_lr_predicted_train = lr.predict(iris_lr_predictor_train.values)
print('R^2:', r2_score(iris_lr_to_predict_train, iris_lr_predicted_train))
print('Mean Absolute Error:', mean_absolute_error(iris_lr_to_predict_train, iris_lr_predicted_train))
# sqrt(MSE) is the root-mean-squared error; the old label had the words out of order.
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(iris_lr_to_predict_train, iris_lr_predicted_train)))

In [None]:
# Same metrics on the held-out rows, to see how the single-predictor model
# generalises beyond its training data.
iris_lr_predicted_test = lr.predict(iris_lr_predictor_test.values)
print('R^2:', r2_score(iris_lr_to_predict_test, iris_lr_predicted_test))
print('Mean Absolute Error:', mean_absolute_error(iris_lr_to_predict_test, iris_lr_predicted_test))
# sqrt(MSE) is the root-mean-squared error; the old label had the words out of order.
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(iris_lr_to_predict_test, iris_lr_predicted_test)))

In [None]:
# Multiple linear regression: predict petal width from the other three
# measurements, using the same split proportion and seed as the
# single-predictor model.
iris_multi_lr_predictor = iris[['sepal_length', 'sepal_width', 'petal_length']]
iris_multi_lr_to_predict = iris['petal_width']
iris_multi_lr_predictor_train, iris_multi_lr_predictor_test, iris_multi_lr_to_predict_train, iris_multi_lr_to_predict_test = train_test_split(iris_multi_lr_predictor, iris_multi_lr_to_predict, test_size=1/3, random_state=12345)

# Fit on the underlying arrays (.values); the metric cells also predict from
# plain arrays.
mlr = LinearRegression()
mlr.fit(iris_multi_lr_predictor_train.values, iris_multi_lr_to_predict_train.values)
print('Coefficients:', mlr.coef_)  # label spelling corrected (was 'Coeficients')
print('Intercept:', mlr.intercept_)

In [None]:
# Fit quality of the three-predictor model on the rows it was trained on.
iris_multi_lr_predicted_train = mlr.predict(iris_multi_lr_predictor_train.values)
print('R^2:', r2_score(iris_multi_lr_to_predict_train, iris_multi_lr_predicted_train))
print('Mean Absolute Error:', mean_absolute_error(iris_multi_lr_to_predict_train, iris_multi_lr_predicted_train))
# sqrt(MSE) is the root-mean-squared error; the old label had the words out of order.
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(iris_multi_lr_to_predict_train, iris_multi_lr_predicted_train)))

In [None]:
# Same metrics on the held-out rows for the three-predictor model.
iris_multi_lr_predicted_test = mlr.predict(iris_multi_lr_predictor_test.values)
print('R^2:', r2_score(iris_multi_lr_to_predict_test, iris_multi_lr_predicted_test))
print('Mean Absolute Error:', mean_absolute_error(iris_multi_lr_to_predict_test, iris_multi_lr_predicted_test))
# sqrt(MSE) is the root-mean-squared error; the old label had the words out of order.
print('Root Mean Squared Error:', np.sqrt(mean_squared_error(iris_multi_lr_to_predict_test, iris_multi_lr_predicted_test)))

In [None]:
from sklearn.decomposition import PCA

# Project the four measurements onto their first two principal components.
# fit() returns the estimator, so fitting and projecting chain together.
pca = PCA(n_components=2)
iris_feature_2d = pca.fit(iris_feature).transform(iris_feature)

# Put the two scores in a frame next to the species label, for plotting.
iris_2d = pd.DataFrame(iris_feature_2d, columns=['PCA1', 'PCA2']).assign(
    species=iris_target.values
)
iris_2d

In [None]:
# Fitted PCA parameters: components_ holds one row per retained component
# (two here, as requested via n_components=2); mean_ is the per-feature mean
# that the reconstruction cell at the end of the notebook adds back.
print(pca.components_)
print(pca.mean_)

In [None]:
# Share of the total variance captured by each retained component.
pca.explained_variance_ratio_

In [None]:
# Combined share of the variance that the two retained components capture.
pca.explained_variance_ratio_.sum()

In [None]:
# Scatter of the two principal-component scores, coloured by species.
# fit_reg=False turns off the regression line lmplot would otherwise draw.
sns.lmplot(data=iris_2d, x='PCA1', y='PCA2', hue='species', fit_reg=False);

In [None]:
from sklearn.preprocessing import StandardScaler

# Fit a standardiser on the features and show the per-column mean and
# variance it learned. fit() returns the scaler, so it chains with construction.
sc = StandardScaler().fit(iris_feature)
print(sc.mean_)
print(sc.var_)

In [None]:
# Rebuild every flower from its two principal-component scores and print the
# approximation (two decimals) next to the original measurement.
feature_names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']

# inverse_transform maps the 2-D scores back to feature space (scores times
# components, plus the mean), replacing the hand-written per-element sum.
iris_feature_rebuilt = pca.inverse_transform(iris_feature_2d)

# Loop bounds follow the data instead of the hard-coded 150 rows / 4 columns.
for i in range(len(iris_feature)):
    print(i)
    for j, feature_name in enumerate(feature_names):
        print(f'  {feature_name}: {iris_feature_rebuilt[i, j]:.2f} {iris_feature.iloc[i, j]}')