# Goal

In this notebook, the examples in chapter one of the reference book are investigated. Therefore, I will study the functions they use, along with their inputs and outputs.

# A. Required Libraries

In [5]:
import numpy as np
from scipy import sparse
import pandas as pd
import scipy as sp
import sys
import sklearn
import mglearn
import IPython
import matplotlib
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# B. Examples

Each section of this part consists of the code and the explanation afterward.

## B.1

In [3]:
 # Build a 2x3 integer array from a nested list (one inner list per row).
 x = np.array([
     [1, 2, 3],
     [4, 5, 6],
 ])
 # Render the array below an "x:" header line.
 x_report = "x:\n{}".format(x)
 print(x_report)

x:
[[1 2 3]
 [4 5 6]]


`np.array`: Creates a NumPy array from the given nested list. Here the result is a 2D array (also known as a matrix) with two rows and three columns.

`{}` → A placeholder that will be replaced by x.

`.format(x)` → Replaces `{}` in the string with the string representation of `x`, which is the NumPy array.

## B.2

In [9]:
# Build the 4x4 identity matrix: ones on the main diagonal, zeros elsewhere
identity_size = 4
eye = np.eye(identity_size)
# Show the dense array below a header line
eye_report = "NumPy array:\n{}".format(eye)
print(eye_report)

NumPy array:
[[1. 0. 0. 0.]
 [0. 1. 0. 0.]
 [0. 0. 1. 0.]
 [0. 0. 0. 1.]]


`np.eye(N, M=None, k=0, dtype=float, order='C')` is a NumPy function that returns a 2D array with N rows and M columns (M defaults to N) that has 1s on the k-th diagonal (the main diagonal by default) and 0s everywhere else.

`k=1`: Moves the diagonal one step above the main diagonal.

`k=-1`: Moves the diagonal one step below the main diagonal.

You can create a rectangular matrix by specifying the number of columns (`M`), e.g. `np.eye(2, 3)`.

## B.3

In [10]:
# Re-encode the dense identity array in SciPy's CSR (Compressed Sparse Row)
# format, which keeps only the nonzero entries
sparse_matrix = sparse.csr_matrix(eye)
# Printing a sparse matrix lists its stored (row, column) coordinates and values
csr_report = "\nSciPy sparse CSR matrix:\n{}".format(sparse_matrix)
print(csr_report)


SciPy sparse CSR matrix:
<Compressed Sparse Row sparse matrix of dtype 'float64'
	with 4 stored elements and shape (4, 4)>
  Coords	Values
  (0, 0)	1.0
  (1, 1)	1.0
  (2, 2)	1.0
  (3, 3)	1.0


`sparse.csr_matrix(eye)` – Creating a Sparse Matrix

`eye` is the dense NumPy array (the 4×4 identity matrix) created with `np.eye()` in B.2.

`sparse.csr_matrix(eye)` converts this dense matrix into a CSR (Compressed Sparse Row) sparse matrix.

In CSR format, only the nonzero elements are stored, which reduces memory usage when most entries are zero.

## B.4

In [None]:
 # Build a 4x4 identity matrix directly in COO (coordinate) format:
 # entry data[i] is placed at position (row_indices[i], col_indices[i]).
 diagonal_length = 4
 data = np.ones(diagonal_length)
 row_indices = np.arange(diagonal_length)
 col_indices = np.arange(diagonal_length)
 coordinates = (row_indices, col_indices)
 eye_coo = sparse.coo_matrix((data, coordinates))
 coo_report = "COO representation:\n{}".format(eye_coo)
 print(coo_report)

## B.5

In [None]:
 # Sample 100 evenly spaced points covering the interval from -10 to 10
 x = np.linspace(start=-10, stop=10, num=100)
 # Evaluate the sine function at every sample point
 y = np.sin(x)
 # Draw y against x as a line chart, marking each sample with an "x"
 plt.plot(x, y, marker="x")

## B.6

In [None]:
# Create a simple dataset of people: one dict key per column, one list entry per row
data = {
    'Name': ["John", "Anna", "Peter", "Linda"],
    'Location': ["New York", "Paris", "Berlin", "London"],
    'Age': [24, 13, 53, 33],
}
data_pandas = pd.DataFrame(data)
# IPython.display allows "pretty printing" of dataframes
# in the Jupyter notebook
display(data_pandas)
# Select all rows that have an age column greater than 30
# (the comparison yields a boolean mask that filters the rows)
display(data_pandas[data_pandas.Age > 30])

## B.7

In [None]:
# Report the interpreter version and the version of every core library used
# in this notebook, one line per package.
version_lines = [
    ("Python version: {}", sys.version),
    ("pandas version: {}", pd.__version__),
    ("matplotlib version: {}", matplotlib.__version__),
    ("NumPy version: {}", np.__version__),
    ("SciPy version: {}", sp.__version__),
    ("IPython version: {}", IPython.__version__),
    ("scikit-learn version: {}", sklearn.__version__),
]
for template, version in version_lines:
    print(template.format(version))

## B.8

In [None]:
# Load the iris dataset; the returned object is accessed by key below
iris_dataset = load_iris()
print("Keys of iris_dataset: \n{}".format(iris_dataset.keys()))
# Show only the beginning of the long description text
print(iris_dataset['DESCR'][:193] + "\n...")
print("Target names: {}".format(iris_dataset['target_names']))
print("Feature names: \n{}".format(iris_dataset['feature_names']))
print("Type of data: {}".format(type(iris_dataset['data'])))
print("Shape of data: {}".format(iris_dataset['data'].shape))
# [:5] slices along the first axis, i.e. it selects the first five rows
print("First five rows of data:\n{}".format(iris_dataset['data'][:5]))
print("Type of target: {}".format(type(iris_dataset['target'])))
print("Shape of target: {}".format(iris_dataset['target'].shape))
print("Target:\n{}".format(iris_dataset['target']))

In [None]:
# Split the samples and labels into a training part and a held-out test part;
# the fixed random_state keeps the split the same on every run
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'], iris_dataset['target'], random_state=0)
print("X_train shape: {}".format(X_train.shape))
print("y_train shape: {}".format(y_train.shape))
print("X_test shape: {}".format(X_test.shape))
print("y_test shape: {}".format(y_test.shape))

# create dataframe from data in X_train
# label the columns using the strings in iris_dataset.feature_names
iris_dataframe = pd.DataFrame(X_train, columns=iris_dataset.feature_names)
# create a scatter matrix from the dataframe, color by y_train
# (scatter_matrix is accessed through pandas.plotting; the old top-level
# pd.scatter_matrix alias is not available in current pandas releases)
grr = pd.plotting.scatter_matrix(
    iris_dataframe, c=y_train, figsize=(15, 15), marker='o',
    hist_kwds={'bins': 20}, s=60, alpha=.8, cmap=mglearn.cm3)

# Fit a nearest-neighbor classifier that uses a single neighbor
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)

# Classify one new sample, given as a 2D array with one row and four features
X_new = np.array([[5, 2.9, 1, 0.2]])
print("X_new.shape: {}".format(X_new.shape))
prediction = knn.predict(X_new)
print("Prediction: {}".format(prediction))
print("Predicted target name: {}".format(iris_dataset['target_names'][prediction]))

# Evaluate on the held-out test set: the fraction of correct predictions is
# computed once by hand and once through the classifier's score() method
y_pred = knn.predict(X_test)
print("Test set predictions:\n {}".format(y_pred))
print("Test set score: {:.2f}".format(np.mean(y_pred == y_test)))
print("Test set score: {:.2f}".format(knn.score(X_test, y_test)))

# Condensed recap of the whole workflow: split, fit, score
X_train, X_test, y_train, y_test = train_test_split(
    iris_dataset['data'], iris_dataset['target'], random_state=0)
knn = KNeighborsClassifier(n_neighbors=1)
knn.fit(X_train, y_train)
print("Test set score: {:.2f}".format(knn.score(X_test, y_test)))