# Book graphs

This notebook contains the code that generates a couple of the graphs included in the book.

## Figure 1.1
Sharp questions machine learning can answer

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Open the figure first so the size/DPI settings apply to the plot drawn below;
# a figure created after plotting is a separate, empty canvas.
plt.figure(figsize=(10,10), dpi=1200)

# Ten points lying on or above the line y = x (uniform noise in [0, 3) is added).
# The fixed seed keeps the generated figure reproducible.
x = np.arange(1,11)
np.random.seed(1773)
y = x+3*np.random.rand(len(x))

# Degree-1 least-squares fit: slope a and intercept b of the trend line.
a,b = np.polyfit(x,y,1)
yfit = a*x + b

plt.plot(x,y, 'r+')         # raw points as red plus markers
plt.plot(x, yfit, '--k');   # fitted line, dashed black

In [None]:
from azureml.core import Workspace, Dataset
import matplotlib.pyplot as plt

# Resolve the Azure ML workspace from configuration (no explicit IDs are passed here).
ws = Workspace.from_config()

# Look up the registered dataset by name.
dataset = Dataset.get_by_name(ws, name='churn-dataset')

# Only these two columns are materialized into the pandas frame.
plot_columns = ['customer_tenure', 'churned']
churn_df = dataset.keep_columns(plot_columns).to_pandas_dataframe()

In [None]:
# Order rows by `churned` ascending and, within each group, by tenure descending.
# A single multi-key sort is used because two consecutive single-key sorts only
# compose when the second sort is stable, which the default sort kind does not
# guarantee. Re-assigning (instead of inplace=True) keeps the cell re-runnable.
churn_df = (
    churn_df
    .sort_values(by=['churned', 'customer_tenure'], ascending=[True, False])
    .reset_index(drop=True)  # positional index 0..n-1 in the new order
)
churn_df.head()

In [None]:
# Create the canvas before drawing so figsize/dpi apply to this plot; calling
# plt.figure() afterwards would only open a second, empty figure.
plt.figure(figsize=(10,10), dpi=1200)

# One point per row: x is the row index, y is the tenure, colour encodes `churned`.
plt.scatter(churn_df.index,churn_df['customer_tenure'], c=churn_df['churned'])

# Index label of the first row flagged as churned.
# NOTE(review): this marks the group boundary only if churn_df is already sorted
# by `churned` with a reset index; it raises IndexError when no row is churned.
min_true_index=churn_df[churn_df.churned==True].index[0]
plt.axvline(x=min_true_index);

In [None]:
from sklearn.cluster import KMeans
from sklearn.datasets import make_blobs
from sklearn.metrics import euclidean_distances

# Create the canvas before drawing so figsize/dpi apply to the cluster plot;
# calling plt.figure() afterwards would only open a second, empty figure.
plt.figure(figsize=(10,10), dpi=1200)

# Synthetic 2-D blobs; the fixed random_state keeps the figure reproducible.
X, y = make_blobs(n_samples=1500, random_state=1377)

# NOTE(review): scikit-learn 1.1 renamed algorithm='full' to 'lloyd' and 1.3
# removed the old spelling -- switch to 'lloyd' if the installed version rejects it.
kmeans = KMeans(n_clusters=3,algorithm='full', random_state=1377)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)

# Points coloured by their predicted cluster.
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans)

# Cluster centres as red crosses.
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=100, marker='x')

# Dashed circle around every centre; its radius is half of the largest pairwise
# distance between the points assigned to that cluster.
for cluster_id, center in enumerate(centers):
    cluster_points = X[kmeans.labels_ == cluster_id]
    cluster_radius = np.max(euclidean_distances(cluster_points)) / 2
    plt.gca().add_artist(plt.Circle(center, cluster_radius, fill=False, linestyle='--', color='red'))

plt.xlim(-3,11);

In [None]:
# Start from every point that k-means assigned to label 1.
bottom_cluster = X[kmeans.labels_ == 1]

# Keep points whose second coordinate is below -4.5 or above -3 and whose first
# coordinate lies strictly between 0 and 3.
xs, ys = bottom_cluster[:, 0], bottom_cluster[:, 1]
keep = ((ys < -4.5) | (ys > -3)) & (xs > 0) & (xs < 3)
bottom_cluster = bottom_cluster[keep]
plt.scatter(bottom_cluster[:, 0], bottom_cluster[:, 1])

# The first remaining point with second coordinate above -3 is highlighted
# with a dashed red circle.
outlier = bottom_cluster[bottom_cluster[:, 1] > -3][0]
plt.gca().add_artist(plt.Circle(outlier, 0.2, fill=False, linestyle='--', color='red'))

## Figure 9.2

Advanced discrete and continuous hyperparameter value distributions. The sample values are sorted in ascending order, and the x-axis shows each sorted value’s index.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Mean and standard deviation of the normal distribution, and the number of
# values drawn for each distribution.
mu, sigma = 5, 1
sample_rate = 1000
normal_data = np.random.normal(loc=mu, scale=sigma, size=sample_rate)

# The uniform bounds are the rounded extremes of the normal sample.
low, high = round(normal_data.min()), round(normal_data.max())
uniform_data = np.random.uniform(low=low, high=high, size=sample_rate)


def quniform():
    """Draw the top-left panel: uniform samples and their quantized variants.

    Reads the notebook-level ``uniform_data``, ``low`` and ``high``.
    ``uniform_data`` is sorted in place as a side effect.
    """
    plt.subplot(2, 2, 1)
    plt.title(f"Uniform distribution with low={low} and high={high}")

    uniform_data.sort()
    plt.plot(uniform_data, dashes=[6, 2], color="m", label="Continuous uniform")

    for step in (1, 2):
        # Quantization rule: round(uniform(low, high) / q) * q
        quantized = np.sort(np.round(uniform_data / step) * step)
        plt.plot(quantized, label=f'Discrete quniform with q={step}')

    plt.legend(loc='best')

def qloguniform():
    """Draw the bottom-left panel: log-uniform samples and their quantized variants.

    Reads the notebook-level ``uniform_data``, ``low`` and ``high``. The
    log-uniform values are ``exp(uniform_data)``; a fresh array is sorted, so
    ``uniform_data`` itself is not modified here.
    """
    plt.subplot(2,2,3)
    plt.title(f"Log uniform distribution with low={low} and high={high}")
    log_uniform_data = np.exp(uniform_data)
    log_uniform_data.sort()
    plt.plot(log_uniform_data, dashes=[6, 2], color="m", label="Continuous loguniform")
    #plt.yscale('log')
    for q in [1,1000]:
        # round(exp(uniform(low, high)) / q) * q
        values = (log_uniform_data/q).round()*q
        values.sort()
        # The label names the distribution drawn on this panel (qloguniform),
        # matching how the sibling panels label their own curves.
        plt.plot(values, label=f'Discrete qloguniform with q={q}')
    plt.legend(loc='best')

def qnormal():
    """Draw the top-right panel: normal samples and their quantized variants.

    Reads the notebook-level ``normal_data``, ``mu`` and ``sigma``.
    ``normal_data`` is sorted in place as a side effect.
    """
    plt.subplot(2, 2, 2)
    plt.title(f"Normal distribution with μ={mu} and σ={sigma}")

    normal_data.sort()
    plt.plot(normal_data, dashes=[6, 2], color="m", label="Continuous normal")

    for step in (1, 2):
        # Quantization rule: round(normal(mu, sigma) / q) * q
        quantized = np.sort(np.round(normal_data / step) * step)
        plt.plot(quantized, label=f'Discrete qnormal with q={step}')

    plt.legend(loc='best')

def qlognormal():
    """Draw the bottom-right panel: log-normal samples and their quantized variants.

    Reads the notebook-level ``normal_data``, ``mu`` and ``sigma``. The
    log-normal values are ``exp(normal_data)``; a fresh array is sorted, so
    ``normal_data`` itself is not modified here.
    """
    plt.subplot(2, 2, 4)
    plt.title(f"Log normal distribution with μ={mu} and σ={sigma}")

    log_normal_data = np.sort(np.exp(normal_data))
    plt.plot(log_normal_data, dashes=[6, 2], color="m", label="Continuous lognormal")
    #plt.yscale('log')

    for step in (1, 1000):
        # Quantization rule: round(exp(normal(mu, sigma)) / q) * q
        quantized = np.sort(np.round(log_normal_data / step) * step)
        plt.plot(quantized, label=f'Discrete qlognormal with q={step}')

    plt.legend(loc='best')

# One canvas for the 2x2 grid; each helper selects its own subplot slot.
plt.figure(figsize=(16,12), dpi=1200)

# Same call order as before: uniform, log-uniform, normal, log-normal.
for draw_panel in (quniform, qloguniform, qnormal, qlognormal):
    draw_panel()
