# Unsupervised Learning with K-means

**Attention:** The code in this notebook creates Google Cloud resources that can incur costs.

Refer to the Google Cloud pricing documentation for details.

For example:

* [Vertex AI Pricing](https://cloud.google.com/vertex-ai/pricing)


## Install required packages

In [None]:
! pip3 install --upgrade xgboost

## Restart the kernel

The code in the next cell will restart the kernel, which is sometimes required after installing/upgrading packages.

**When prompted, click OK to restart the kernel.**

The sleep command simply prevents further cells from executing before the kernel restarts.

In [None]:
import IPython

# Look up the running IPython application, take its kernel, and ask that
# kernel to shut down; the True argument is passed through unchanged.
app = IPython.Application.instance()
kernel = app.kernel
kernel.do_shutdown(True)


In [None]:
import time

# Block this cell for ten seconds so that later cells do not start executing
# before the kernel has restarted.
RESTART_WAIT_SECONDS = 10
time.sleep(RESTART_WAIT_SECONDS)

# (Wait for kernel to restart before proceeding...)

In [None]:
# Import required resources
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
import pandas as pd # for exploring our data
import matplotlib.pyplot as plt # for plotting our clusters
from mpl_toolkits.mplot3d import Axes3D # Specifically for creating a 3-D graph

In [None]:
# Fetch the Iris dataset that ships with scikit-learn.
iris = load_iris()
# Keep a direct handle on its data array for the cells that follow.
iris_data = iris["data"]

In [None]:
# Wrap the raw Iris array in a pandas DataFrame so it can be inspected.
iris_df = pd.DataFrame(data=iris_data)
# Print a summary of the frame with DataFrame.info().
iris_df.info()

In [None]:
# Show the first five rows of the frame.
iris_df.head(n=5)

In [None]:
# Cluster the Iris measurements into three groups with K-means.
# random_state pins the centroid initialisation so that re-running the notebook
# reproduces the same cluster labels; n_init is set explicitly so the number of
# restarts does not depend on the installed scikit-learn version's default.
kmeans_model = KMeans(n_clusters=3, n_init=10, random_state=42)
kmeans_model.fit(iris_data)
# The fitted model exposes the per-sample cluster assignments as labels_, so no
# separate predict() call (whose result was previously discarded) is needed.
labels = kmeans_model.labels_

In [None]:
# Create a figure object:
fig = plt.figure()
# Attach a 3-D axes to the figure in one step via the "3d" projection:
axes = fig.add_subplot(projection="3d")
# Create the scatter plot to graph the outputs from our K-means model,
# colouring each point by the cluster label it was assigned:
axes.scatter(iris_data[:, 2], iris_data[:, 3], iris_data[:, 1], c=labels.astype(float))
# Set the labels for the X, Y, and Z axes, plus a title so the figure stands alone:
axes.set_xlabel("Petal length")
axes.set_ylabel("Petal width")
axes.set_zlabel("Sepal width")
axes.set_title("K-means clusters of the Iris dataset")
# Render explicitly so the cell does not echo the repr of the last call:
plt.show()

# Supervised Learning: Linear Regression with scikit-learn

In [None]:
# Import necessary resources
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

In [None]:
# Use column 2 as the value the regression will learn to predict.
# Selecting with a one-element list keeps the result a DataFrame.
target = iris_df.loc[:, [2]]

In [None]:
# Columns 0, 1 and 3 serve as the model inputs.
input_feats = iris_df.loc[:, [0, 1, 3]]

In [None]:
# Hold out 20% of the rows for testing. random_state fixes the shuffle so the
# split, and therefore the metrics computed from it, is the same on every run.
input_train, input_test, target_train, target_test = train_test_split(
    input_feats, target, test_size=0.2, random_state=42
)

In [None]:
# Build a LinearRegression estimator and fit it on the training split;
# fit() returns the estimator itself, so both steps chain into one statement.
lreg_model = LinearRegression().fit(input_train, target_train)

# Predict the target for the held-out test inputs.
target_predictions = lreg_model.predict(input_test)

In [None]:
# Put the first five predictions into a DataFrame and display it.
pred_df = pd.DataFrame(data=target_predictions[:5])
pred_df.head()

In [None]:
# Show the first five true target values from the test split for comparison.
target_test.iloc[:5]

In [None]:
# Score the regression: Mean Squared Error (MSE) between the held-out targets
# and the model's predictions.
mean_squared_error(y_true=target_test, y_pred=target_predictions)

# Supervised Learning: Multi-class Classification with XGBoost

In [None]:
# Import required resources
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

In [None]:
# Take the dataset's target array as the classification labels.
iris_classes = iris["target"]

In [None]:
# Display the target array taken from the dataset (the bare expression is the
# cell's output).
iris_classes

In [None]:
# Hold out 20% of the samples for testing. random_state fixes the shuffle so
# the split and the resulting accuracy are reproducible; stratify keeps the
# class proportions the same in the training and test sets.
xgb_input_train, xgb_input_test, xgb_target_train, xgb_target_test = train_test_split(
    iris_data,
    iris_classes,
    test_size=0.2,
    random_state=42,
    stratify=iris_classes,
)

In [None]:
# Collect the classifier settings in one place, then build the model from them.
xgb_params = {
    "n_estimators": 2,
    "max_depth": 2,
    "learning_rate": 1,
    "objective": "multi:softmax",
}
xgbc = XGBClassifier(**xgb_params)

In [None]:
# Fit the classifier on the training inputs and their class labels.
xgbc.fit(X=xgb_input_train, y=xgb_target_train)

In [None]:
# Run the trained classifier on the held-out test inputs.
xgb_predictions = xgbc.predict(X=xgb_input_test)

In [None]:
# Display the classifier's predictions for the test inputs (the bare expression
# is the cell's output).
xgb_predictions

In [None]:
# Display the true labels of the test split, for comparison with the
# predictions shown in the previous cell.
xgb_target_test

In [None]:
# Score the classifier by comparing its predictions with the true test labels.
accuracy_score(y_true=xgb_target_test, y_pred=xgb_predictions)