# Support Vector Machines (SVM) Classification

In this exercise, you will work with a synthetic dataset and implement a Support Vector Machine (SVM) classifier using scikit-learn. SVMs are powerful algorithms for binary classification tasks and are widely used in various applications. Your task is to train the SVM model on the given dataset and evaluate its performance using accuracy.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Generate a 2D grid of points
# n is the number of grid points per axis, so the final dataset has n**2 samples.
n = 60

# 1-D coordinates along each axis: n evenly spaced values from -2*pi to 3*pi
x = np.linspace(-2*np.pi, 3*np.pi, n)
y = np.linspace(-2*np.pi, 3*np.pi, n)

# TODO: Create a meshgrid of x and y
X, Y = # TODO
# What does meshgrid do?

# TODO: Calculate the z values as the cos(x) + cos(y) + noise with std=1/5
# Z must contain n**2 values (it is thresholded and reshaped to (n**2, 1) below),
# so evaluate it on the meshgrid arrays X and Y rather than on the 1-D vectors x and y.
# Tip: fix a random seed so that the noise (and every result that follows) is reproducible.
Z = # TODO

# What does this do?
# NOTE: X and y are rebound by the next two lines -- afterwards X no longer holds the
# meshgrid and y no longer holds the 1-D axis coordinates defined above.
X = np.hstack((X.reshape((n**2, 1)), Y.reshape((n**2, 1))))
y = (Z > 0).reshape(n**2, 1)

# TODO: Split the data into training and testing sets
# Tip: pass a fixed random_state to the splitter to make the split reproducible.
test_size = # TODO
X_train, X_test, y_train, y_test = # TODO

# Why do we need to split our data?

# Reshape the target variables into column vectors of shape (n_rows, 1).
# -1 lets NumPy infer the row count from the array actually returned by the split.
# Computing it by hand as int(n_samples * test_size) truncates, so it can disagree
# with the split's own rounding whenever n_samples * test_size is not a whole number
# (and it breaks entirely when test_size is given as an absolute integer count),
# which makes reshape raise a ValueError.
n_samples = n ** 2  # total number of samples; kept for reference
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Report the array shapes of both splits, with a blank line between the two groups
train_shapes = (("X_train shape:", X_train.shape), ("y_train shape:", y_train.shape))
test_shapes = (("X_test shape:", X_test.shape), ("y_test shape:", y_test.shape))

for label, shape in train_shapes:
    print(label, shape)
print()
for label, shape in test_shapes:
    print(label, shape)

In [None]:
# TODO: Visualize the data
# Two side-by-side panels: the noisy surface Z (left) and the class labels (right).
# Select the first panel of a 1x2 grid of subplots
plt.subplot(1, 2, 1)

# TODO: Display the heatmap for Z using the 'hot' colormap
plt.imshow(# TODO, cmap=# TODO)
# Add a colorbar to the first subplot with shrink factor 0.49 to adjust its size
plt.colorbar(shrink=0.49)

# Set the title for the first subplot
plt.title("Heatmap of Z")
# Set the axis labels for the first subplot
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")

# Select the second panel of the 1x2 grid
plt.subplot(1, 2, 2)

# TODO: Display the heatmap for the labels y (reshaped to an n x n matrix) without specifying the colormap (uses default)
# NOTE(review): the title below says "Y", but the array meant here is presumably the
# lowercase label array `y` of shape (n**2, 1); the meshgrid `Y` is already n x n -- confirm.
plt.imshow(# TODO)
# Add a colorbar to the second subplot with shrink factor 0.49 to adjust its size
plt.colorbar(shrink=0.49)

# Set the title for the second subplot
plt.title("Heatmap of Y")
# Set the axis labels for the second subplot
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")

# Show the plot
plt.show()

In [None]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [None]:
# TODO: Create an SVM classifier with a linear kernel and fit it on the training data
# Tip: y_train is a column vector of shape (n_rows, 1); scikit-learn expects 1-D targets,
# so passing y_train.ravel() avoids a DataConversionWarning.
svm_classifier = # TODO
svm_classifier.fit(# TODO)

# TODO: Predict the labels of the training and test datasets
y_train_pred = # TODO
y_test_pred = # TODO

# TODO: Assess the accuracy of the model on both datasets
train_accuracy = # TODO
test_accuracy = # TODO

# Comment on the results: how do the training and test accuracies compare, and why?
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

In [None]:
# Visualize the decision boundary and data points
plt.figure(figsize=(8, 6))

# Plot the decision boundary: classify every point of a dense grid that covers the
# data range (padded by 1 on each side) and draw the predicted class as filled contours.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Keep the grid predictions under their own name: storing them in `Z` would overwrite
# the dataset surface plotted by the heatmap cell, so re-running that cell afterwards
# would silently show classifier output instead of the data.
Z_pred = svm_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
Z_pred = Z_pred.reshape(xx.shape)
plt.contourf(xx, yy, Z_pred, cmap='viridis', alpha=0.6)

# TODO: Plot the test data points on top of the decision regions
# (first feature on the x-axis, second feature on the y-axis, coloured by true label y_test)
plt.scatter(# TODO, # TODO, c=y_test, cmap='viridis', edgecolor='black', linewidth=1, marker='o', label='Test Data')

# Title, axis labels, class colorbar (ticks at the two class values), legend and grid
plt.title('SVM Classifier Decision Boundary')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.colorbar(ticks=[0, 1], label='Class')
plt.legend()
plt.grid()

plt.show()

In [None]:
# TODO: Now fit the SVM model with an RBF kernel
# NOTE: this rebinds svm_classifier, replacing the model fitted in the earlier cell.
# Tip: y_train is a column vector of shape (n_rows, 1); scikit-learn expects 1-D targets,
# so passing y_train.ravel() avoids a DataConversionWarning.
svm_classifier = # TODO
svm_classifier.fit(# TODO)

# TODO: Predict the labels of the training and test datasets
y_train_pred = # TODO
y_test_pred = # TODO

# TODO: Assess the accuracy of the model on both datasets
train_accuracy = # TODO
test_accuracy = # TODO

# Comment on the results: how do they compare with the linear kernel, and why?
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

In [None]:
# Visualize the decision boundary and data points
plt.figure(figsize=(8, 6))

# Plot the decision boundary: classify every point of a dense grid that covers the
# data range (padded by 1 on each side) and draw the predicted class as filled contours.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Keep the grid predictions under their own name: storing them in `Z` would overwrite
# the dataset surface plotted by the heatmap cell, so re-running that cell afterwards
# would silently show classifier output instead of the data.
Z_pred = svm_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
Z_pred = Z_pred.reshape(xx.shape)
plt.contourf(xx, yy, Z_pred, cmap='viridis', alpha=0.6)

# TODO: Plot the test data points on top of the decision regions
# (first feature on the x-axis, second feature on the y-axis, coloured by true label y_test)
plt.scatter(# TODO, # TODO, c=y_test, cmap='viridis', edgecolor='black', linewidth=1, marker='o', label='Test Data')

# Title, axis labels, class colorbar (ticks at the two class values), legend and grid
plt.title('SVM Classifier Decision Boundary')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.colorbar(ticks=[0, 1], label='Class')
plt.legend()
plt.grid()

plt.show()

# Random Forest (RF) Classification

In this exercise, you will work with a synthetic dataset and implement a Random Forest (RF) classifier using scikit-learn. Random Forest is a powerful algorithm for binary classification tasks and is widely used in various applications. Your task is to train the RF model on the given dataset and evaluate its performance using accuracy.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Generate a 2D grid of points
# n is the number of grid points per axis, so the final dataset has n**2 samples.
n = 60

# 1-D coordinates along each axis: n evenly spaced values from -2*pi to 3*pi
x = np.linspace(-2*np.pi, 3*np.pi, n)
y = np.linspace(-2*np.pi, 3*np.pi, n)

# TODO: Create a meshgrid of x and y
X, Y = # TODO
# What does meshgrid do?

# TODO: Calculate the z values as the cos(x) + cos(y) + noise with std=1/5
# Z must contain n**2 values (it is thresholded and reshaped to (n**2, 1) below),
# so evaluate it on the meshgrid arrays X and Y rather than on the 1-D vectors x and y.
# Tip: fix a random seed so that the noise (and every result that follows) is reproducible.
Z = # TODO

# What does this do?
# NOTE: X and y are rebound by the next two lines -- afterwards X no longer holds the
# meshgrid and y no longer holds the 1-D axis coordinates defined above.
X = np.hstack((X.reshape((n**2, 1)), Y.reshape((n**2, 1))))
y = (Z > 0).reshape(n**2, 1)

# TODO: Split the data into training and testing sets
# Tip: pass a fixed random_state to the splitter to make the split reproducible.
test_size = # TODO
X_train, X_test, y_train, y_test = # TODO

# Why do we need to split our data?

# Reshape the target variables into column vectors of shape (n_rows, 1).
# -1 lets NumPy infer the row count from the array actually returned by the split.
# Computing it by hand as int(n_samples * test_size) truncates, so it can disagree
# with the split's own rounding whenever n_samples * test_size is not a whole number
# (and it breaks entirely when test_size is given as an absolute integer count),
# which makes reshape raise a ValueError.
n_samples = n ** 2  # total number of samples; kept for reference
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

# Report the array shapes of both splits, with a blank line between the two groups
train_shapes = (("X_train shape:", X_train.shape), ("y_train shape:", y_train.shape))
test_shapes = (("X_test shape:", X_test.shape), ("y_test shape:", y_test.shape))

for label, shape in train_shapes:
    print(label, shape)
print()
for label, shape in test_shapes:
    print(label, shape)

In [None]:
# TODO: Visualize the data
# Two side-by-side panels: the noisy surface Z (left) and the class labels (right).
# Select the first panel of a 1x2 grid of subplots
plt.subplot(1, 2, 1)

# TODO: Display the heatmap for Z using the 'hot' colormap
plt.imshow(# TODO, cmap=# TODO)
# Add a colorbar to the first subplot with shrink factor 0.49 to adjust its size
plt.colorbar(shrink=0.49)

# Set the title for the first subplot
plt.title("Heatmap of Z")
# Set the axis labels for the first subplot
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")

# Select the second panel of the 1x2 grid
plt.subplot(1, 2, 2)

# TODO: Display the heatmap for the labels y (reshaped to an n x n matrix) without specifying the colormap (uses default)
# NOTE(review): the title below says "Y", but the array meant here is presumably the
# lowercase label array `y` of shape (n**2, 1); the meshgrid `Y` is already n x n -- confirm.
plt.imshow(# TODO)
# Add a colorbar to the second subplot with shrink factor 0.49 to adjust its size
plt.colorbar(shrink=0.49)

# Set the title for the second subplot
plt.title("Heatmap of Y")
# Set the axis labels for the second subplot
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")

# Show the plot
plt.show()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
# TODO: Create a Random Forest classifier and fit it on the training dataset
# Tip: y_train is a column vector of shape (n_rows, 1); scikit-learn expects 1-D targets,
# so passing y_train.ravel() avoids a DataConversionWarning.
# Tip: pass a fixed random_state so the fitted forest is reproducible.
rf_classifier = # TODO
rf_classifier.fit(# TODO)

# TODO: Predict the classes for both the training and testing datasets
y_train_pred = # TODO
y_test_pred = # TODO

# TODO: Assess the accuracy of the model and comment on the results
# (how do the training and test accuracies compare, and what does that suggest?)
train_accuracy = # TODO
test_accuracy = # TODO

print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

In [None]:
# Visualize the decision boundary and data points
plt.figure(figsize=(8, 6))

# Plot the decision boundary: classify every point of a dense grid that covers the
# data range (padded by 1 on each side) and draw the predicted class as filled contours.
h = .02  # step size in the mesh
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
# Keep the grid predictions under their own name: storing them in `Z` would overwrite
# the dataset surface plotted by the heatmap cell, so re-running that cell afterwards
# would silently show classifier output instead of the data.
Z_pred = rf_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
Z_pred = Z_pred.reshape(xx.shape)
plt.contourf(xx, yy, Z_pred, cmap='viridis', alpha=0.6)

# TODO: Plot the test data points on top of the decision regions
# (first feature on the x-axis, second feature on the y-axis, coloured by true label y_test)
plt.scatter(# TODO, # TODO, c=y_test, cmap='viridis', edgecolor='black', linewidth=1, marker='o', label='Test Data')

# Title, axis labels, class colorbar (ticks at the two class values), legend and grid
plt.title('RF Classifier Decision Boundary')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.colorbar(ticks=[0, 1], label='Class')
plt.legend()
plt.grid()

plt.show()

In [None]:
# How could you use logistic regression to achieve better results than the previous models?