In [None]:
# K-Nearest Neighbors (KNN)

In [None]:
# Import libraries
import pandas as pd
from sklearn.datasets import load_iris

# Fetch the bundled iris dataset object
iris_dataset = load_iris()

# Show the names of the measured features (key access is equivalent to attribute access here)
iris_dataset["feature_names"]

In [None]:
# Show the class (species) names stored with the dataset
iris_dataset["target_names"]

In [None]:
# Wrap the raw measurements in a DataFrame, one named column per feature
df = pd.DataFrame(
    data=iris_dataset.data,
    columns=iris_dataset.feature_names,
)

# Preview the first five rows
df.head(n=5)

In [None]:
# Attach the integer class label of every sample as a 'target' column
df["target"] = iris_dataset["target"]

# Preview the frame with the new column
df.head(n=5)

In [None]:
# Translate each integer class label into its species name and store it as 'flower_name'
df["flower_name"] = df["target"].map(lambda label: iris_dataset.target_names[label])

# Preview the first ten rows
df.head(n=10)

In [None]:
# Preview the first rows whose class label is 1 (versicolor)
is_versicolor = df["target"] == 1
df.loc[is_versicolor].head()

In [None]:
# Show rows at positions 45 through 54 (the end bound 55 is exclusive)
df.iloc[45:55]

In [None]:
# One sub-frame per species, selected by class label instead of by hard-coded
# row positions (0-49 / 50-99 / 100+), so the subsets stay correct even if the
# frame is ever shuffled, filtered or re-ordered.
df0 = df[df['target'] == 0]  # setosa
df1 = df[df['target'] == 1]  # versicolor
df2 = df[df['target'] == 2]  # virginica

In [None]:
import matplotlib.pyplot as plt

# (subset, colour, marker, legend label) for each species
sepal_series = (
    (df0, "green", '+', 'Setosa'),
    (df1, "blue", '^', 'Versicolor'),
    (df2, "red", '*', 'Virginica'),
)

# Draw sepal length against sepal width, one scatter series per species
for species_df, series_color, series_marker, series_label in sepal_series:
    plt.scatter(
        species_df['sepal length (cm)'],
        species_df['sepal width (cm)'],
        color=series_color,
        marker=series_marker,
        label=series_label,
    )

# Name the axes
plt.xlabel('Sepal Length')
plt.ylabel('Sepal Width')

# The legend maps each colour/marker back to its species
plt.legend()

In [None]:
# Petal length vs. petal width, one scatter series per species.
plt.xlabel('Petal Length')
plt.ylabel('Petal Width')

# Each series carries a label so the legend below can identify it,
# using the same colours/markers as the sepal plot.
plt.scatter(df0['petal length (cm)'], df0['petal width (cm)'], color="green", marker='+', label='Setosa')

plt.scatter(df1['petal length (cm)'], df1['petal width (cm)'], color="blue", marker='^', label='Versicolor')

plt.scatter(df2['petal length (cm)'], df2['petal width (cm)'], color="red", marker='*', label='Virginica')

# Add legend to the plot to differentiate between species
plt.legend()

In [None]:
from sklearn.model_selection import train_test_split

# Feature matrix: every column except the two label columns
x = df.drop(columns=['target', 'flower_name'])

# Labels: the integer class ids
y = df['target']

# Hold out 30% of the rows for testing and train on the remaining 70%.
# The fixed random_state makes the shuffle, and therefore the split, repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.3,
)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

# Build an (as yet unfitted) K-Nearest Neighbors classifier that uses 3 neighbors
knn = KNeighborsClassifier(
    n_neighbors=3,
)

In [None]:
# Fit the KNN classifier on the training features and labels
knn.fit(X=X_train, y=y_train)

In [None]:
# Mean accuracy of the fitted KNN classifier on the held-out test data
knn.score(X=X_test, y=y_test)

In [None]:
from sklearn.metrics import confusion_matrix

# Class predictions of the fitted KNN model for the held-out samples
y_pred = knn.predict(X=X_test)

# Tabulate true labels against predicted labels
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

# Show the resulting matrix
cm

In [None]:
import seaborn as sn

# Set the figure size for the heatmap
plt.figure(figsize=(10,5))

# Species names for the tick labels, so the axes show names instead of bare class indices.
# NOTE(review): assumes the matrix has one row/column per species in label order 0, 1, 2,
# i.e. every class appears in y_test or y_pred; confirm if the split ever changes.
species_labels = list(iris_dataset.target_names)

# Create a heatmap to visualize the confusion matrix; fmt='d' prints the cell counts as integers
sn.heatmap(cm, annot=True, fmt='d', xticklabels=species_labels, yticklabels=species_labels)

# Set the labels for the x and y axes
plt.xlabel('Predicted', fontsize=15)
plt.ylabel('Truth', fontsize=15)

In [None]:
from sklearn.metrics import classification_report

# Generate and display the classification report.
# classification_report returns a multi-line string; it has to be printed,
# otherwise the notebook shows its repr with literal '\n' escapes on one line.
print(classification_report(y_test, y_pred))

In [None]:
# Decision Tree

In [None]:
# Import libraries
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.metrics import accuracy_score

In [None]:
# Fetch the iris dataset again for the tree-based models
iris = load_iris()

# Feature matrix and class labels
X, y = iris.data, iris.target

# Human-readable names for the features and the classes
feature_names, target_names = iris.feature_names, iris.target_names

In [None]:
# Hold out 20% of the samples for testing and train on the other 80%.
# The fixed random_state keeps the split identical between runs.
# Note: this rebinds X_train / X_test / y_train / y_test from the KNN section above.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Initialize the Decision Tree classifier with a fixed random state so the fitted
# tree (and the plot / accuracy derived from it) is reproducible across runs,
# matching the seed used for the train/test split and the Random Forest.
dt = DecisionTreeClassifier(random_state=42)

In [None]:
# Fit the Decision Tree on the training features and labels
dt.fit(X=X_train, y=y_train)

In [None]:
# Class predictions of the fitted Decision Tree for the held-out samples
y_pred_dt = dt.predict(X=X_test)

# Show the predicted labels
y_pred_dt

In [None]:
# Report the share of test samples the Decision Tree classified correctly (two decimals)
dt_accuracy_message = f"Decision Tree Accuracy: {accuracy_score(y_test, y_pred_dt):.2f}"
print(dt_accuracy_message)

In [None]:
# Plot the fitted decision tree with feature and class names.
plt.figure(figsize=(15,10))  # Set the size of the plot

# target_names is a NumPy array; pass plain lists so the call also works on
# scikit-learn releases whose parameter validation only accepts a list here.
# NOTE(review): observed as an InvalidParameterError on scikit-learn 1.3.0 — confirm
# against the version this notebook is run with.
plot_tree(
    dt,
    filled=True,  # Colour the nodes
    feature_names=list(feature_names),
    class_names=list(target_names),
)
plt.title('Decision Tree', fontsize=30)  # Set the title of the plot with a font size of 30
plt.show()  # Display the plot

In [None]:
# Random Forest

In [None]:
# Import the RandomForestClassifier from the sklearn.ensemble module
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Build a Random Forest of 150 trees; the fixed random_state makes training repeatable
rf = RandomForestClassifier(
    random_state=42,
    n_estimators=150,
)

In [None]:
# Fit the Random Forest on the training features and labels
rf.fit(X=X_train, y=y_train)

In [None]:
# Class predictions of the fitted Random Forest for the held-out samples
y_pred_rf = rf.predict(X=X_test)

# Show the predicted labels
y_pred_rf

In [None]:
# Report the share of test samples the Random Forest classified correctly (two decimals)
rf_accuracy_message = f"Random Forest Accuracy: {accuracy_score(y_test, y_pred_rf):.2f}"
print(rf_accuracy_message)