# Decision Tree, KNN, Naive Bayes, SVM

In [None]:
# Decision Tree

In [None]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
import kagglehub
from kagglehub import KaggleDatasetAdapter

# Load the Play Tennis dataset from Kaggle as a pandas DataFrame.
file_path = "play_tennis.csv"
df = kagglehub.load_dataset(
    KaggleDatasetAdapter.PANDAS,
    "fredericobreno/play-tennis",
    file_path
)

print("First 5 records:")
print(df.head())

# Drop the 'day' column — it's just an identifier
df = df.drop('day', axis=1)

# One-hot encode categorical variables
data = pd.get_dummies(df)

# Set up features and target.
# get_dummies expands the 'play' column into one indicator column per value
# (e.g. 'play_No' and 'play_Yes'). Every one of those columns must be removed
# from the features: leaving the complementary indicator in `x` would hand
# the label to the model (target leakage).
target_columns = pd.get_dummies(df[['play']]).columns
x = data.drop(columns=target_columns)
y = data['play_Yes']

# Train model
model = DecisionTreeClassifier()
model.fit(x, y)

# Predict on the first row (a training sample, so this is only a smoke test,
# not a measure of how well the model generalises).
print("Prediction:", model.predict(x.iloc[[0]]))

# The Kaggle link for the data set is -> https://www.kaggle.com/datasets/fredericobreno/play-tennis

In [None]:
# K-Nearest Neighbors

In [None]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Load the Iris dataset that ships with scikit-learn.
iris = load_iris()

# Tabular view of the data, built for inspection only; the model below is
# trained on the raw arrays (iris.data / iris.target).
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target
print('First 5 records of the data: ')
# Actually show the records announced by the header above.
# Use print(df) instead if you want to see the whole dataset.
print(df.head())

# Hold out 30% of the samples for evaluation.
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.3)

# Classify each sample by majority vote among its 3 nearest neighbours.
model = KNeighborsClassifier(n_neighbors=3)
model.fit(x_train, y_train)
print("Accuracy:", model.score(x_test, y_test))

In [None]:
# Naive Bayes

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the built-in breast cancer dataset.
data = load_breast_cancer()

# Assemble a DataFrame (feature columns plus a 'target' column) and show it.
df = pd.DataFrame(data.data, columns=data.feature_names).assign(target=data.target)
print(df)

# Reserve 30% of the samples for testing.
x_train, x_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=0.3,
)

# Fit a Gaussian Naive Bayes classifier and report its accuracy on the
# held-out samples.
model = GaussianNB().fit(x_train, y_train)
print("Accuracy:", model.score(x_test, y_test))

In [None]:
# Support Vector Machine

In [None]:
from sklearn.datasets import load_breast_cancer
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Load built-in dataset
data = load_breast_cancer()

# Features and target
x = data.data
y = data.target  # 0 = malignant, 1 = benign

# Train-test split
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# SVM classifier.
# SVMs are not scale invariant, and the features are used here exactly as
# loaded (no preprocessing). Standardising them first keeps features with
# large numeric ranges from dominating the margin. The scaler lives inside
# the pipeline so it is fitted on the training split only and then re-applied
# to the test split when scoring.
model = make_pipeline(StandardScaler(), SVC(kernel='linear'))
model.fit(x_train, y_train)

# Evaluate accuracy
print("Accuracy:", model.score(x_test, y_test))


# Alternative code, if we also want to plot the decision boundary:
'''
The full dataset is very high-dimensional, so it cannot be plotted directly. If we use only 2 features, however, the decision boundary can be drawn; the code for that is:

import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Load data
data = load_breast_cancer()
X = data.data[:, [0, 1]]  # Use only first 2 features: mean radius and mean texture
y = data.target

# Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train SVM
model = SVC(kernel='linear')
model.fit(X_train, y_train)

# Accuracy
print("Accuracy:", model.score(X_test, y_test))

# Plotting function
def plot_decision_boundary(X, y, model, title):
    h = 0.01  # mesh step size
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))

    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.3, cmap=plt.cm.coolwarm)
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k', cmap=plt.cm.coolwarm)
    plt.xlabel('Mean Radius')
    plt.ylabel('Mean Texture')
    plt.title(title)
    plt.show()

# Plot
plot_decision_boundary(X_train, y_train, model, "SVM Decision Boundary (Train)")
plot_decision_boundary(X_test, y_test, model, "SVM Decision Boundary (Test)")

'''
