# In [None]:
from sklearn.datasets import load_iris

# Load the iris dataset bundled with scikit-learn and pull out the
# feature matrix and the integer class labels in one step.
iris = load_iris()
X, y = iris.data, iris.target  # shapes (150, 4) and (150,)

# Show the column names and the class names for reference.
print(iris.feature_names, iris.target_names)

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing; the fixed seed makes the
# split reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Baseline: a decision tree with default hyper-parameters and a fixed seed,
# trained on the training split.
model = DecisionTreeClassifier(random_state=42)
model.fit(X_train, y_train)

# Predict the held-out samples and compare the first five predictions
# with the corresponding true labels.
y_pred = model.predict(X_test)
print("Predictions:", y_pred[0:5])
print("True labels:", y_test[0:5])

# Fraction of test samples classified correctly.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

# Confusion matrix of the decision-tree predictions
# (rows: true classes, columns: predicted classes).
confusion = confusion_matrix(y_test, y_pred)
# The matrix spans several lines; printing it below the label (instead of
# right after it on the same line) keeps its rows aligned.
print("Confusion matrix:", confusion, sep="\n")

# Per-class precision / recall / F1 summary as a pre-formatted text table.
report = classification_report(y_test, y_pred)
# Same reasoning: the table header is positioned with leading whitespace,
# so it must start on its own line to line up with the columns beneath it.
print("Classification report:", report, sep="\n")

from sklearn.neighbors import KNeighborsClassifier

# k-nearest-neighbours classifier (k = 5), trained on the training split
# and scored on the held-out split.
model_knn = KNeighborsClassifier(n_neighbors=5)
y_pred_knn = model_knn.fit(X_train, y_train).predict(X_test)
print("k-NN accuracy:", accuracy_score(y_test, y_pred_knn))

from sklearn.tree import DecisionTreeClassifier

# Depth-limited variant of the decision tree: capping the depth at 3
# constrains how complex the fitted tree can become.
model = DecisionTreeClassifier(max_depth=3, random_state=42)
model.fit(X_train, y_train)

# `model` has just been refit, so refresh `y_pred` to match it (otherwise it
# would still hold the previous model's predictions) and report the accuracy
# of the shallow tree on the held-out split.
y_pred = model.predict(X_test)
print("Decision tree (max_depth=3) accuracy:", accuracy_score(y_test, y_pred))

from sklearn.neighbors import KNeighborsClassifier

# The usual scikit-learn estimator workflow: instantiate -> fit -> predict.

# Instantiate: n_neighbors is how many neighbours are consulted per prediction.
model_knn = KNeighborsClassifier(n_neighbors=5)

# Fit on the training split.
model_knn.fit(X_train, y_train)

# Predict the held-out split and report the share of correct predictions.
y_pred_knn = model_knn.predict(X_test)
print(
    "k-NN Accuracy:",
    accuracy_score(y_test, y_pred_knn),
)


from sklearn.svm import SVC

# Support vector classifier with a linear kernel
# ('linear' and 'rbf' are the common kernel choices).
model_svm = SVC(kernel='linear', random_state=42)

# Train on the training split, then predict the held-out split.
y_pred_svm = model_svm.fit(X_train, y_train).predict(X_test)

print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))



from sklearn.model_selection import GridSearchCV

# Base estimator whose hyper-parameters are tuned below.
dtree = DecisionTreeClassifier(random_state=42)

# Candidate values per hyper-parameter; the grid search evaluates every
# combination (4 * 3 * 2 = 24 settings).
param_grid_dt = {
    'max_depth': [3, 5, 10, None],
    'min_samples_split': [2, 5, 10],
    'criterion': ['gini', 'entropy'],
}

# 5-fold cross-validated search over the grid, ranked by accuracy.
grid_dt = GridSearchCV(dtree, param_grid_dt, cv=5, scoring='accuracy')

# Run the search on the training split only, so the test split stays unseen.
grid_dt.fit(X_train, y_train)

# best_score_ is the mean cross-validated accuracy of the winning setting,
# measured on the training data -- it is not the test accuracy.
print("Best Decision Tree Params:", grid_dt.best_params_)
print("Best Decision Tree Score:", grid_dt.best_score_)

# Predict with the best setting (refit on the full training split by
# GridSearchCV's default refit=True) and report the held-out accuracy, so the
# prediction is actually used rather than silently discarded.
y_pred_dt_best = grid_dt.predict(X_test)
print("Best Decision Tree Test Accuracy:", accuracy_score(y_test, y_pred_dt_best))



# Base k-NN estimator whose hyper-parameters are tuned below.
knn = KNeighborsClassifier()

# Candidate values per hyper-parameter; the grid search evaluates every
# combination (4 * 2 * 2 = 16 settings).
param_grid_knn = {
    'n_neighbors': [3, 5, 11, 19],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
}

# 5-fold cross-validated search over the grid, ranked by accuracy and run on
# the training split only.
grid_knn = GridSearchCV(knn, param_grid_knn, cv=5, scoring='accuracy')
grid_knn.fit(X_train, y_train)

# best_score_ is the mean cross-validated accuracy of the winning setting,
# measured on the training data -- it is not the test accuracy.
print("Best k-NN Params:", grid_knn.best_params_)
print("Best k-NN Score:", grid_knn.best_score_)

# Mirror the decision-tree grid search: predict the held-out split with the
# best setting and report its accuracy so both tuned models can be compared.
y_pred_knn_best = grid_knn.predict(X_test)
print("Best k-NN Test Accuracy:", accuracy_score(y_test, y_pred_knn_best))


import pandas as pd
from sklearn.datasets import load_iris

# Reload the dataset and wrap the feature matrix in a DataFrame so that
# columns can be addressed by name.
iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Feature engineering: add a "petal ratio" column (petal length / petal width).
df['petal ratio'] = (
    df['petal length (cm)'] / df['petal width (cm)']
)

# The DataFrame contents (original features plus the new ratio) become X;
# the labels are unchanged.
X, y = df.to_numpy(), iris.target


# Two alternative ways to drop the sepal-width feature. Each result gets its
# own name so that the second option does not silently overwrite the first.

# Option A: Using Pandas (Recommended for readability)
# Restrict to the original feature columns first, so that engineered columns
# already added to df (such as 'petal ratio') do not end up in the result and
# both options select the same three features.
X_reduced_pandas = df[iris.feature_names].drop(columns=['sepal width (cm)']).values

# Option B: Using NumPy slicing (Faster for raw arrays)
# iris.data[:, [0, 2, 3]] selects columns 0, 2, and 3, skipping column 1 (sepal width)
X_reduced = iris.data[:, [0, 2, 3]]




import numpy as np
from sklearn.datasets import load_iris

# Start again from the raw feature matrix.
iris = load_iris()
X = iris.data

# Petal ratio: petal length (column 2) divided by petal width (column 3).
petal_ratio = X[:, 2] / X[:, 3]

# Append the ratio as an extra trailing column: turn the 1-D ratio into a
# single-column 2-D array and stack it to the right of the original features.
X_enhanced = np.hstack((X, petal_ratio[:, np.newaxis]))

print("Original shape:", X.shape)  # (150, 4)
print("Enhanced shape:", X_enhanced.shape)  # (150, 5)
