In [8]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder  # Import LabelEncoder for encoding categorical features
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree

# Load the Iris dataset from the CSV file next to the notebook.
iris = pd.read_csv("iris.csv")

# --- Decision Tree Classifier for IRIS dataset ---
# Every column except the last is a feature; the last column is the class label.
X_iris = iris.iloc[:, :-1]
y_iris = iris.iloc[:, -1]
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X_iris, y_iris, test_size=0.2, random_state=42)

# Seed the tree too: scikit-learn permutes features randomly at each split, so
# an unseeded tree can differ between runs even on the same training data.
dtc = DecisionTreeClassifier(random_state=42)
dtc.fit(X_train, y_train)
y_pred = dtc.predict(X_test)

print("Decision Tree Classification for IRIS Dataset:")
print("Accuracy:", accuracy_score(y_test, y_pred))
# The scorers are called by their directly imported names. The previous
# `metrics.precision_score(...)` spelling raised NameError on a fresh kernel
# because this cell never imports the `metrics` module.
# Macro averaging weights the classes equally; zero_division=0 scores a class
# with no predicted/true samples as 0 instead of emitting a warning.
print("Precision:", precision_score(y_test, y_pred, average='macro', zero_division=0))
print("Recall:", recall_score(y_test, y_pred, average='macro', zero_division=0))
print("F1 Score:", f1_score(y_test, y_pred, average='macro', zero_division=0))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Decision Tree Classification for IRIS Dataset:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
Classification Report:
                  precision    recall  f1-score   support

    Iris-setosa       1.00      1.00      1.00        10
Iris-versicolor       1.00      1.00      1.00         9
 Iris-virginica       1.00      1.00      1.00        11

       accuracy                           1.00        30
      macro avg       1.00      1.00      1.00        30
   weighted avg       1.00      1.00      1.00        30



In [16]:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder



# Load the Iris dataset from the CSV file next to the notebook.
iris_df = pd.read_csv("iris.csv")

# Quick look at the first rows and the column names before modelling.
print(iris_df.head())
print(iris_df.columns)

features = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
target = 'species'

# Fail fast with an exception when the CSV does not have the expected schema.
# `exit()` is an interactive-shell helper: inside a notebook it shuts the
# kernel down instead of reporting an error, so raise a ValueError that carries
# the same diagnostics (expected vs. found columns) plus the missing ones.
missing_columns = [col for col in features + [target] if col not in iris_df.columns]
if missing_columns:
    raise ValueError(
        "Error: Required columns not found in the CSV file. "
        f"Expected columns: {features + [target]}. "
        f"Found columns: {iris_df.columns.tolist()}. "
        f"Missing columns: {missing_columns}."
    )

X = iris_df[features]
y = iris_df[target]

# Encode the species labels as integers for the classifier and the metrics.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# 70/30 hold-out split; stratify ensures proportional representation of classes.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.3, random_state=42, stratify=y_encoded
)

# Linear-kernel support vector classifier.
clf = svm.SVC(kernel='linear', random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("SVM Model Evaluation:")
print("Accuracy:", metrics.accuracy_score(y_test, y_pred))

# Macro averaging gives each class equal weight regardless of its support.
print("Precision:", metrics.precision_score(y_test, y_pred, average='macro'))
print("Recall:", metrics.recall_score(y_test, y_pred, average='macro'))
print("F1 Score:", metrics.f1_score(y_test, y_pred, average='macro'))

from sklearn.model_selection import StratifiedKFold
import numpy as np

from sklearn.model_selection import StratifiedKFold, KFold
import numpy as np
def _cross_validate_linear_svm(splitter, features, labels):
    """Fit a fresh linear SVM on every fold of ``splitter`` and print its accuracy.

    Parameters
    ----------
    splitter
        Object exposing ``split(features, labels)`` that yields
        ``(train_index, test_index)`` pairs (here ``StratifiedKFold`` / ``KFold``).
    features
        Feature table; rows are selected positionally with ``.iloc``.
    labels
        Label container that can be indexed with the index arrays the
        splitter yields.

    Returns
    -------
    list
        The accuracy of each fold, in fold order.
    """
    accuracies = []
    for fold, (train_index, test_index) in enumerate(splitter.split(features, labels), start=1):
        # A new classifier per fold so nothing learned on one fold leaks into the next.
        fold_clf = svm.SVC(kernel='linear', random_state=42)
        fold_clf.fit(features.iloc[train_index], labels[train_index])
        fold_pred = fold_clf.predict(features.iloc[test_index])
        fold_accuracy = metrics.accuracy_score(labels[test_index], fold_pred)
        accuracies.append(fold_accuracy)
        print(f"  Fold {fold} Accuracy: {fold_accuracy:.4f}")
    return accuracies


# Stratified K-Fold: the splitter is given the labels so it can stratify on them.
print("\nStratified K-Fold Cross-Validation (k=5):")
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
skf_accuracies = _cross_validate_linear_svm(skf, X, y_encoded)
print(f"  Average Stratified K-Fold Accuracy: {np.mean(skf_accuracies):.4f}")

# Plain K-Fold: same procedure with a KFold splitter for comparison.
print("\nK-Fold Cross-Validation (k=5):")
kf = KFold(n_splits=5, shuffle=True, random_state=42)
kf_accuracies = _cross_validate_linear_svm(kf, X, y_encoded)
print(f"  Average K-Fold Accuracy: {np.mean(kf_accuracies):.4f}")

   sepal_length  sepal_width  petal_length  petal_width      species
0           5.1          3.5           1.4          0.2  Iris-setosa
1           4.9          3.0           1.4          0.2  Iris-setosa
2           4.7          3.2           1.3          0.2  Iris-setosa
3           4.6          3.1           1.5          0.2  Iris-setosa
4           5.0          3.6           1.4          0.2  Iris-setosa
Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')
SVM Model Evaluation:
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1 Score: 1.0

Stratified K-Fold Cross-Validation (k=5):
  Fold 1 Accuracy: 1.0000
  Fold 2 Accuracy: 1.0000
  Fold 3 Accuracy: 0.9333
  Fold 4 Accuracy: 1.0000
  Fold 5 Accuracy: 1.0000
  Average Stratified K-Fold Accuracy: 0.9867

K-Fold Cross-Validation (k=5):
  Fold 1 Accuracy: 1.0000
  Fold 2 Accuracy: 1.0000
  Fold 3 Accuracy: 0.9667
  Fold 4 Accuracy: 0.9333
  Fold 5 Accuracy: 0.9667
  Average K-Fold Ac