# In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load iris and keep only the first two columns (sepal length, sepal width) so
# the fitted classifiers can be drawn on a 2-D plane later.
iris = load_iris()
X, y = iris.data[:, :2], iris.target

# Reduce to a binary problem: versicolor (label 1) vs virginica (label 2).
# Both right-hand sides are evaluated before either name is rebound, so the
# masks are built from the unfiltered labels.
X, y = X[(y == 1) | (y == 2)], y[(y == 1) | (y == 2)]

# Hold out 30% of the samples for testing; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

# One estimator per method. The module-level names are kept because the
# plotting code further down refers to them.
knn = KNeighborsClassifier(n_neighbors=3)
lr = LogisticRegression()
svm = SVC(kernel='linear')
tree = DecisionTreeClassifier(max_depth=3)
rf = RandomForestClassifier(n_estimators=100)

# Fit each estimator on the training split and print its test accuracy, in
# the same order as the estimators are declared above.
for label, estimator in (('k-NN', knn), ('LogReg', lr), ('SVM', svm),
                         ('Tree', tree), ('RF', rf)):
    estimator.fit(X_train, y_train)
    print(f"{label} Accuracy: {estimator.score(X_test, y_test):.2f}")

import matplotlib.pyplot as plt
import numpy as np

# Dense evaluation grid spanning the data range, padded by 1 on every side and
# sampled every 0.02 units along both axes.
x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
xx, yy = np.meshgrid(
    np.arange(x_min, x_max, 0.02),
    np.arange(y_min, y_max, 0.02),
)

# One figure per classifier: the predicted class over the grid as filled
# contours, with every sample overlaid and coloured by its true label.
fitted = {
    'k-NN': knn,
    'LogReg': lr,
    'SVM': svm,
    'Decision Tree': tree,
    'Random Forest': rf,
}
for title, clf in fitted.items():
    Z = clf.predict(np.column_stack([xx.ravel(), yy.ravel()])).reshape(xx.shape)

    plt.figure()
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y, s=20, edgecolor='k')
    plt.title(title)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')

# Display all figures created above in a single call.
plt.show()


import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# seaborn (``sns``) is used by every plot in this section but was never
# imported in this file, so each sns.* call raised NameError; import it here.
import seaborn as sns

# Load iris into a DataFrame and attach each sample's species name.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target_names[iris.target]

# Binary subset (versicolor vs virginica). .copy() yields an independent frame
# so the column assignments below do not write into a slice of df.
df_binary = df[df['species'].isin(['versicolor', 'virginica'])].copy()

# Engineered features: petal/sepal length ratio and sepal*petal length product.
# They are computed once here; the identical recomputation that used to precede
# the last plot was redundant and has been dropped.
df_binary['petal_sepal_ratio'] = df_binary['petal length (cm)'] / df_binary['sepal length (cm)']
df_binary['sepal_petal_area'] = df_binary['sepal length (cm)'] * df_binary['petal length (cm)']

# Feature sets compared later: the two raw lengths vs raw + engineered.
original_features = ['sepal length (cm)', 'petal length (cm)']
enhanced_features = original_features + ['petal_sepal_ratio', 'sepal_petal_area']

# 70/30 train/test split over the enhanced columns (which include the original
# ones), with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    df_binary[enhanced_features],
    df_binary['species'],
    test_size=0.3,
    random_state=42
)

# Scatter of the two raw lengths, coloured by species.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df_binary, x='sepal length (cm)', y='petal length (cm)', hue='species')
plt.title("Versicolor vs Virginica (Sepal Length vs Petal Length)")
plt.show()

# Per-species distribution of each raw length: box plots first, then violin
# plots, each as a 1x2 grid (sepal length on the left, petal length on the right).
for plot_fn in (sns.boxplot, sns.violinplot):
    plt.figure(figsize=(10, 5))
    for position, column in enumerate(original_features, start=1):
        plt.subplot(1, 2, position)
        plot_fn(data=df_binary, x='species', y=column)
    plt.tight_layout()
    plt.show()

# Per-species distribution of the two engineered features.
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
sns.boxplot(data=df_binary, x='species', y='petal_sepal_ratio')
plt.subplot(1, 2, 2)
sns.boxplot(data=df_binary, x='species', y='sepal_petal_area')
plt.tight_layout()
plt.show()

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Standardize each feature set with a scaler fitted on the training rows only,
# then apply that same scaler to the test rows.
scaler_orig = StandardScaler().fit(X_train[original_features])
X_train_orig = scaler_orig.transform(X_train[original_features])
X_test_orig = scaler_orig.transform(X_test[original_features])

scaler_enh = StandardScaler().fit(X_train[enhanced_features])
X_train_enh = scaler_enh.transform(X_train[enhanced_features])
X_test_enh = scaler_enh.transform(X_test[enhanced_features])

# Train one logistic regression per feature set.
model_orig = LogisticRegression(max_iter=1000)
model_orig.fit(X_train_orig, y_train)
model_enh = LogisticRegression(max_iter=1000)
model_enh.fit(X_train_enh, y_train)

# Report test accuracy for both feature sets.
for label, fitted_model, test_features in (
    ("Original", model_orig, X_test_orig),
    ("Enhanced", model_enh, X_test_enh),
):
    print(f"{label} Accuracy: {fitted_model.score(test_features, y_test):.4f}")


import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Load iris into a DataFrame and attach each sample's species name.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['species'] = iris.target_names[iris.target]

# Binary classification (versicolor vs virginica).
# .copy() makes df_binary an independent frame: without it the column
# assignments below operate on a filtered slice of df and pandas emits
# SettingWithCopyWarning.
df_binary = df[df['species'].isin(['versicolor', 'virginica'])].copy()

# Engineered features: petal/sepal length ratio and sepal*petal length product.
df_binary['petal_sepal_ratio'] = df_binary['petal length (cm)'] / df_binary['sepal length (cm)']
df_binary['sepal_petal_area'] = df_binary['sepal length (cm)'] * df_binary['petal length (cm)']

# Convert to NumPy arrays.
# Original features (2 columns).
X_orig = df_binary[['sepal length (cm)', 'petal length (cm)']].values
# Enhanced features (the 2 original columns plus the 2 engineered ones).
X_enh = df_binary[['sepal length (cm)', 'petal length (cm)',
                   'petal_sepal_ratio', 'sepal_petal_area']].values
# Species names as the target.
y = df_binary['species'].values

# Split data (75% train, 25% validation), stratified on the label.
# Both feature matrices and the labels go through ONE train_test_split call so
# they are partitioned with the same row indices by construction. The previous
# version made two separate calls and relied on matching seed/stratify
# arguments to keep X_*_enh aligned with y_train / y_val.
(X_train_orig, X_val_orig,
 X_train_enh, X_val_enh,
 y_train, y_val) = train_test_split(
    X_orig, X_enh, y,
    test_size=0.25,   # 25% validation
    random_state=42,  # for reproducibility
    stratify=y,       # maintain class balance
)

# Show the resulting shapes for both feature sets.
print("Original features:")
print(f"Train: {X_train_orig.shape}, Validation: {X_val_orig.shape}\n")

print("Enhanced features:")

print(f"Train: {X_train_enh.shape}, Validation: {X_val_enh.shape}")


from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def train_and_evaluate(X_train, X_val, y_train, y_val):
    """Standardize the features, fit logistic regression, and score it.

    The scaler is fitted on ``X_train`` only and then applied to both splits.

    Returns:
        A ``(train_accuracy, validation_accuracy)`` tuple.
    """
    standardizer = StandardScaler()
    scaled_train = standardizer.fit_transform(X_train)
    scaled_val = standardizer.transform(X_val)

    classifier = LogisticRegression(max_iter=1000, random_state=42)
    classifier.fit(scaled_train, y_train)

    # Score the fitted model on the split it was trained on and on the
    # held-out split.
    train_acc, val_acc = (
        accuracy_score(labels, classifier.predict(features))
        for features, labels in ((scaled_train, y_train), (scaled_val, y_val))
    )
    return train_acc, val_acc

# Score logistic regression on the original and on the enhanced feature set.
orig_train_acc, orig_val_acc = train_and_evaluate(X_train_orig, X_val_orig, y_train, y_val)
enh_train_acc, enh_val_acc = train_and_evaluate(X_train_enh, X_val_enh, y_train, y_val)

# Print both reports; only the first one is followed by a blank line.
for heading, train_acc, val_acc, trailer in (
    ("Original Features (2D):", orig_train_acc, orig_val_acc, "\n"),
    ("Enhanced Features (4D):", enh_train_acc, enh_val_acc, ""),
):
    print(heading)
    print(f"  Training Accuracy: {train_acc:.4f}")
    print(f"  Validation Accuracy: {val_acc:.4f}{trailer}")


from sklearn.neighbors import KNeighborsClassifier

def evaluate_knn(X_train, X_val, y_train, y_val, feature_set_name,
                 neighbor_counts=(1, 3, 5)):
    """Print validation accuracy of k-NN for several neighbor counts.

    Args:
        X_train: Training features used to fit each classifier.
        X_val: Validation features to predict on.
        y_train: Training labels.
        y_val: Validation labels the predictions are compared against.
        feature_set_name: Label included in the printed heading.
        neighbor_counts: Iterable of ``n_neighbors`` values to try. Defaults
            to ``(1, 3, 5)``, the values that were previously hard-coded.

    Returns:
        None. Results are printed, one line per neighbor count.
    """
    print(f"\nEvaluating {feature_set_name} features:")

    for n in neighbor_counts:
        # A fresh classifier per k so the runs are independent of each other.
        knn = KNeighborsClassifier(n_neighbors=n)
        knn.fit(X_train, y_train)

        acc = accuracy_score(y_val, knn.predict(X_val))
        print(f"{n}-NN Accuracy: {acc:.4f}")

# Standardize each feature set; each scaler is fitted on the training split
# and then reused unchanged for the validation split.
scaler_orig = StandardScaler()
X_train_orig_std = scaler_orig.fit_transform(X_train_orig)
X_val_orig_std = scaler_orig.transform(X_val_orig)

scaler_enh = StandardScaler()
X_train_enh_std = scaler_enh.fit_transform(X_train_enh)
X_val_enh_std = scaler_enh.transform(X_val_enh)

# Run the k-NN sweep on both standardized feature sets.
for set_name, train_features, val_features in (
    ("Original (2D)", X_train_orig_std, X_val_orig_std),
    ("Enhanced (4D)", X_train_enh_std, X_val_enh_std),
):
    evaluate_knn(train_features, val_features, y_train, y_val, set_name)


from sklearn.metrics import (accuracy_score, precision_score, 
                           recall_score, f1_score, confusion_matrix)

def evaluate_model(model, X_val, y_val, positive_class='virginica'):
    """Print accuracy, precision, recall, F1 and the confusion matrix.

    Precision, recall and F1 are computed with ``positive_class`` as the
    positive label.

    Returns:
        The confusion matrix of ``y_val`` against the model's predictions.
    """
    predictions = model.predict(X_val)

    # Collect the scalar metrics in print order.
    report = [("Accuracy", accuracy_score(y_val, predictions))]
    for label, metric in (("Precision", precision_score),
                          ("Recall", recall_score),
                          ("F1-score", f1_score)):
        report.append((label, metric(y_val, predictions, pos_label=positive_class)))
    matrix = confusion_matrix(y_val, predictions)

    for label, value in report:
        print(f"{label}: {value:.4f}")
    print("\nConfusion Matrix:")
    print(matrix)

    return matrix

# Fit a 3-NN classifier on the standardized enhanced features (fit() returns
# the estimator itself) and print its full validation report.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_enh_std, y_train)
print("Evaluation for 3-NN with Enhanced Features:")
cm = evaluate_model(knn, X_val_enh_std, y_val)


import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def decision_region(X, y, model, step=0.01, title='decision region',
                    xlabel='xlabel', ylabel='ylabel',
                    target_names=('versicolor', 'virginica')):
    """Draw the decision regions of a binary classifier over two features.

    Opens a new figure, draws it and calls ``plt.show()`` itself.

    Args:
        X: 2-D array; column 0 is plotted on the x axis, column 1 on the y axis.
        y: Array of labels aligned with ``X``. Samples are selected by
            comparing against the strings ``'versicolor'`` and ``'virginica'``.
        model: Fitted estimator whose ``predict`` accepts two-column input.
            Every prediction other than ``'versicolor'`` is drawn as the
            second class.
        step: Grid spacing used to sample the feature plane.
        title: Figure title.
        xlabel: X-axis label.
        ylabel: Y-axis label.
        target_names: Legend labels for the two classes, in the order
            (versicolor, virginica). The default is a tuple rather than a
            list so it cannot be mutated between calls.

    Returns:
        None.
    """
    # Label values looked up in ``y`` and the colours used for each class.
    class_values = ('versicolor', 'virginica')
    scatter_color = ['red', 'blue']
    contourf_color = ['pink', 'skyblue']

    # Evaluation grid: data range padded by 0.5 on every side.
    f0_min, f0_max = X[:, 0].min()-0.5, X[:, 0].max()+0.5
    f1_min, f1_max = X[:, 1].min()-0.5, X[:, 1].max()+0.5
    xx, yy = np.meshgrid(np.arange(f0_min, f0_max, step),
                         np.arange(f1_min, f1_max, step))

    # Predict every grid point and encode the labels as 0/1 for contouring.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = np.where(Z == 'versicolor', 0, 1).reshape(xx.shape)

    # Filled regions plus a thin line along the boundary.
    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap=ListedColormap(contourf_color))
    plt.contour(xx, yy, Z, colors='y', linewidths=1, alpha=0.5)

    # Overlay the samples of each class; the class mask is applied once.
    for i, class_val in enumerate(class_values):
        members = X[y == class_val]
        plt.scatter(members[:, 0], members[:, 1],
                    color=scatter_color[i], label=target_names[i],
                    edgecolor='black', s=60)

    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.legend()
    plt.show()

# Load iris with sepal length and sepal width only, using species names as
# labels.
iris = load_iris()
X = iris.data[:, :2]
y = iris.target_names[iris.target]

# Keep versicolor and virginica only.
mask = np.isin(y, ['versicolor', 'virginica'])
X, y = X[mask], y[mask]

# Stratified 75/25 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.25
)

# Standardize with statistics taken from the training split only.
scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_val_std = scaler.transform(X_val)

# KNN model (3-NN); fit() returns the estimator itself.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_std, y_train)

# Draw the decision regions over the training data, then the validation data.
for heading, features, labels, subset in (
    ("Training Data Decision Regions:", X_train_std, y_train, "Training"),
    ("\nValidation Data Decision Regions:", X_val_std, y_val, "Validation"),
):
    print(heading)
    decision_region(features, labels, knn,
                    title=f'3-NN Decision Regions ({subset} Data)',
                    xlabel='Standardized Sepal Length',
                    ylabel='Standardized Sepal Width')


import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, 
                           recall_score, f1_score, confusion_matrix)

# Load iris (sepal length and width) with species names as labels, then keep
# versicolor and virginica only.
iris = load_iris()
X = iris.data[:, :2]
y = iris.target_names[iris.target]
binary = np.isin(y, ['versicolor', 'virginica'])
X, y = X[binary], y[binary]

# 75/25 train/validation split with a fixed seed (not stratified).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, random_state=42, test_size=0.25
)

# Standardize in place: X_train / X_val are rebound to their scaled versions,
# using statistics from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_val = scaler.transform(X_train), scaler.transform(X_val)

# Classifiers to compare, keyed by display name; all use library defaults
# except the logistic regression iteration cap.
models = {
    'KNN': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'SVM': SVC(),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
}

# Evaluation function. This rebinds evaluate_model: unlike the earlier
# printing version it takes no positive_class argument and returns a dict.
def evaluate_model(model, X_val, y_val):
    """Return accuracy, precision, recall and F1 of ``model`` on a split.

    Precision, recall and F1 treat ``'virginica'`` as the positive label.

    Returns:
        A dict with the keys ``'Accuracy'``, ``'Precision'``, ``'Recall'``
        and ``'F1'``.
    """
    predictions = model.predict(X_val)
    summary = {'Accuracy': accuracy_score(y_val, predictions)}
    for key, scorer in (('Precision', precision_score),
                        ('Recall', recall_score),
                        ('F1', f1_score)):
        summary[key] = scorer(y_val, predictions, pos_label='virginica')
    return summary

# Fit every model, collect its validation metrics and draw its decision
# regions over the validation data.
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    results[name] = evaluate_model(model, X_val, y_val)

    # decision_region opens its own figure and calls plt.show() itself, so no
    # plt.figure()/plt.show() is issued here; the extra plt.figure() only
    # produced an additional empty figure for every model.
    decision_region(X_val, y_val, model,
                    title=f'{name} Decision Regions',
                    xlabel='Sepal Length (standardized)',
                    ylabel='Sepal Width (standardized)')

# One row per model, one column per metric.
results_df = pd.DataFrame(results).T
print(results_df)


import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# Load iris (sepal length and width) with species names as labels, then keep
# versicolor and virginica only.
iris = load_iris()
X = iris.data[:, :2]  # sepal length and width
y = iris.target_names[iris.target]
X = X[(y == 'versicolor') | (y == 'virginica')]
y = y[(y == 'versicolor') | (y == 'virginica')]

# 75/25 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.25, random_state=42)

# Classifiers to compare. The tree and the forest are seeded so that their raw
# and scaled fits use the same randomness; otherwise the "Difference" column
# would mix seed noise with the effect of scaling.
models = {
    'KNN': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    'SVM': SVC(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42)
}

# Standardize once, outside the loop: the scaled arrays do not depend on the
# model, so refitting the scaler for every model was repeated work.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

results = {}
for name, model in models.items():
    # Without standardization
    model.fit(X_train, y_train)
    acc_raw = accuracy_score(y_val, model.predict(X_val))

    # With standardization (fit() retrains the same estimator from scratch)
    model.fit(X_train_scaled, y_train)
    acc_scaled = accuracy_score(y_val, model.predict(X_val_scaled))

    results[name] = {
        'Raw Accuracy': acc_raw,
        'Scaled Accuracy': acc_scaled,
        'Difference': acc_scaled - acc_raw
    }

# Comparison table, models that gain the most from scaling first.
comparison = pd.DataFrame(results).T.sort_values('Difference', ascending=False)
print(comparison)



import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, 
                           recall_score, f1_score, confusion_matrix, 
                           classification_report)

# Load all three iris species with sepal length and width only (two features
# keep the problem plottable); labels are species names.
iris = load_iris()
X, y = iris.data[:, :2], iris.target_names[iris.target]

# Stratified 75/25 train/validation split with a fixed seed.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, stratify=y, random_state=42, test_size=0.25
)

# Standardize using statistics from the training split only.
scaler = StandardScaler().fit(X_train)
X_train_std, X_val_std = scaler.transform(X_train), scaler.transform(X_val)

# Classifiers to compare, keyed by display name.
models = {
    'KNN': KNeighborsClassifier(),
    'Logistic Regression': LogisticRegression(max_iter=1000),
    # One-vs-One decision function shape for the multiclass SVM.
    'SVM': SVC(decision_function_shape='ovo'),
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
}

# Multiclass evaluation function
def evaluate_multiclass(model, X_val, y_val):
    """Score a fitted multiclass model on a validation split.

    Returns:
        A ``(metrics, confusion_matrix)`` tuple. ``metrics`` is a dict holding
        accuracy followed by macro- and micro-averaged precision, recall
        and F1.
    """
    predicted = model.predict(X_val)

    scores = {'Accuracy': accuracy_score(y_val, predicted)}
    # Macro entries are inserted before micro ones, keeping the key order
    # Accuracy, P/R/F1 (Macro), P/R/F1 (Micro).
    for average, tag in (('macro', 'Macro'), ('micro', 'Micro')):
        scores[f'Precision ({tag})'] = precision_score(y_val, predicted, average=average)
        scores[f'Recall ({tag})'] = recall_score(y_val, predicted, average=average)
        scores[f'F1 ({tag})'] = f1_score(y_val, predicted, average=average)

    return scores, confusion_matrix(y_val, predicted)

# seaborn (``sns``) draws the confusion-matrix heatmaps below but was never
# imported in this file, so sns.heatmap raised NameError; import it here.
import seaborn as sns

# Fit every model on the standardized training data and evaluate it on the
# standardized validation data.
results = {}
for name, model in models.items():
    model.fit(X_train_std, y_train)
    metrics, cm = evaluate_multiclass(model, X_val_std, y_val)
    results[name] = metrics

    # Per-class precision/recall/F1 report.
    print(f"\n{name} Classification Report:")
    print(classification_report(y_val, model.predict(X_val_std)))

    # Confusion matrix as an annotated heatmap, ticks labelled by species.
    plt.figure()
    sns.heatmap(cm, annot=True, fmt='d',
                xticklabels=iris.target_names,
                yticklabels=iris.target_names)
    plt.title(f'{name} Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')
    plt.show()

# Summary table: one row per model, headline metrics only.
results_df = pd.DataFrame(results).T
print("\n=== Performance Comparison ===")
print(results_df[['Accuracy', 'F1 (Macro)', 'F1 (Micro)']])

def plot_multiclass_decision_regions(X, y, model, title):
    """Draw the decision regions of a fitted classifier over two features.

    Reads the module-level ``iris`` object for the class names. Opens a new
    figure and calls ``plt.show()`` itself.

    Args:
        X: 2-D array; column 0 is plotted on the x axis, column 1 on the y axis.
        y: Array of labels aligned with ``X``, compared against the entries of
            ``iris.target_names`` to select the samples of each class.
        model: Fitted estimator whose ``predict`` accepts two-column input and
            returns labels found in ``iris.target_names``.
        title: Figure title.

    Raises:
        IndexError: If the model predicts a label that is not present in
            ``iris.target_names``.
    """
    # Evaluation grid: data range padded by 1 on every side, sampled every 0.02.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    class_names = np.asarray(iris.target_names)
    predicted = np.asarray(model.predict(np.c_[xx.ravel(), yy.ravel()]))

    # Map each predicted label to its position in class_names with a single
    # broadcast comparison instead of a Python loop that ran np.where once per
    # grid point.
    matches = predicted[:, None] == class_names[None, :]
    if not matches.any(axis=1).all():
        # Same exception type the per-point lookup raised for an unknown label.
        raise IndexError("model predicted a label that is not in iris.target_names")
    Z = matches.argmax(axis=1).reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, alpha=0.4, cmap=plt.cm.Paired)

    # Overlay the samples of each species; the class mask is applied once.
    for i, species in enumerate(class_names):
        members = X[y == species]
        plt.scatter(members[:, 0], members[:, 1],
                    color=plt.cm.Paired(i/3.),
                    label=species,
                    edgecolor='black', s=60)

    plt.title(title)
    plt.xlabel('Sepal Length (standardized)')
    plt.ylabel('Sepal Width (standardized)')
    plt.legend()
    plt.show()

# Draw the decision regions of every model in ``models`` over the standardized
# validation data.
for label, estimator in models.items():
    plot_multiclass_decision_regions(
        X_val_std, y_val, estimator, title=f'{label} Decision Regions'
    )



    

