<a href="https://colab.research.google.com/github/Maruf346/AI-ML-with-python/blob/main/KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **A. Experiment: Best Train-Test Split for Iris Dataset**

In [36]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

# Load the Iris feature matrix and the matching class labels.
iris = load_iris()
X, y = iris.data, iris.target

# Fractions of the data to hold out for testing, tried one after another.
ratios = [0.1, 0.2, 0.3, 0.4, 0.5,0.6,0.7,0.8,0.9]

# Accuracy observed for each held-out fraction, in the order they were tried.
accuracy_by_ratio = {}

for test_ratio in ratios:
    # One split per ratio, always with the same seed, so every run reproduces
    # the same partition. NOTE(review): each accuracy therefore reflects a
    # single partition rather than an average over several.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=42
    )

    # Standardise features using statistics learned from the training part
    # only, then apply that same transformation to the held-out part.
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 3-nearest-neighbour classifier trained on the scaled training data.
    model = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
    preds = model.predict(X_test_scaled)

    acc = accuracy_score(y_test, preds)
    accuracy_by_ratio[test_ratio] = acc
    print(f"Test size: {test_ratio*100}%, Accuracy: {acc:.2f}")

# Pick the ratio with the highest accuracy. The leading (0, 0) entry is the
# baseline that a ratio must strictly beat, and max() keeps the first maximal
# item, so ties go to the ratio that was tried earliest.
best_ratio, best_acc = max(
    [(0, 0), *accuracy_by_ratio.items()],
    key=lambda pair: pair[1],
)

print(f"\nBest test ratio is: {best_ratio} with accuracy: {best_acc:.2f}")



Test size: 10.0%, Accuracy: 1.00
Test size: 20.0%, Accuracy: 1.00
Test size: 30.0%, Accuracy: 1.00
Test size: 40.0%, Accuracy: 0.98
Test size: 50.0%, Accuracy: 0.96
Test size: 60.0%, Accuracy: 0.92
Test size: 70.0%, Accuracy: 0.94
Test size: 80.0%, Accuracy: 0.95
Test size: 90.0%, Accuracy: 0.93

Best test ratio is: 0.1 with accuracy: 1.00


# **B. Experiment: Best Train-Test Split for a Custom Dataset**

In [35]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Hand-made dataset: six rows with two numeric features and a two-valued label.
data = {
    "Height": [150, 160, 170, 180, 190, 200],
    "Weight": [45, 55, 65, 75, 85, 95],
    "Gender": ["F", "F", "M", "M", "M", "M"],
}
df = pd.DataFrame(data)

# Features are the two numeric columns; the string label is integer-encoded.
feature_columns = ["Height", "Weight"]
X = df[feature_columns]
y = LabelEncoder().fit_transform(df["Gender"])

# Fractions of the data to hold out for testing, tried one after another.
ratios = [0.1, 0.2, 0.3, 0.4, 0.5]

# Accuracy observed for each held-out fraction, in the order they were tried.
accuracy_by_ratio = {}

for test_ratio in ratios:
    # One fixed-seed split per ratio. NOTE(review): with only six rows the
    # held-out part is at most three samples, so the accuracies are very coarse.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_ratio, random_state=18
    )

    # Standardise features using statistics learned from the training part
    # only, then apply that same transformation to the held-out part.
    scaler = StandardScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 3-nearest-neighbour classifier trained on the scaled training data.
    model = KNeighborsClassifier(n_neighbors=3).fit(X_train_scaled, y_train)
    preds = model.predict(X_test_scaled)

    acc = accuracy_score(y_test, preds)
    accuracy_by_ratio[test_ratio] = acc

    print(f"Test size: {test_ratio*100}%, Accuracy: {acc:.2f}")

# Pick the ratio with the highest accuracy. The leading (0, 0) entry is the
# baseline that a ratio must strictly beat, and max() keeps the first maximal
# item, so ties go to the ratio that was tried earliest.
best_ratio, best_acc = max(
    [(0, 0), *accuracy_by_ratio.items()],
    key=lambda pair: pair[1],
)

print(f"\nBest test ratio is: {best_ratio} with accuracy: {best_acc:.2f}")


Test size: 10.0%, Accuracy: 1.00
Test size: 20.0%, Accuracy: 1.00
Test size: 30.0%, Accuracy: 1.00
Test size: 40.0%, Accuracy: 0.67
Test size: 50.0%, Accuracy: 0.67

Best test ratio is: 0.1 with accuracy: 1.00
