In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Read the CSV file
df = pd.read_csv('C:/Users/zeesh/Downloads/penguins.csv')
print(df.head())

# Convert categorical variables to numerical using one-hot encoding
df = pd.get_dummies(df, columns=['sex', 'island'], drop_first=True)

# Encode the target variable 'species'
label_encoder = LabelEncoder()
df['species'] = label_encoder.fit_transform(df['species'])

# Select appropriate features for k-NN, Logistic Regression, and SVM
features = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'island_Dream', 'island_Torgersen']
X = df[features]
y = df['species']  # Use the encoded 'species' column as the target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# K-NN for different values of K
for k in range(1, 11):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train_scaled, y_train)
    y_pred = knn.predict(X_test_scaled)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'k-NN (k={k}) Accuracy: {accuracy:.4f}')

# Logistic Regression with increased max_iter
logistic_reg = LogisticRegression(max_iter=1000)
logistic_reg.fit(X_train_scaled, y_train)
y_pred_logistic = logistic_reg.predict(X_test_scaled)
accuracy_logistic = accuracy_score(y_test, y_pred_logistic)
print(f'Logistic Regression Accuracy: {accuracy_logistic:.4f}')

# SVM
svm_model = SVC()
svm_model.fit(X_train_scaled, y_train)
y_pred_svm = svm_model.predict(X_test_scaled)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print(f'SVM Accuracy: {accuracy_svm:.4f}')


  species     island  bill_length_mm  bill_depth_mm  flipper_length_mm  \
0  Adelie  Torgersen            39.1           18.7                181   
1  Adelie  Torgersen            39.5           17.4                186   
2  Adelie  Torgersen            40.3           18.0                195   
3  Adelie  Torgersen            36.7           19.3                193   
4  Adelie  Torgersen            39.3           20.6                190   

   body_mass_g     sex  
0         3750    MALE  
1         3800  FEMALE  
2         3250  FEMALE  
3         3450  FEMALE  
4         3650    MALE  
k-NN (k=1) Accuracy: 0.9851
k-NN (k=2) Accuracy: 1.0000
k-NN (k=3) Accuracy: 1.0000
k-NN (k=4) Accuracy: 1.0000
k-NN (k=5) Accuracy: 1.0000
k-NN (k=6) Accuracy: 1.0000
k-NN (k=7) Accuracy: 1.0000
k-NN (k=8) Accuracy: 1.0000
k-NN (k=9) Accuracy: 1.0000
k-NN (k=10) Accuracy: 1.0000
Logistic Regression Accuracy: 1.0000
SVM Accuracy: 1.0000
