In [2]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

# Load the datasets
class_data = pd.read_csv('class.csv')
zoo_data = pd.read_csv('zoo.csv')

# Merge the class labels with the zoo dataset based on the 'class_type' column
zoo_data_merged = zoo_data.merge(class_data, how='left', left_on='class_type', right_on='Class_Number')

def encode_categorical_columns(df):
    encoded_df = df.copy()
    encoder = LabelEncoder()
    
    # Selecting the categorical columns
    categorical_columns = df.select_dtypes(include=['object']).columns
    
    # Encoding only categorical variables
    for column in categorical_columns:
        encoded_df[column] = encoder.fit_transform(encoded_df[column])
    
    return encoded_df

zoo_data_merged = encode_categorical_columns(zoo_data_merged)

# Define the features and the target variable
X = zoo_data_merged.drop(['animal_name', 'class_type', 'Class_Number', 'Number_Of_Animal_Species_In_Class', 'Class_Type'], axis=1)
y = zoo_data_merged['class_type']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Create and train the KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X_train_scaled, y_train)

# Predict the labels for the test set
y_pred = knn.predict(X_test_scaled)

# Calculate the accuracy and the classification report
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f'Accuracy: {accuracy}')
print(f'Classification report:\n{report}')

Accuracy: 0.9354838709677419
Classification report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00         3
           3       0.50      1.00      0.67         1
           4       1.00      1.00      1.00         2
           5       1.00      0.50      0.67         2
           6       0.83      1.00      0.91         5
           7       1.00      0.67      0.80         3

    accuracy                           0.94        31
   macro avg       0.90      0.88      0.86        31
weighted avg       0.96      0.94      0.93        31

