<a href="https://colab.research.google.com/github/Gokulnaath-gif/24ADI003-24BAD026/blob/main/EX%205.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import zipfile

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# --- IMPORTANT: Upload the dataset ZIP file to the Colab environment ---
# You can drag and drop it into the files pane on the left,
# or use the file upload function.
# The script reads the archive from 'zip_path'. If your upload has a different
# name (for example 'archive (26).zip'), update 'zip_path' below to match.
zip_path = '/content/archive.zip'


def load_csv_from_zip(path):
    """Read the first CSV member of the ZIP archive at *path* into a DataFrame.

    The archive listing is printed so the user can see what was uploaded.
    Members that are not ``.csv`` files (directories, READMEs, macOS
    ``__MACOSX/`` resource entries) are skipped rather than blindly parsing
    whatever happens to be listed first.

    Raises:
        FileNotFoundError: if *path* does not exist.
        ValueError: if the archive contains no CSV member.
    """
    with zipfile.ZipFile(path, 'r') as zip_ref:
        members = zip_ref.namelist()
        print("Files inside ZIP:", members)
        csv_members = [
            name for name in members
            if name.lower().endswith('.csv') and not name.startswith('__MACOSX/')
        ]
        if not csv_members:
            raise ValueError("no CSV file found inside the ZIP archive")
        # Open the member in a 'with' block so its handle is closed after parsing.
        with zip_ref.open(csv_members[0]) as csv_file:
            return pd.read_csv(csv_file)


# On any failure fall back to an empty DataFrame; later steps test 'df.empty'
# and skip themselves instead of raising NameError.
try:
    df = load_csv_from_zip(zip_path)
except FileNotFoundError:
    print(f"Error: The file '{zip_path}' was not found.")
    print("Please ensure you have uploaded the ZIP file to the Colab environment.")
    df = pd.DataFrame()
except Exception as e:
    # Broad on purpose: this is the top-level boundary of the script, and a
    # corrupt archive or malformed CSV should be reported, not crash the cell.
    print(f"An error occurred while processing the ZIP file: {e}")
    df = pd.DataFrame()

# Explore the loaded data, then train and evaluate a K-Nearest-Neighbours
# classifier on it. The whole section is skipped when 'df' is empty.
if not df.empty:
    print("\nFirst 5 Rows:")
    print(df.head())
    print("\nMissing Values:")
    print(df.isnull().sum())
    # 'id' is dropped when present so it is not carried along as data.
    if 'id' in df.columns:
        df.drop(columns=['id'], inplace=True)

    required_features = ['radius_mean','texture_mean','perimeter_mean','area_mean','smoothness_mean']

    if 'diagnosis' not in df.columns:
        # Report and fall through. The interactive-shell helper exit() is not
        # a reliable way to stop a notebook cell, and if execution continued
        # past it the later df['diagnosis'] lookup would raise KeyError.
        print("Error: 'diagnosis' column not found. Cannot proceed with encoding.")
    else:
        # Encode the target labels as integers.
        le = LabelEncoder()
        df['diagnosis'] = le.fit_transform(df['diagnosis'])

        missing_cols = [col for col in required_features if col not in df.columns]
        if missing_cols:
            print(f"Error: Missing required feature columns: {missing_cols}. Cannot proceed with model training.")
        else:
            X = df[required_features]
            y = df['diagnosis']

            # Split BEFORE scaling: the scaler must learn its mean/variance
            # from the training rows only, otherwise test-set statistics leak
            # into training and the reported accuracy is optimistic.
            X_train_raw, X_test_raw, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42)

            scaler = StandardScaler()
            X_train = scaler.fit_transform(X_train_raw)
            X_test = scaler.transform(X_test_raw)
            # Whole-dataset scaled view, kept under its original name for any
            # later code that refers to it; it is not used for training here.
            X_scaled = scaler.transform(X)

            knn = KNeighborsClassifier(n_neighbors=5)
            knn.fit(X_train, y_train)

            y_pred = knn.predict(X_test)

            print("\nAccuracy:", accuracy_score(y_test, y_pred))
            print("\nClassification Report:\n")
            print(classification_report(y_test, y_pred))

            cm = confusion_matrix(y_test, y_pred)
            print("\nConfusion Matrix:\n", cm)

            plt.figure()
            sns.heatmap(cm, annot=True, fmt='d')
            plt.xlabel("Predicted Label")
            plt.ylabel("Actual Label")
            plt.title("Confusion Matrix - KNN")
            plt.show()

            # np.where returns a tuple of index arrays; element 0 holds the
            # positions where prediction and truth disagree.
            misclassified = np.where(y_test != y_pred)
            print("\nNumber of Misclassified Samples:", len(misclassified[0]))

            # Sweep K to show how test accuracy changes with the neighbour count.
            k_values = range(1, 21)
            accuracy_list = []
            for k in k_values:
                model = KNeighborsClassifier(n_neighbors=k)
                model.fit(X_train, y_train)
                pred = model.predict(X_test)
                accuracy_list.append(accuracy_score(y_test, pred))

            plt.figure()
            plt.plot(k_values, accuracy_list)
            plt.xlabel("K Value")
            plt.ylabel("Accuracy")
            plt.title("Accuracy vs K")
            plt.show()


Error: The file '/content/archive.zip' was not found.
Please ensure you have uploaded the ZIP file to the Colab environment.
