In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [4]:
wine_df=pd.read_csv('wine.csv')

In [5]:
wine_df.columns

Index(['Alcohol', 'Malic_Acid', 'Ash', 'Ash_Alcanity', 'Magnesium',
       'Total_Phenols', 'Flavanoids', 'Nonflavanoid_Phenols',
       'Proanthocyanins', 'Color_Intensity', 'Hue', 'OD280', 'Proline',
       'Customer_Segment'],
      dtype='object')

In [7]:
# Features (all columns except 'Customer_Segment')
wine_X = wine_df.drop(columns=['Customer_Segment'])

# Target (the 'Customer_Segment' column)
wine_y = wine_df['Customer_Segment']


In [6]:
wine_df.head(5)

Unnamed: 0,Alcohol,Malic_Acid,Ash,Ash_Alcanity,Magnesium,Total_Phenols,Flavanoids,Nonflavanoid_Phenols,Proanthocyanins,Color_Intensity,Hue,OD280,Proline,Customer_Segment
0,14.23,1.71,2.43,15.6,127,2.8,3.06,0.28,2.29,5.64,1.04,3.92,1065,1
1,13.2,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.4,1050,1
2,13.16,2.36,2.67,18.6,101,2.8,3.24,0.3,2.81,5.68,1.03,3.17,1185,1
3,14.37,1.95,2.5,16.8,113,3.85,3.49,0.24,2.18,7.8,0.86,3.45,1480,1
4,13.24,2.59,2.87,21.0,118,2.8,2.69,0.39,1.82,4.32,1.04,2.93,735,1


In [8]:
test_sizes = [0.3, 0.4, 0.5]
k_values = [1, 2, 3]
results = []

# Function to run KNN
def run_knn(X, y):
    for test_size in test_sizes:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
        for k in k_values:
            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)
            
            # Store classification report and confusion matrix
            report = classification_report(y_test, y_pred, output_dict=True)
            cm = confusion_matrix(y_test, y_pred)
            results.append([test_size, k, report, cm])

# Run KNN for Wine dataset
run_knn(wine_X, wine_y)


In [9]:
for result in results:
    test_size, k, report, cm = result
    print(f"Test Size: {test_size}, K-Value: {k}")
    print("Classification Report:")
    print(pd.DataFrame(report).transpose())
    print("Confusion Matrix:")
    print(cm)
    print("-" * 50)


Test Size: 0.3, K-Value: 1
Classification Report:
              precision    recall  f1-score    support
1              0.809524  0.894737  0.850000  19.000000
2              0.842105  0.761905  0.800000  21.000000
3              0.714286  0.714286  0.714286  14.000000
accuracy       0.796296  0.796296  0.796296   0.796296
macro avg      0.788638  0.790309  0.788095  54.000000
weighted avg   0.797503  0.796296  0.795370  54.000000
Confusion Matrix:
[[17  0  2]
 [ 3 16  2]
 [ 1  3 10]]
--------------------------------------------------
Test Size: 0.3, K-Value: 2
Classification Report:
              precision    recall  f1-score    support
1              0.739130  0.894737  0.809524  19.000000
2              0.695652  0.761905  0.727273  21.000000
3              0.625000  0.357143  0.454545  14.000000
accuracy       0.703704  0.703704  0.703704   0.703704
macro avg      0.686594  0.671261  0.663781  54.000000
weighted avg   0.692633  0.703704  0.685506  54.000000
Confusion Matrix:
[[17  

In [10]:
from tabulate import tabulate

In [17]:
table = []

for result in results:
    test_size, k, report, cm = result
    table.append([test_size, k, report, cm])

# Display the table
headers = ["Test Size", "K-Value", "Classification Report", "Confusion Matrix"]
print(tabulate(table, headers=headers,tablefmt='fancy_grid'))

╒═════════════╤═══════════╤═══════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╤════════════════════╕
│   Test Size │   K-Value │ Classification Report                                                                                                                                                                                                                                                                                  

In [14]:
df_results = pd.DataFrame(results, columns=["Test Size", "K-Value", "Classification Report", "Confusion Matrix"])

# Save the results to an Excel file
df_results.to_excel("wine_knn_results.xlsx", index=False)

print("Results saved to 'wine_knn_results.xlsx'")

Results saved to 'wine_knn_results.xlsx'


In [25]:
abalone_df=pd.read_csv('abalone.csv');

In [26]:
abalone_df.columns

Index(['Sex', 'Length', 'Diameter', 'Height', 'Whole weight', 'Shucked weight',
       'Viscera weight', 'Shell weight', 'Rings'],
      dtype='object')

In [27]:
abalone_X = abalone_df.drop(columns=['Rings'])
abalone_y = abalone_df['Rings']

# Convert categorical 'Sex' column into numerical form using one-hot encoding
abalone_X = pd.get_dummies(abalone_X, columns=['Sex'], drop_first=True)

In [28]:
test_sizes = [0.3, 0.4, 0.5]
k_values = [1, 2, 3]
results = []

# Function to run KNN and store results
def run_knn(X, y):
    for test_size in test_sizes:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
        for k in k_values:
            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)

            # Generate classification report and confusion matrix
            report = classification_report(y_test, y_pred, output_dict=False)
            cm = confusion_matrix(y_test, y_pred)

            # Store results in a list
            results.append([test_size, k, classification_report(y_test, y_pred, output_dict=False), cm])

# Run KNN for Abalone dataset
run_knn(abalone_X, abalone_y)



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

In [29]:
for result in results:
    test_size, k, report, cm = result
    print(f"\nTest Size: {test_size}, K-Value: {k}")
    print("Classification Report:")
    print(report)
    print("Confusion Matrix:")
    print(cm)


Test Size: 0.3, K-Value: 1
Classification Report:
              precision    recall  f1-score   support

           3       0.12      0.33      0.18         3
           4       0.27      0.32      0.29        19
           5       0.22      0.18      0.20        45
           6       0.21      0.16      0.18        77
           7       0.25      0.26      0.25       129
           8       0.24      0.27      0.26       164
           9       0.26      0.25      0.26       212
          10       0.23      0.23      0.23       191
          11       0.19      0.22      0.21       137
          12       0.08      0.07      0.08        84
          13       0.06      0.08      0.07        53
          14       0.05      0.06      0.06        34
          15       0.04      0.03      0.03        31
          16       0.16      0.14      0.15        21
          17       0.06      0.07      0.06        14
          18       0.17      0.07      0.10        15
          19       0.00      0

In [30]:
df_results = pd.DataFrame(results)

# Save the results to an Excel file
df_results.to_excel("abalone_knn_results.xlsx", index=False)

print("Results saved to 'abalone_knn_results.xlsx'")

Results saved to 'abalone_knn_results.xlsx'


In [33]:
iris_df=pd.read_csv('IRIS.csv')

In [34]:
iris_df.columns


Index(['Id', 'SepalLengthCm', 'SepalWidthCm', 'PetalLengthCm', 'PetalWidthCm',
       'Species'],
      dtype='object')

In [35]:
iris_X = iris_df.drop(columns=['Id', 'Species'])  # Exclude 'Id' as it's not a feature
iris_y = iris_df['Species']

In [36]:
test_sizes = [0.3, 0.4, 0.5]
k_values = [1, 2, 3]
results = []

In [37]:
def run_knn(X, y):
    for test_size in test_sizes:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42)
        for k in k_values:
            knn = KNeighborsClassifier(n_neighbors=k)
            knn.fit(X_train, y_train)
            y_pred = knn.predict(X_test)

            # Generate classification report and confusion matrix
            report = classification_report(y_test, y_pred, output_dict=True)
            cm = confusion_matrix(y_test, y_pred)

In [38]:
results.append({
                'Test Size': test_size,
                'K-Value': k,
                'Classification Report': str(report),  # Convert dict to string
                'Confusion Matrix': str(cm)             # Convert matrix to string
            })

In [39]:
run_knn(iris_X, iris_y)

In [40]:
# Convert results into a DataFrame
df_results = pd.DataFrame(results)

# Save the results to an Excel file
df_results.to_excel("iris_knn_results.xlsx", index=False)

print("Results saved to 'iris_knn_results.xlsx'")

Results saved to 'iris_knn_results.xlsx'
