In [2]:
# Download the tips dataset archive (sakshisatre/tips-dataset) with the Kaggle CLI.
!kaggle datasets download -d sakshisatre/tips-dataset

Dataset URL: https://www.kaggle.com/datasets/sakshisatre/tips-dataset
License(s): apache-2.0
tips-dataset.zip: Skipping, found more recently modified local copy (use --force to force download)


In [3]:
# Extract the downloaded archive into the working directory, overwriting any
# existing files (-o). The path is relative so the cell does not depend on a
# Colab-specific absolute location such as /content.
!unzip -o tips-dataset.zip

Archive:  /content/tips-dataset.zip
  inflating: tip.csv                 


In [9]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from collections import Counter
import math
import numpy as np

# KNN and Euclidean distance functions
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Args:
        point1: Sequence of numeric coordinates.
        point2: Sequence of numeric coordinates with the same length as
            ``point1``.

    Returns:
        float: Square root of the sum of squared coordinate differences.

    Raises:
        ValueError: If the two points do not have the same number of
            coordinates.
    """
    # Fail loudly on mismatched dimensions instead of silently ignoring the
    # extra trailing coordinates of the longer point.
    if len(point1) != len(point2):
        raise ValueError(
            f"points must have the same number of dimensions: "
            f"{len(point1)} != {len(point2)}"
        )

    squared_sum = 0
    for coord1, coord2 in zip(point1, point2):
        squared_sum += (coord1 - coord2) ** 2
    return math.sqrt(squared_sum)

# KNN regression to predict continuous values (like the tip)
def knn_classifier(training_data, new_point, k):
    """Predict a value for ``new_point`` by averaging its k nearest neighbours.

    Despite the name, this performs regression: the result is the mean of the
    neighbours' labels, not a majority vote.

    Args:
        training_data: Iterable of ``(point, label)`` pairs.
        new_point: Coordinates of the point to predict for.
        k: Number of nearest neighbours to average. If ``k`` exceeds the
            number of training pairs, all of them are used.

    Returns:
        The mean of the labels of the ``k`` training points closest to
        ``new_point`` (by ``euclidean_distance``).

    Raises:
        ValueError: If ``k`` is less than 1 or ``training_data`` is empty.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    distances = [
        (euclidean_distance(new_point, point), label)
        for point, label in training_data
    ]
    if not distances:
        raise ValueError("training_data must not be empty")

    # Sort on distance only. Sorting the raw tuples would break distance ties
    # by label value (always favouring the smaller label) and would fail for
    # labels that cannot be ordered. The sort is stable, so tied neighbours
    # keep their training-data order.
    distances.sort(key=lambda pair: pair[0])

    # Average the labels of the k closest training points
    neighbor_labels = [label for _, label in distances[:k]]
    return sum(neighbor_labels) / len(neighbor_labels)

# Function to calculate Mean Absolute Error (MAE)
def mean_absolute_error(y_true, y_pred):
    """Return the mean absolute difference between targets and predictions.

    Args:
        y_true: Array-like of observed values (list, tuple, or NumPy array).
        y_pred: Array-like of predicted values; combined with ``y_true``
            element-wise under NumPy broadcasting rules.

    Returns:
        numpy.float64: Mean of ``|y_true - y_pred|``.
    """
    # Coerce both inputs to float arrays: plain Python lists do not support
    # element-wise subtraction, and unsigned integer arrays would wrap around
    # on negative differences.
    actual = np.asarray(y_true, dtype=float)
    predicted = np.asarray(y_pred, dtype=float)
    return np.mean(np.abs(actual - predicted))



In [10]:
# Read the extracted CSV into a DataFrame
data = pd.read_csv('tip.csv')

# Pull out the predictor columns and the target column as NumPy arrays
feature_columns = ['total_bill', 'size']
features = data[feature_columns].values
labels = data['tip'].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
split = train_test_split(features, labels, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Min-max scale the features: fit on the training rows only, then apply the
# same transformation to both the training and the test rows
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Pair every scaled training row with its label for the KNN routine
training_data = [(row, target) for row, target in zip(X_train, y_train)]

In [11]:
# Predict a value for every test point by averaging its nearest neighbours
k = 3  # neighbours consulted per prediction
predictions = [
    knn_classifier(training_data, test_point, k)
    for test_point in X_test
]


In [13]:

# Evaluate the predictions with Mean Absolute Error (an error metric: lower is better)
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)
print(f"Mean Absolute Error (MAE): {mae:.2f}")

Mean Absolute Error (MAE): 0.75
