In [1]:
import pandas as pd
import numpy as np 

In [2]:
# Load the Attributes / Groups / Values lookup table and dump it for inspection.
# NOTE(review): this is an absolute, machine-specific path, so the cell only runs
# on the author's machine; consider a path relative to a configurable data directory.
df = pd.read_csv('/Users/dynaneshwarijangale/Downloads/Tennis.csv')
print(df)

   Attributes     Groups                        Values
0     BallPos    Serving  Right Section of Middle Road
1         NaN        NaN                    Right Zone
2         NaN        NaN   Left Section of Middle Road
3         NaN        NaN                     Left Zone
4         NaN        NaN  Outside Corner of Right Zone
..        ...        ...                           ...
59       Spin    Up spin                      Top Spin
60        NaN    No spin                      Flat Hit
61        NaN  Down spin                     Down Spin
62        NaN     Others                    Data Error
63        NaN        NaN                         Score

[64 rows x 3 columns]


In [5]:
# Hand-written sample records. Each record maps the four attribute names
# (BallPos, StrikePos, StrikeTech, Spin) to one value.
# NOTE(review): the first two records are identical and there are only three
# rows in total — presumably placeholder data; confirm before trusting metrics.
dataset = [
    {'BallPos': 'Right Section of Middle Road', 'StrikePos': 'Right Section of Middle Road', 'StrikeTech': 'Overhand Serving', 'Spin': 'Top Spin'},
    {'BallPos': 'Right Section of Middle Road', 'StrikePos': 'Right Section of Middle Road', 'StrikeTech': 'Overhand Serving', 'Spin': 'Top Spin'},
    {'BallPos': 'Right Zone', 'StrikePos': 'Right Zone', 'StrikeTech': 'Overhand Serving', 'Spin': 'Top Spin'},
]

# Allowed values per attribute, used as one-hot vocabularies by encode_dataset.
# List order is significant: a value's index in its list is the position of the
# 1 inside that attribute's one-hot slice.
# Note that 'Score' and 'Data Error' occur in every one of the four lists.
all_ball_positions = ['Right Section of Middle Road', 'Right Zone', 'Left Section of Middle Road', 'Left Zone', 'Outside Corner of Right Zone', 'Outside Corner of Left Zone', 'Middle of Right Zone', 'Middle of Left zone', 'Corner on Right', 'Corner on Left', 'Right Zone Frontcourt', 'Right Zone Backcourt', 'Left Zone Frontcourt', 'Left Zone Backcourt', 'Left Zone Front Field', 'Left Zone Midfield', 'Middle Road Left Area Front Field', 'Middle Road Left Area Central Front', 'Middle Road Right Front', 'Middle Front Right Field', 'Right Zone Front', 'Right Zone Midfield', 'Left Zone Midfielder', 'Left Zone Backfield', 'Midfield Left Midfield', 'Middle Road Left Area Backfield', 'Middle Road Right Area Midfielder', 'Middle Road Right Area Backfield', 'Right Zone Midfielder', 'Right Zone Backfield', 'Unexpected Ball', 'Score', 'Data Error']
all_strike_positions = ['Right Section of Middle Road', 'Right Zone', 'Left Section of Middle Road', 'Left Zone', 'Forehand', 'Backhand', 'Sideway', 'Reversed Sideway', 'Untouch Ball', 'Score', 'Data Error']
all_strike_techniques = ['Overhand Serving', 'Underhand Serving', 'Drive', 'Volleyball', 'High Voltage Ball', 'Floor Pressure', 'Intercept', 'Drop Shot', 'Push, Block', 'Rebound', 'Pick', 'Cutting', 'Unexpected Techniques', 'Score', 'Data Error']
all_spins = ['Top Spin', 'Flat Hit', 'Down Spin', 'Data Error', 'Score']

def encode_dataset(dataset, attribute_values=None):
    """One-hot encode a collection of attribute dictionaries.

    Every entry becomes one flat list of 0/1 integers: one one-hot slice per
    attribute, concatenated in the iteration order of ``attribute_values``.

    Values are looked up by attribute *name*, not by their position inside the
    entry dict. Pairing by position is fragile because several vocabularies
    share values (e.g. 'Score', 'Data Error'), so a dict built in a different
    key order could be encoded into the wrong slice without any error.

    Args:
        dataset: Iterable of dicts mapping attribute name -> value.
        attribute_values: Optional mapping of attribute name -> list of
            allowed values. When omitted, the module-level vocabularies
            (BallPos, StrikePos, StrikeTech, Spin) are used.

    Returns:
        A list with one encoded row (list of ints) per entry.

    Raises:
        ValueError: If an entry lacks one of the attributes, or holds a value
            that is not present in that attribute's vocabulary.
    """
    if attribute_values is None:
        # Resolved at call time so the default follows the module-level lists.
        attribute_values = {
            'BallPos': all_ball_positions,
            'StrikePos': all_strike_positions,
            'StrikeTech': all_strike_techniques,
            'Spin': all_spins,
        }

    encoded_data = []
    for entry in dataset:
        encoded_entry = []
        for attribute, possible_values in attribute_values.items():
            if attribute not in entry:
                # A missing attribute would otherwise produce a shorter row
                # and silently corrupt later distance computations.
                raise ValueError(
                    "entry {!r} is missing attribute {!r}".format(entry, attribute)
                )
            value = entry[attribute]
            try:
                hot_index = possible_values.index(value)
            except ValueError:
                raise ValueError(
                    "unknown value {!r} for attribute {!r}".format(value, attribute)
                ) from None
            encoded_attribute = [0] * len(possible_values)
            encoded_attribute[hot_index] = 1
            encoded_entry.extend(encoded_attribute)
        encoded_data.append(encoded_entry)
    return encoded_data

# One-hot encode the sample records into flat 0/1 vectors (one row per record).
encoded_dataset = encode_dataset(dataset)

def knn(train_data, test_instance, k):
    """Return the ``k`` training rows closest to ``test_instance``.

    Closeness is the Euclidean distance over element pairs produced by
    ``zip``, so if the two sequences differ in length only the leading
    elements they share are compared.

    Args:
        train_data: Iterable of numeric sequences.
        test_instance: Numeric sequence to compare against.
        k: Number of neighbours to keep (applied as a slice bound).

    Returns:
        A list of ``(train_row, distance)`` tuples ordered by increasing
        distance; rows at equal distance keep their original relative order.
    """
    def euclidean(row):
        squared_total = 0
        for left, right in zip(row, test_instance):
            squared_total += (left - right) ** 2
        return squared_total ** 0.5

    scored = [(row, euclidean(row)) for row in train_data]
    # sorted() is stable, matching an in-place list.sort() on the same key.
    return sorted(scored, key=lambda pair: pair[1])[:k]

def predict_class(neighbors):
    """Pick the majority label among the given neighbours.

    Each neighbour is indexed as ``neighbor[0][-1]``: the last element of its
    first item is taken as that neighbour's label.

    Args:
        neighbors: Sequence of items whose first element is a row (as
            returned by ``knn``).

    Returns:
        The label with the most votes; on a tie, the label that was
        encountered first wins.

    Raises:
        ValueError: If ``neighbors`` is empty.
    """
    labels = [item[0][-1] for item in neighbors]
    tally = {}
    for label in labels:
        tally[label] = tally.get(label, 0) + 1
    return max(tally, key=tally.get)

def accuracy(actual, predicted):
    """Fraction of positions where the prediction equals the actual label.

    Args:
        actual: Sequence of ground-truth labels.
        predicted: Sequence of predicted labels, paired with ``actual`` by
            position.

    Returns:
        ``correct / len(actual)`` as a float, or ``0.0`` when ``actual`` is
        empty. The empty case mirrors ``precision``/``recall``/``f1_score``,
        which return 0 rather than dividing by zero.
    """
    total = len(actual)
    if total == 0:
        # An empty test split would otherwise raise ZeroDivisionError.
        return 0.0
    correct = sum(1 for act, pred in zip(actual, predicted) if act == pred)
    return correct / float(total)

def precision(actual, predicted, positive_label):
    """Share of positive predictions that are actually positive.

    Args:
        actual: Sequence of ground-truth labels.
        predicted: Sequence of predicted labels, paired by position.
        positive_label: The label counted as the positive class.

    Returns:
        ``tp / (tp + fp)``, or ``0`` when nothing was predicted positive.
    """
    true_positives = 0
    false_positives = 0
    for act, pred in zip(actual, predicted):
        if pred == positive_label:
            if act == positive_label:
                true_positives += 1
            else:
                false_positives += 1

    predicted_positives = true_positives + false_positives
    if predicted_positives == 0:
        return 0
    return true_positives / predicted_positives


def recall(actual, predicted, positive_label):
    """Share of actual positives that were predicted positive.

    Args:
        actual: Sequence of ground-truth labels.
        predicted: Sequence of predicted labels, paired by position.
        positive_label: The label counted as the positive class.

    Returns:
        ``tp / (tp + fn)``, or ``0`` when there are no actual positives.
    """
    found = 0
    missed = 0
    for act, pred in zip(actual, predicted):
        if act == positive_label:
            if pred == positive_label:
                found += 1
            else:
                missed += 1

    actual_positives = found + missed
    if actual_positives == 0:
        return 0
    return found / actual_positives



def f1_score(precision, recall):
    """Harmonic mean of a precision value and a recall value.

    Args:
        precision: Precision as a number.
        recall: Recall as a number.

    Returns:
        ``2 * precision * recall / (precision + recall)``, or ``0`` when the
        two inputs sum to zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0
    product = precision * recall
    return 2 * product / denominator


def confusion_matrix(actual, predicted):
    """Count (actual, predicted) label pairs as a nested dictionary.

    The label set is the union of labels seen in ``actual`` and in
    ``predicted``. Building it from ``actual`` alone raises ``KeyError`` as
    soon as the classifier predicts a label that never occurs in the ground
    truth.

    Args:
        actual: Iterable of ground-truth labels (hashable).
        predicted: Iterable of predicted labels, paired by position.

    Returns:
        ``matrix[actual_label][predicted_label] -> count``. Rows and columns
        follow first-seen order (actual labels first, then predicted-only
        labels), so the layout is deterministic across runs.
    """
    # Materialise once so one-shot iterators are not exhausted before pairing.
    actual = list(actual)
    predicted = list(predicted)

    # dict.fromkeys de-duplicates while keeping first-seen order.
    labels = list(dict.fromkeys(actual + predicted))
    matrix = {row: {col: 0 for col in labels} for row in labels}
    for act, pred in zip(actual, predicted):
        matrix[act][pred] += 1
    return matrix

# Sequential (unshuffled) split: rows keep their original order, so the first
# 70% of the encoded rows train the model and the remainder are held out.
train_data = encoded_dataset[:int(0.7 * len(encoded_dataset))]  # 70% of data for training
test_data = encoded_dataset[int(0.7 * len(encoded_dataset)):]   # 30% of data for testing


# Classify every held-out row by majority vote among its k nearest training rows.
# NOTE(review): with the 3-row sample dataset the training split holds 2 rows,
# so fewer than k neighbours are available.
k = 3
predictions = []
# The "label" is the last element of each encoded row, i.e. the final one-hot
# slot of the last encoded attribute ('Score' in all_spins) — not a separate
# target column. NOTE(review): confirm this is the intended target.
actual_labels = [entry[-1] for entry in test_data]
for test_instance in test_data:
    # Dropping the last element makes the query one item shorter than the
    # training rows; knn pairs elements with zip(), which stops at the shorter
    # sequence, so each training row's label slot is left out of the distance.
    neighbors = knn(train_data, test_instance[:-1], k)
    predicted_class = predict_class(neighbors)
    predictions.append(predicted_class)


# Score the predictions. Label 1 (last one-hot slot set) is the positive class.
acc = accuracy(actual_labels, predictions)
precision_ = precision(actual_labels, predictions, positive_label=1)  # trailing underscore keeps the precision() function unshadowed
recall_ = recall(actual_labels, predictions, positive_label=1)  # trailing underscore keeps the recall() function unshadowed
f1 = f1_score(precision_, recall_)
conf_matrix = confusion_matrix(actual_labels, predictions)

print("Accuracy:", acc)
print("Precision:", precision_)
print("Recall:", recall_)
print("F1 Score:", f1)
print("Confusion Matrix:")
# Each printed line is one actual label's inner dict of predicted-label counts;
# the actual label itself (the outer key) is not printed.
for row in conf_matrix:
    print(conf_matrix[row])

Accuracy: 1.0
Precision: 0
Recall: 0
F1 Score: 0
Confusion Matrix:
{0: 1}
