In [None]:
#imports(if needed)

Main Question: In which games did the Cougar Men's Basketball Team perform the best, and in which games did they perform the worst, based on the point difference (points scored - points allowed)? 
For this project we will compare a Binary Search Tree and a heap when used to find a specific game as well as the maximum and minimum score difference. I also want to sort and return all of the games based on the score difference.

The Data:
 The data comes from a .csv file sourced from https://www.sports-reference.com/cbb/schools/washington-state/men/2024-gamelogs-advanced.html. I loaded it into Excel and cleaned it, dropping all of the stats collected for each game except the scores, date, school played, and location of the game. 

 Below is the code for our nodes, which will hold the data for each game. The attributes stored are:
 game_number: The id for the game, assigned cumulatively (1 is the first game played...)
 date_played: The date the game was played
 location: The location where the game was played
 opponent: The school the Cougars played against
 WorL: Whether the game was a Win or a Loss
 team_score: The score of the Cougars at the end of the game
 opponent_score: The score of the opposing team at the end of the game
 score_difference: The difference in points scored between the Cougars and the opposing team, calculated as team_score - opponent_score
 The other attributes are used for the data structures.

In [None]:
class Node:
    """One game's record, usable as a node inside the tree-based structures.

    Games are read into nodes from the csv file later. Besides the values
    passed in, every node stores ``score_difference`` (``team_score`` minus
    ``opponent_score``) and two child links, ``left`` and ``right``, which
    start out empty.
    """

    def __init__(self, game_number, date_played, location, opponent, WorL, team_score, opponent_score):
        # Identification of the game.
        self.game_number, self.date_played = game_number, date_played
        self.location, self.opponent = location, opponent
        # Outcome of the game.
        self.WorL = WorL
        self.team_score, self.opponent_score = team_score, opponent_score
        # Positive when the team outscored the opponent, negative otherwise.
        self.score_difference = self.team_score - self.opponent_score
        # Child links used by the data structures that store the nodes.
        self.left = self.right = None

Binary Search Tree (BST):
Below is the code for the binary search tree. We start with one data point as our "root", or starting point. Each new data point is then inserted "under" an existing node as a "child", to the left if its value is less than that node's value and to the right otherwise. Because a node can have at most two children, if the spot on the chosen side is already taken we move "down" to that child and repeat the comparison there. This means that a binary search tree gains height as we add nodes. 

Important note: because of the left and right rules, the minimum and maximum values will always be the leftmost and rightmost nodes of the tree, meaning the time spent finding the minimum and maximum values is directly related to the height of our tree.

Searching for a node is different, because we have to check the values of individual nodes. If we search for the value the tree is built around (here, the score difference), each comparison tells us whether to continue to the left or to the right, which makes the search more efficient. If we search on any other attribute, the ordering does not help and we may have to visit every node, whether we find a match or not.

In [None]:
#Binary Search Tree 
class BST:
    """Binary search tree of game nodes ordered by ``score_difference``.

    Nodes with a smaller ``score_difference`` than the node being compared
    against go to its left; all others (including equal values) go to its
    right. The tree is not rebalanced.
    """

    def __init__(self):
        self.root = None

    def insert(self, node):
        """Insert a node into the tree using the _insert helper function.
        @param node, the node to add; it must expose score_difference, left and right."""
        if self.root is None:  # if there is no root then the new node becomes it.
            self.root = node
        else:  # otherwise walk down from the root to find the right place.
            self._insert(self.root, node)

    def _insert(self, current_node, new_node):
        """Place new_node somewhere below current_node.
        The new node goes to the left when its score difference is smaller than the
        current node's and to the right otherwise, recursing until a free spot is found.
        @param current_node, the node currently being compared against.
        @param new_node, the node to be placed."""
        if new_node.score_difference < current_node.score_difference:
            if current_node.left is None:  # free spot on the left: insert here.
                current_node.left = new_node
            else:  # spot taken: compare again with the left child.
                self._insert(current_node.left, new_node)
        else:
            if current_node.right is None:  # free spot on the right: insert here.
                current_node.right = new_node
            else:  # spot taken: compare again with the right child.
                self._insert(current_node.right, new_node)

    def height(self):
        """Return the height of the BST using the _height helper function.
        @return the height of the BST (-1 for an empty tree, 0 for a single node)."""
        return self._height(self.root)  # measure from the root node.

    def _height(self, starting_node):
        """Follow the left and right children down from starting_node to find the
        distance to its furthest descendant.
        @param starting_node, the node to measure from (may be None).
        @return the number of edges between starting_node and its furthest descendant,
        or -1 when starting_node is None."""
        if starting_node is None:  # an empty subtree sits one level "above" a leaf.
            return -1
        left_height = self._height(starting_node.left)
        right_height = self._height(starting_node.right)
        # the taller side plus 1 for the edge down from the current node.
        return 1 + max(left_height, right_height)

    def search(self, value):
        """Search the BST for a node with a specific score difference using the
        _search helper function.
        @param value, the score difference to be searched for in the BST.
        @return the node with the value if it exists, otherwise None."""
        return self._search(self.root, value)

    def _search(self, node, value):
        """Search the subtree rooted at node for a given score difference.
        @param node, the root of the subtree to search (may be None).
        @param value, the score difference to look for.
        @return the first matching node met on the way down, otherwise None."""
        if node is None:
            return None  # Value not found
        elif value < node.score_difference:
            return self._search(node.left, value)  # Value might be in the left subtree
        elif value > node.score_difference:
            return self._search(node.right, value)  # Value might be in the right subtree
        else:
            return node  # Value found

    def in_order_traversal(self):
        """Perform an in order traversal of the BST using the _in_order_traversal
        helper function.
        @return a list of the nodes sorted by ascending score difference."""
        return self._in_order_traversal(self.root, [])

    def _in_order_traversal(self, node, sorted_data):
        """Visit the subtree rooted at node in order (left, node, right), appending
        every visited node to sorted_data.
        @param node, the root of the subtree to visit (may be None).
        @param sorted_data, the list the visited nodes are appended to.
        @return sorted_data, the same list that was passed in."""
        if node is not None:
            self._in_order_traversal(node.left, sorted_data)
            # Append the node itself: nodes have no "data" attribute, so the
            # whole game record is what gets collected.
            sorted_data.append(node)
            self._in_order_traversal(node.right, sorted_data)
        return sorted_data
            
        


Heap:
The code below defines the heap data structure, specifically the Max Heap structure. Like the binary search tree, it is based on a tree-like structure with a root, or top node, that has child nodes under it. In this case, instead of choosing an arbitrary node as our starting value, we have the largest value as our root (in a Min Heap we would choose the smallest value). Afterwards, the remaining nodes are placed underneath, making sure that every child node's value is always less than or equal to the value of its parent node. 

This structure leads to less predictable traversal of the tree, as there are no left/right rules for child nodes, meaning a node's position depends on the order the data is read in. Either the maximum or the minimum value is easy to access, depending on the type of the heap (max or min), because it will be the root node. However, only one of them is easily accessible, while the other will be one of the leaf nodes of the tree.

Compared to the BST, insertion and removal of data are significantly faster; the trade-off is that only one of the minimum and maximum values is easy to find.


In [None]:
#Code for Heap
class Heap:
    """Max heap of game nodes keyed on ``score_difference``.

    The heap is stored in the list ``self.heap``: the children of the item at
    index ``i`` sit at indices ``2 * i + 1`` and ``2 * i + 2``, and no child has
    a larger ``score_difference`` than its parent.
    """

    def __init__(self):
        # Start empty so every method works before build_heap is called.
        self.heap = []

    def build_heap(self, data):
        """This function takes a set of nodes and builds it into a heap using the heapify function.
        The input list is copied, so it is left untouched.
        @param data, the list of nodes to be built into a heap."""
        self.heap = data[:]
        # Sift down every node that has at least one child, starting from the
        # last such node and finishing at the root.
        for i in range(len(self.heap) // 2 - 1, -1, -1):
            self.heapify(i)

    def heapify(self, i):
        """This function restores the max heap rule for the subtree rooted at index i,
        assuming both child subtrees already follow it: the item at i is swapped
        with its larger child until no child is larger than it.
        @param i, the index in self.heap of the item to sift down."""
        left = 2 * i + 1
        right = 2 * i + 2
        largest = i
        if left < len(self.heap) and self.heap[left].score_difference > self.heap[largest].score_difference:
            largest = left
        if right < len(self.heap) and self.heap[right].score_difference > self.heap[largest].score_difference:
            largest = right
        if largest != i:
            self.heap[i], self.heap[largest] = self.heap[largest], self.heap[i]
            self.heapify(largest)  # the swapped-down item may still be out of place.

    def search(self, value):
        """This function searches for a node with a specific score difference in the heap,
        it will return the node if found or None if not found.
        @param value, the score difference to be searched for in the heap
        @return a node with the value or None."""
        pending = [0]  # indices of subtree roots that still need to be checked.
        while pending:
            i = pending.pop()
            if i >= len(self.heap):
                continue  # ran off the end of the list: no such child.
            node = self.heap[i]
            if node.score_difference == value:
                return node
            # Children are never larger than their parent, so a subtree can only
            # contain the value when its root is larger than the value.
            if node.score_difference > value:
                pending.append(2 * i + 2)
                pending.append(2 * i + 1)
        return None
        
        

Now that we have our structures defined, let's load our data from the .csv file into the structures and perform our analysis by searching for the min and max score differences as well as a game based on a random trait.

In [None]:
#load data from csv file into nodes and create our Binary Search Tree.
node_list = []
# The with-statement closes the file even if a row fails to parse.
with open("MensBasketBallStats.csv", "r") as file:
    for line in file:  # Go line by line through the file and create a node for each line.
        if not line.strip():
            continue  # skip blank lines, which have no fields to parse.
        data = line.split(",")
        # Fields 0, 5 and 6 are converted to int; the rest are kept as text.
        node = Node(int(data[0]), data[1], data[2], data[3], data[4], int(data[5]), int(data[6]))
        node_list.append(node)  # add nodes to the node list.