# SEARCH ALGORITHMS

Search algorithms are step-by-step procedures used to locate and retrieve information from a set of data.

## Linear Search 
Also called sequential search, this algorithm checks whether a given value is an element of a specified list by scanning the elements one by one. It checks the elements of the list in order, from beginning to end, until it finds the target value.

### Searching

In [1]:
# Sample data for the search, plus a target that does not occur in it.
number_list = [10, 14, 19, 26, 27, 31, 33, 35, 42, 44]
target_number = 100

def linear_search(search_list, target_value):
    """Return the index of the first element equal to ``target_value``.

    Elements are examined from left to right. If none of them matches,
    a ``ValueError`` is raised whose message names the missing value.
    """
    for position, element in enumerate(search_list):
        if element == target_value:
            return position
    raise ValueError("{0} not in list".format(target_value))

try:
    # The search raises ValueError for a missing target, so both the
    # lookup and the report of its result live inside the try block.
    found_index = linear_search(number_list, target_number)
    print(found_index)
except ValueError as not_found:
    print("{0}".format(not_found))

100 not in list


### Finding duplicates

In [2]:
# Tour itinerary to scan and the city whose stops we want to locate.
tour_locations = [
    "New York City",
    "Los Angeles",
    "Bangkok",
    "Istanbul",
    "London",
    "New York City",
    "Toronto",
]
target_city = "New York City"

#Linear Search Algorithm
def linear_search(search_list, target_value):
    """Return the indices of every element equal to ``target_value``.

    The indices are listed in ascending order. A ``ValueError`` naming the
    missing value is raised when there is no match at all.
    """
    hits = [pos for pos, item in enumerate(search_list) if item == target_value]
    if hits:
        return hits
    raise ValueError("{0} not in list".format(target_value))

# Collect every index at which target_city occurs in tour_locations.
tour_stops = linear_search(tour_locations, target_city)
print(tour_stops)

[0, 5]


### Finding the maximum value

In [6]:
# Scores to scan for the maximum; the list is unsorted.
test_scores = [88, 93, 75, 100, 80, 67, 71, 92, 90, 83]

#Linear Search Algorithm
def linear_search(search_list):
    """Return the index of the largest element of ``search_list``.

    When the maximum occurs more than once, the index of its first
    occurrence is returned. An empty list yields ``None``.
    """
    best_index = None
    for idx, score in enumerate(search_list):
        # Compare against None explicitly: index 0 is falsy, so a plain
        # truthiness test would discard a maximum sitting in first position.
        if best_index is None or score > search_list[best_index]:
            best_index = idx
    return best_index

# linear_search returns an index, so highest_score holds the position of
# the top score rather than the score itself.
highest_score = linear_search(test_scores)

# Print the position first, then look up and print the score stored there.
print(highest_score)
print(test_scores[highest_score])

3
100


## Binary Search 
With a sorted data set, we can take advantage of the ordering to perform a search that is more efficient than going element by element.

### Using recursion

In [None]:
# define binary_search()
def binary_search(sorted_list, target):
    """Recursively binary-search ``sorted_list`` for ``target``.

    Each call inspects the middle element and recurses into a sliced copy
    of the half that can still contain the target. Returns the index of
    ``target`` in the list passed to this call, or the string
    ``'value not found'`` when it is absent.
    """
    if not sorted_list:
        return 'value not found'

    middle = len(sorted_list) // 2
    pivot = sorted_list[middle]

    if pivot == target:
        return middle
    if pivot > target:
        # The left slice starts at index 0, so its result needs no shift.
        return binary_search(sorted_list[:middle], target)

    # The right slice starts at middle + 1; shift a hit back into this
    # list's coordinates, but pass the not-found marker through untouched.
    found = binary_search(sorted_list[middle + 1:], target)
    return found if found == "value not found" else found + middle + 1
    
# Quick check: 16 is at index 3 of the sample list.
sorted_values = [13, 14, 15, 16, 17]
print(binary_search(sorted_values, 16))

### Using recursion and pointers

In [7]:
def binary_search(sorted_list, left_pointer, right_pointer, target):
    """Recursively binary-search a window of ``sorted_list`` for ``target``.

    The window runs from ``left_pointer`` (inclusive) to ``right_pointer``
    (exclusive). Returns the index of ``target`` in ``sorted_list``, or the
    string ``"value not found"`` once the window is empty.
    """
    # An empty window means every candidate position has been ruled out.
    if left_pointer >= right_pointer:
        return "value not found"

    middle = (left_pointer + right_pointer) // 2
    candidate = sorted_list[middle]

    if candidate == target:
        return middle

    if candidate > target:
        # Target can only be to the left: middle becomes the new right bound.
        lower, upper = left_pointer, middle
    elif candidate < target:
        # Target can only be to the right: start just past middle.
        lower, upper = middle + 1, right_pointer
    else:
        # Neither ordering holds; there is nothing further to narrow.
        return None
    return binary_search(sorted_list, lower, upper, target)
  
values = [77, 80, 102, 123, 288, 300, 540]
# The right pointer is exclusive, so the initial window is [0, len(values)).
start_of_values = 0
end_of_values = len(values)
result = binary_search(values, start_of_values, end_of_values, 288)

print("element {0} is located at index {1}".format(288, result))

element 288 is located at index 4


### Using loops

In [8]:
def binary_search(sorted_list, target):
    """Iteratively binary-search ``sorted_list`` for ``target``.

    Returns the index of ``target``, or the string ``"Value not in list"``
    when the search window shrinks to nothing without a match.
    """
    low, high = 0, len(sorted_list)

    # The window is [low, high): low is inclusive, high is exclusive.
    while low < high:
        middle = (low + high) // 2
        candidate = sorted_list[middle]
        if candidate == target:
            return middle
        if target < candidate:
            # Discard middle and everything to its right.
            high = middle
        elif target > candidate:
            # Discard middle and everything to its left.
            low = middle + 1

    return "Value not in list"

# Exercise the search with the last element, an interior hit, an early
# element, and two misses (one above and one below the list's range).
for wanted in (9, 10, 8, 4, 6):
    print(binary_search([5, 6, 7, 8, 9], wanted))

4
Value not in list
3
Value not in list
1


### SEARCH PROJECT
Search a sorted dataset that contains empty entries. We use pointers and loops.

In [2]:

def sparse_search(data, search_val):
    """Binary-search a sorted list whose gaps are filled with empty values.

    The search narrows its window with ``<`` and ``>`` comparisons, so the
    non-empty entries of ``data`` must be in ascending order. Any falsy
    entry (such as ``""``) is treated as an empty placeholder and skipped.

    Nothing is returned: the inputs and the outcome are printed.

    Args:
        data: Sparse, sorted sequence to search.
        search_val: Value to look for.
    """
    print("Data: " + str(data))
    print("Search Value: " + str(search_val))
    # Inclusive bounds of the window still under consideration.
    first = 0
    last = len(data) - 1
    while first <= last:
        mid = (first + last) // 2
        if not data[mid]:
            # The midpoint is a placeholder: probe outwards, one step at a
            # time in each direction, until a real value turns up.
            left, right = mid - 1, mid + 1
            while True:
                if left < first and right > last:
                    # Both probes left the window, so it holds only blanks.
                    print("{0} is not in the dataset".format(search_val))
                    return
                elif right <= last and data[right]:
                    # Nearest real value lies to the right.
                    mid = right
                    break
                elif left >= first and data[left]:
                    # Nearest real value lies to the left.
                    mid = left
                    break
                else:
                    # Widen the probe: right moves up, left moves down.
                    right += 1
                    left -= 1
        if data[mid] == search_val:
            print("{0} found at position {1}".format(search_val, mid))
            return
        elif search_val < data[mid]:
            last = mid - 1
        elif search_val > data[mid]:
            first = mid + 1
    print("{0} is not in the dataset".format(search_val))
          

In [4]:
# Look up a name in a sorted roster padded with empty strings.
sparse_search(
    [
        "Alex", "", "", "", "", "Devan", "", "", "Elise", "", "", "",
        "Gary", "", "", "Mimi", "", "", "Parth", "", "", "", "Zachary",
    ],
    "Parth",
)

Data: ['Alex', '', '', '', '', 'Devan', '', '', 'Elise', '', '', '', 'Gary', '', '', 'Mimi', '', '', 'Parth', '', '', '', 'Zachary']
Search Value: Parth
Parth found at position 18
