In [None]:
'''
CSE 511: Data Processing at Scale
Fall 2023

Project 1 Submission

Author: Rohan Sambidi
'''

In [1]:
from unqlite import UnQLite

# Create the UnQLite handle for 'sample.db' and grab its 'data' collection;
# every later cell passes `data` to the search functions as `collection`.
db = UnQLite('sample.db')
data = db.collection('data')

In [2]:
# Fetch record 0 to see the document layout (field names and value types)
# that the search functions below index into.
data.fetch(0)

{'business_id': 'MPyxaNVuWlAQqJ0iKV5rQw',
 'type': 'business',
 'state': 'AZ',
 'latitude': 33.3482589,
 'name': "VinciTorio's Restaurant",
 'full_address': '1835 E Elliot Rd, Ste C109, Tempe, AZ 85284',
 'categories': ['Restaurants', 'Buffets', 'Italian'],
 'open': True,
 'stars': 4,
 'city': 'Tempe',
 'neighborhoods': [],
 '__id': 0,
 'review_count': 122,
 'longitude': -111.9088346}

In [3]:
# Graded Cell, PartID: o1flK

import math # Math library is used for calculating the distance

def FindBusinessBasedOnCity(cityToSearch, saveLocation1, collection):
    """Write every business located in ``cityToSearch`` to ``saveLocation1``.

    Each matching record produces one output line of the form
    ``name$full_address$city$state``. The output file is created (or
    truncated) even when no record matches.

    Args:
        cityToSearch: City name; compared for exact equality against each
            record's 'city' field.
        saveLocation1: Path of the text file to write.
        collection: Object whose ``all()`` method yields dict-like records
            with 'name', 'full_address', 'city' and 'state' fields.
    """
    # A record without a 'city' field cannot match, so skip it instead of
    # letting a KeyError abort the whole scan.
    result = [
        "{}${}${}${}".format(
            business['name'], business['full_address'], business['city'], business['state']
        )
        for business in collection.all()
        if 'city' in business and business['city'] == cityToSearch
    ]

    # The context manager closes the file even if a write fails part-way.
    with open(saveLocation1, 'w') as f:
        f.writelines(line + '\n' for line in result)


def FindBusinessBasedOnLocation(categoriesToSearch, myLocation, maxDistance, saveLocation2, collection):
    """Write the names of nearby businesses in the given categories to a file.

    A record is selected when at least one entry of ``categoriesToSearch``
    appears in its 'categories' list and its haversine (great-circle)
    distance from ``myLocation`` is at most ``maxDistance``. One name is
    written per line; the file is created (or truncated) even when nothing
    matches.

    Args:
        categoriesToSearch: Iterable of category names; matching any one is
            enough.
        myLocation: ``[latitude, longitude]`` in degrees.
        maxDistance: Inclusive distance limit, in the same unit as the Earth
            radius used below (miles).
        saveLocation2: Path of the text file to write.
        collection: Object whose ``all()`` method yields dict-like records
            with 'name', 'categories', 'latitude' and 'longitude' fields.
    """
    EARTH_RADIUS_MILES = 3959  # approximate mean Earth radius, so distances come out in miles

    lat1 = math.radians(myLocation[0])  # Convert the reference coordinates into radians
    lon1 = math.radians(myLocation[1])
    cos_lat1 = math.cos(lat1)  # Loop-invariant factor of the haversine formula

    result = []
    for business in collection.all():
        # A record with a missing or empty category list cannot match; skip it
        # rather than raising KeyError.
        categories = business.get('categories') or []
        if not any(wanted in categories for wanted in categoriesToSearch):
            continue

        lat2 = math.radians(business['latitude'])
        lon2 = math.radians(business['longitude'])

        d_phi = lat2 - lat1
        d_lambda = lon2 - lon1

        a = math.sin(d_phi/2)**2 + cos_lat1 * math.cos(lat2) * math.sin(d_lambda/2)**2
        # Rounding can push `a` marginally outside [0, 1] for near-antipodal
        # points, which would make sqrt(1 - a) raise ValueError.
        a = min(1.0, max(0.0, a))
        c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

        if EARTH_RADIUS_MILES * c <= maxDistance:
            result.append(business['name'])

    # The context manager closes the file even if a write fails part-way.
    with open(saveLocation2, 'w') as f:
        f.writelines(name + '\n' for name in result)

In [11]:
# Testing FindBusinessBasedOnCity function

true_results = ["VinciTorio's Restaurant$1835 E Elliot Rd, Ste C109, Tempe, AZ 85284$Tempe$AZ", "P.croissants$7520 S Rural Rd, Tempe, AZ 85283$Tempe$AZ", "Salt Creek Home$1725 W Ruby Dr, Tempe, AZ 85284$Tempe$AZ"]

try:
    FindBusinessBasedOnCity('Tempe', 'output_city.txt', data)
except NameError as e:
    print ('The FindBusinessBasedOnCity function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print ("The FindBusinessBasedOnCity function is supposed to accept three arguments. Yours does not!")

try:
    # Read inside `with` so the handle is always closed.
    with open('output_city.txt', 'r') as opf:
        lines = [line.strip() for line in opf]
except FileNotFoundError as e:
    print ("The FindBusinessBasedOnCity function does not write data to the correct location.")
else:
    # Validate only when the file was really opened; otherwise the checks
    # would hit an unbound name or a stale handle left by another cell.
    if len(lines) != 3:
        print ("The FindBusinessBasedOnCity function does not find the correct number of results, should be 3.")

    if sorted(lines) == sorted(true_results):
        print ("Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!")

Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!


In [5]:
# Additional test case 1

true_results =['3 Palms$7707 E McDowell Rd, Scottsdale, AZ 85257$Scottsdale$AZ', "Bob's Bike Shop$1608 N Miller Rd, Scottsdale, AZ 85257$Scottsdale$AZ", 'Ronan & Tagart, PLC$8980 E Raintree Dr, Ste 120, Scottsdale, AZ 85260$Scottsdale$AZ', "Sangria's$7700 E McCormick Pkwy, Scottsdale, AZ 85258$Scottsdale$AZ", 'Turf Direct$8350 E Evans Rd, Scottsdale, AZ 85260$Scottsdale$AZ']

try:
    FindBusinessBasedOnCity('Scottsdale', 'output_city.txt', data)
except NameError as e:
    print ('The FindBusinessBasedOnCity function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print(e)
    print ("The FindBusinessBasedOnCity function is supposed to accept three arguments. Yours does not!")

try:
    # Read inside `with` so the handle is always closed.
    with open('output_city.txt', 'r') as opf:
        lines = [line.strip() for line in opf]
except FileNotFoundError as e:
    print ("The FindBusinessBasedOnCity function does not write data to the correct location.")
else:
    # Validate only when the file was really opened; otherwise the checks
    # would hit an unbound name or a stale handle left by another cell.
    if len(lines) != 5:
        # The message now states the count this cell actually expects.
        print ("The FindBusinessBasedOnCity function does not find the correct number of results, should be 5.")

    if sorted(lines) == sorted(true_results):
        print ("Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!")

Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!


In [6]:
# Additional test case 2

true_results =['Arizona Exterminating Co.$521 E Broadway Rd, Mesa, AZ 85204$Mesa$AZ', 'Bikram Yoga$1940 W 8th St, Ste 111, Mesa, AZ 85202$Mesa$AZ', "Denny's Restaurant$1330 S Power Rd, Mesa, AZ 85206$Mesa$AZ", 'Diamondback Gymnastics$7211 E Southern Avenue, Mesa, AZ 85209$Mesa$AZ', 'Southeast Valley Medical Group$1950 S Country Club Dr, Mesa, AZ 85210$Mesa$AZ', 'Spa Pima$2150 S Power Rd, Mesa, AZ 85209$Mesa$AZ', 'The Seafood Market$1910 S Gilbert Rd, Mesa, AZ 85204$Mesa$AZ']

try:
    FindBusinessBasedOnCity('Mesa', 'output_city.txt', data)
except NameError as e:
    print ('The FindBusinessBasedOnCity function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print(e)
    print ("The FindBusinessBasedOnCity function is supposed to accept three arguments. Yours does not!")

try:
    # Read inside `with` so the handle is always closed.
    with open('output_city.txt', 'r') as opf:
        lines = [line.strip() for line in opf]
except FileNotFoundError as e:
    print ("The FindBusinessBasedOnCity function does not write data to the correct location.")
else:
    # Validate only when the file was really opened; otherwise the checks
    # would hit an unbound name or a stale handle left by another cell.
    if len(lines) != 7:
        # The message now states the count this cell actually expects.
        print ("The FindBusinessBasedOnCity function does not find the correct number of results, should be 7.")

    if sorted(lines) == sorted(true_results):
        print ("Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!")

Correct! You FindBusinessByCity function passes these test cases. This does not cover all possible test edge cases, however, so make sure that your function covers them before submitting!


In [12]:
# Testing FindBusinessBasedOnLocation function

true_results = ["VinciTorio's Restaurant"]

try:
    FindBusinessBasedOnLocation(['Buffets'], [33.3482589, -111.9088346], 10, 'output_loc.txt', data)
except NameError as e:
    print ('The FindBusinessBasedOnLocation function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print ("The FindBusinessBasedOnLocation function is supposed to accept five arguments. Yours does not!")

try:
    # Read inside `with` so the handle is always closed.
    with open('output_loc.txt','r') as opf:
        lines = [line.strip() for line in opf]
except FileNotFoundError as e:
    print ("The FindBusinessBasedOnLocation function does not write data to the correct location.")
else:
    # Validate only when the file was really opened; otherwise the checks
    # would hit an unbound name or a stale handle left by another cell.
    if len(lines) != 1:
        print ("The FindBusinessBasedOnLocation function does not find the correct number of results, should be only 1.")

    # Compare the whole result list: indexing lines[0] would raise IndexError
    # on an empty file and would accept outputs that contain extra names.
    if sorted(lines) == sorted(true_results):
        print ("Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.")

Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.


In [8]:
# Additional test case 1

true_results =['The Seafood Market']

try:
    FindBusinessBasedOnLocation(['Specialty Food'], [33.3482589, -111.9088346], 10, 'output_loc.txt', data)
except NameError as e:
    print ('The FindBusinessBasedOnLocation function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print ("The FindBusinessBasedOnLocation function is supposed to accept five arguments. Yours does not!")

try:
    # Read inside `with` so the handle is always closed.
    with open('output_loc.txt','r') as opf:
        lines = [line.strip() for line in opf]
except FileNotFoundError as e:
    print ("The FindBusinessBasedOnLocation function does not write data to the correct location.")
else:
    # Validate only when the file was really opened; otherwise the checks
    # would hit an unbound name or a stale handle left by another cell.
    if len(lines) != 1:
        print ("The FindBusinessBasedOnLocation function does not find the correct number of results, should be only 1.")

    if sorted(lines) == sorted(true_results):
        print ("Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.")

Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.


In [9]:
# Additional test case 2

true_results = ['P.croissants']

try:
    FindBusinessBasedOnLocation(['Bakeries'], [33.3482589, -111.9088346], 10, 'output_loc.txt', data)
except NameError as e:
    print ('The FindBusinessBasedOnLocation function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print ("The FindBusinessBasedOnLocation function is supposed to accept five arguments. Yours does not!")

try:
    # Read inside `with` so the handle is always closed.
    with open('output_loc.txt','r') as opf:
        lines = [line.strip() for line in opf]
except FileNotFoundError as e:
    print ("The FindBusinessBasedOnLocation function does not write data to the correct location.")
else:
    # Validate only when the file was really opened; otherwise the checks
    # would hit an unbound name or a stale handle left by another cell.
    if len(lines) != 1:
        print ("The FindBusinessBasedOnLocation function does not find the correct number of results, should be only 1.")

    if sorted(lines) == sorted(true_results):
        print ("Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.")

Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.


In [10]:
# Additional test case 3

true_results = ['The Seafood Market', 'P.croissants']

try:
    FindBusinessBasedOnLocation(['Food', 'Specialty Food'], [33.3482589, -111.9088346], 10, 'output_loc.txt', data)
except NameError as e:
    print ('The FindBusinessBasedOnLocation function is not defined! You must run the cell containing the function before running this evaluation cell.')
except TypeError as e:
    print ("The FindBusinessBasedOnLocation function is supposed to accept five arguments. Yours does not!")

try:
    # Read inside `with` so the handle is always closed.
    with open('output_loc.txt','r') as opf:
        lines = [line.strip() for line in opf]
except FileNotFoundError as e:
    print ("The FindBusinessBasedOnLocation function does not write data to the correct location.")
else:
    # Validate only when the file was really opened; otherwise the checks
    # would hit an unbound name or a stale handle left by another cell.
    if len(lines) != 2:
        print ("The FindBusinessBasedOnLocation function does not find the correct number of results, should be only 2.")

    if sorted(lines) == sorted(true_results):
        print ("Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.")

Correct! Your FindBusinessBasedOnLocation function passes these test cases. This does not cover all possible edge cases, so make sure your function does before submitting.
