# Project 1 Notebook

-Ben and Jax

# API Requests from Yelp


## Importing libraries

In [None]:
import requests
import pandas as pd
import json
import csv
import numpy as np

# Load the Yelp API credentials from a local JSON file.
# NOTE(review): the path is machine-specific (a Windows user profile), so this
# cell only runs as-is on the original author's machine.
with open(r'C:\Users\bmcca\.secret\yelp_api.json') as f:
    keys = json.load(f)

# The JSON file must provide both an 'id' and a 'key' entry.
client_id = keys['id']
yelp_key = keys['key']

## Creating the Request Function

**Goal:** Using Yelp API to determine best location and market for our winery/vineyard.


* **How:** comparing Yelp reviews, pricing, and ratings for businesses in separate regions to determine best market.


* **Desired information:** businesses near specific locations and associated information.


* **What:** Create one or more functions to request data from the Yelp API, including general business information as well as a selection of reviews for each business.

### ƒ: 

* Create either two separate functions for API calls or one combined function to perform calls.
 * Would we need a function here, or would standard code work better?
 

* Create two separate .csv files to store requested data:
 * One file for "Businesses" request data.
 * One file for "Reviews" request data.
 
 
 * **IMPORTANT:** Prior to requesting reviews, process "Businesses" to remove irrelevant businesses (focus on "wineries" and "wine tasting rooms").

* Construct the function(s) so that pagination is handled by default, with no need for separate pagination functions.

In [None]:
def parse_data(list_of_data):
    '''Build a DataFrame of selected fields from Yelp business records.

    Adapted from Tyrell's code.

    Parameters
    ----------
    list_of_data : list of dict
        Business dictionaries. Each must contain 'name', 'id', 'rating',
        'review_count', 'location' (with a 'display_address' list of
        strings) and 'coordinates' (with 'latitude' and 'longitude').
        'price' and 'categories' may be missing; 'categories' may be empty.

    Returns
    -------
    pandas.DataFrame
        One row per business with the columns name, location, id, alias,
        title, rating, review_count, price, latitude and longitude.
        Missing price or category values are stored as NaN.

    Raises
    ------
    KeyError
        If a business lacks one of the required keys listed above.
    '''

    # Collect one flat dictionary per business; these become the DF rows
    parsed_data = []

    for business in list_of_data:

        # Only the first listed category is kept. A business with no
        # categories falls back to an empty dict so alias/title become NaN
        # instead of raising IndexError.
        categories = business.get('categories') or [{}]
        primary_category = categories[0]

        details = {'name': business['name'],
                   'location': ' '.join(business['location']['display_address']),
                   'id': business['id'],
                   'alias': primary_category.get('alias', np.nan),
                   'title': primary_category.get('title', np.nan),
                   'rating': business['rating'],
                   'review_count': business['review_count'],
                   # Not every business reports a price; use NaN without
                   # modifying the caller's dictionary.
                   'price': business.get('price', np.nan),
                   'latitude': business['coordinates']['latitude'],
                   'longitude': business['coordinates']['longitude'],
                   }

        parsed_data.append(details)

    # Create a DataFrame from the resulting list
    df_parsed_data = pd.DataFrame(parsed_data)

    return df_parsed_data

In [None]:
def yelp_request_offset(term, location, yelp_key, offset=0, verbose=False, timeout=30):
    '''Request one page (up to 50 results) from the Yelp business search API.

    Adapted from Yelp API Lab: https://github.com/BenJMcCarty/dsc-yelp-api-lab/tree/solution

    Parameters
    ----------
    term : str
        Search term, written naturally (spaces allowed).
    location : str
        Location to search near, written naturally (spaces allowed).
    yelp_key : str
        Yelp API key, sent as a Bearer token.
    offset : int, optional
        Number of results to skip; used for pagination. Defaults to 0.
    verbose : bool, optional
        If truthy, print the response object, the type of its text and the
        first 1000 characters of the body.
    timeout : float, optional
        Seconds to wait for the server before giving up. Defaults to 30.

    Returns
    -------
    dict
        The decoded JSON body. The HTTP status is not checked, so an error
        response is returned as-is for the caller to inspect.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems or when the timeout expires.
    ValueError
        If the response body is not valid JSON.
    '''

    url = 'https://api.yelp.com/v3/businesses/search'

    headers = {
        'Authorization': 'Bearer {}'.format(yelp_key),
    }

    # requests URL-encodes query parameters itself. Replacing spaces with '+'
    # beforehand would send a literal '+' (encoded as %2B), so the raw
    # strings are passed through unchanged.
    url_params = {
        'term': term,
        'location': location,
        'limit': 50,
        'offset': offset,
    }

    # Without a timeout a stalled connection would block indefinitely.
    response = requests.get(url, headers=headers, params=url_params,
                            timeout=timeout)

    if verbose:
        print(response)
        print(type(response.text))
        print(response.text[:1000])

    return response.json()

In [None]:
def get_full_data(term, location, yelp_key, file_name='data/wineries.csv'):
    '''Request all result pages from the Yelp API, save them as a .csv, and return a DataFrame.

    Parameters
    ----------
    term : str
        Search term forwarded to yelp_request_offset.
    location : str
        Location forwarded to yelp_request_offset.
    yelp_key : str
        Yelp API key forwarded to yelp_request_offset.
    file_name : str, optional
        Path of the .csv file to write. Defaults to 'data/wineries.csv'.

    Returns
    -------
    pandas.DataFrame
        All successfully retrieved pages concatenated with a fresh index.
        Empty if no page could be retrieved.

    Raises
    ------
    KeyError
        If the first response does not report a 'total' (for example when
        the API returned an error body).
    OSError
        If file_name cannot be written when the placeholder file is created.
    '''
    # Must match the 'limit' that yelp_request_offset sends with each request.
    page_size = 50

    # Write an empty placeholder first so an unwritable path fails before
    # any API requests are made.
    pd.DataFrame().to_csv(file_name)

    # Process first request to Yelp API and calculate number of pages
    first_page = yelp_request_offset(term, location, yelp_key, offset=0, verbose=False)
    if 'total' not in first_page:
        raise KeyError(f"Yelp response has no 'total' field: {first_page}")
    total = first_page['total']

    # Ceiling division, so a final partial page is counted and an exact
    # multiple of the page size does not add an extra page.
    num_pages = -(-total // page_size)

    # Print out confirmation feedback
    print(f'For {term} and {location}: ')
    print(f"    Total number of results: {total}.")
    print(f'    Total number of pages: {num_pages}.')

    parsed_results_dfs = []

    # Page 0 reuses the response already fetched above; the remaining pages
    # are requested with an offset derived from the page number, so a failed
    # page never shifts the offsets of the pages after it.
    for num in range(num_pages):
        try:
            if num == 0:
                results = first_page
            else:
                results = yelp_request_offset(term, location, yelp_key,
                                              offset=num * page_size,
                                              verbose=False)
            parsed_results_dfs.append(parse_data(results['businesses']))
        except (requests.RequestException, KeyError, IndexError,
                TypeError, ValueError) as err:
            # Best effort: report the failed page and keep going. Pages
            # collected so far are still saved after the loop.
            print(f'Error on page {num}.')
            print(f'    {type(err).__name__}: {err}')

    # Concatenate DataFrames and save to .csv
    # (pd.concat raises on an empty list, so that case is handled explicitly)
    if parsed_results_dfs:
        df_concat = pd.concat(parsed_results_dfs, ignore_index=True)
    else:
        df_concat = pd.DataFrame()

    try:
        df_concat.to_csv(file_name)
    except OSError:
        print('Error, did not save.')
    else:
        print(f'Saved to {file_name}.')

    return df_concat

#### Identifying and Exploring Keys

##### Key 1

##### Key 2

##### Key 3

## Parsing and Prepping Data

* Given a raw .csv file, filter out relevant data.


* Ensure the filtered data is properly formatted (data types, structure, etc.).


* Save cleaned data in two new .csv files to preserve raw data (in case of any issues).

# Exploring Datasets

## Overview of Data

## Data Visualizations

* Suggested plots: scatter (single region and overlaid with all regions), hist (per region), bar (single and stacked), and box plot (single and overall).

# Conclusions and Recommendations

* Summarize data


* 3 Actionable recommendations
 * Use visualizations to support recommendations


* Recommendations for further exploration/analysis