Written by: David Tran

Date: 9 / 1 / 2018

We are going to use the following data from Yelp Datasets:

_yelp_academic_dataset_review.json

_yelp_academic_dataset_user.json

_yelp_academic_dataset_business.json

In [1]:
import sys
import json
import csv
import io

In [2]:
def filter_and_convert_to_csv(file_name):
    
    if "review" in file_name:
        
        outputFile = open('yelp_academic_dataset_review.csv', 'w')
        fields = ['user_id', 'business_id', 'stars']
            
        outputWriter = csv.DictWriter(outputFile, fieldnames = fields)
        outputWriter.writerow({'user_id': 'user_id', 
                               'business_id': 'business_id', 
                               'stars': 'user_rating'
                              })
        
        print("Converting ", file_name, " to ", "yelp_academic_dataset_review.csv ...")
   
        # Read line by line, write user_id, business_id, and stars to CSV file  
        for line in open(file_name, 'r'):
            r = json.loads(line)
            outputWriter.writerow({'user_id': r['user_id'], 
                                   'business_id': r['business_id'], 
                                   'stars': r['stars']
                                 })
        outputFile.close()

    elif "user" in file_name:

        outputFile = open('yelp_academic_dataset_user.csv', 'w')
        fields= ['user_id', 'name']   
     
        outputWriter = csv.DictWriter(outputFile, fieldnames = fields)
        outputWriter.writerow({'user_id': 'user_id', 
                               'name': 'user_name'
                             })
            
        print("Converting ", file_name, " to ", "yelp_academic_user_review.csv ...")
   
        # Read line by line, write user_id and name to CSV file  
        for line in open(file_name, 'r'):
            r = json.loads(line)
            n = r['name']
            n1 = n.encode('ascii', 'ignore')
            outputWriter.writerow({'user_id': r['user_id'], 
                                   'name': n1
                                 })
        outputFile.close()
          
    elif "business" in file_name:
  
        outputFile = open('yelp_academic_dataset_business.csv', 'w')
        fields= ['business_id', 'city', 'name', 'categories', 'review_count', 'stars']  
        
        outputWriter = csv.DictWriter(outputFile, fieldnames = fields)
        outputWriter.writerow({'business_id': 'business_id', 
                               'city': 'city', 
                               'name': 'business_name', 
                               'categories': 'categories', 
                               'review_count': 'review_count', 
                               'stars': 'average_stars'
                             })
        print("Converting ", file_name, " to ", "yelp_academic_dataset_review.csv ...")
     
     # Read line by line, write relevant fields if the business is a restaurant
        for line in open(file_name, 'r'):
            r = json.loads(line)
            categories = str(r['categories'])
            if "Restaurants" in categories:      
                n = r['name']
                n1 = n.encode('ascii', 'ignore')
                c = r['city']
                c1 = c.encode('ascii', 'ignore')

                # Now write the result to a CSV file
                outputWriter.writerow({'business_id': r['business_id'], 
                                       'city': c1, 
                                       'name': n1, 
                                       'categories': r['categories'], 
                                       'review_count': r['review_count'], 
                                       'stars': r['stars']
                                     })
        outputFile.close()
                                  
    else: 
        print("Error!  Unexpected filename used.")
        exit()
        
def main():
    filter_and_convert_to_csv("yelp_academic_dataset_user.json")
    filter_and_convert_to_csv("yelp_academic_dataset_business.json")
    filter_and_convert_to_csv("yelp_academic_dataset_review.json")
    
if __name__ == '__main__':
    main()

Converting  yelp_academic_dataset_user.json  to  yelp_academic_user_review.csv ...
Converting  yelp_academic_dataset_business.json  to  yelp_academic_dataset_review.csv ...
Converting  yelp_academic_dataset_review.json  to  yelp_academic_dataset_review.csv ...
