# Question 2 - Part 1
Read all the JSON files in the folder called Data.
There are three categories of JSON files in this folder. They are identified by the key called “term” in each JSON file.
Read all these JSON files and store them in a hierarchy of folders, with a separate folder for each group of files.
Example: You can place all restaurant JSON files for a particular country (say, Australia) in the same folder. How you group the JSON files and lay out the folder structure is your choice. Your task is to identify the criteria by which to group all these JSON files and store them.
(You could use these keys to create the hierarchy and store the JSON files: country, city, categories.)

Output Format:
Create a folder (Name: Data Processed)
In this folder you should have a hierarchy of folder structures (Example: Data Processed/Australia(AU)/........)

In [10]:
import csv
import glob
import json
import os
import re
import shutil

In [11]:
def create_parent_directory():
    """Create the top-level output folder ``Data Processed`` and return its path.

    The folder is created relative to the current working directory. Calling
    this function again when the folder already exists is harmless.

    Returns:
        str: The relative path ``'Data Processed'``.

    Raises:
        FileExistsError: If ``Data Processed`` exists but is not a directory.
    """
    # The earlier os.path.dirname('__file__') operated on a string literal and
    # always produced '', so the path was already just the bare folder name.
    home_folder = 'Data Processed'
    # exist_ok=True replaces the racy "check, then create" sequence.
    os.makedirs(home_folder, exist_ok=True)
    return home_folder

def make_directory_with_country(home_folder, country_name, city_name, term, category):
    """Ensure the folder ``home_folder/country_name/city_name/term/category`` exists.

    Args:
        home_folder: Root folder under which the hierarchy is created.
        country_name: First level of the hierarchy.
        city_name: Second level of the hierarchy.
        term: Third level of the hierarchy.
        category: Innermost level of the hierarchy.

    Returns:
        str: The joined directory path, whether it was just created or
        already existed.
    """
    directory = os.path.join(home_folder, country_name, city_name, term, category)
    # exist_ok=True creates all missing levels and tolerates an existing
    # folder, so no separate existence check (and no race window) is needed.
    os.makedirs(directory, exist_ok=True)
    return directory

def write_to_json_file(file_path, json_data):
    """Serialize ``json_data`` as JSON into ``file_path``.

    The target file is opened in write mode, so any existing content at
    ``file_path`` is replaced.

    Args:
        file_path: Destination path of the JSON file.
        json_data: JSON-serializable object to store.
    """
    with open(file_path, mode='w') as destination:
        json.dump(obj=json_data, fp=destination)
        
def remove_numbers_lam(value):
    """Return ``value`` with every run of digits removed and outer whitespace stripped.

    Only leading and trailing whitespace is trimmed; whitespace left between
    words after a number is removed from the middle is not collapsed.

    Args:
        value: Text to clean.

    Returns:
        str: The text without digits.
    """
    return re.sub(r'\d+', '', value).strip()

In [12]:
home_folder = create_parent_directory()

# Copy every JSON file into Data Processed/<country>/<city>/<term>/<category>.
# A file listing several categories is written once per category.
# NOTE(review): the input location is an absolute, machine-specific path; it
# also differs from the path used by the restaurant-timings cell further down
# ("...\Data\*.json" here vs "...\Data\Data\*.json" there) - confirm which is right.
for filename in glob.glob(r'C:\Users\rajas\Downloads\DataAnalysis4Python_Spring17-master\Assignment 2\Data\*.json'):   #Read all json files

    file_name = os.path.basename(filename)               #Get the file name

    # Load the data and close the input file before any output is written.
    with open(filename) as f:
        data_from_file = json.load(f)

    categories = [category["title"] for category in data_from_file["categories"]]
    if not categories:
        continue                                         #No category means no target folder for this file

    # The city is the same for every category, so compute it once per file.
    # Digits are removed first and whitespace is collapsed afterwards;
    # otherwise a number in the middle of the name would leave a double space.
    city_name = ' '.join(remove_numbers_lam(data_from_file["location"]["city"]).split())

    # NOTE(review): a category title containing a path separator (e.g. "A/B")
    # would create an extra folder level - confirm whether that can occur.
    for category in categories:

        category = ' '.join(category.split())            #Remove multiple spaces

        #Call function to get folder path
        data_dir = make_directory_with_country(home_folder, data_from_file["location"]["country"], city_name, data_from_file["term"], category)

        #Create file path by joining folder structure and file name
        file_path = os.path.join(data_dir, file_name)

        #Write to json file
        write_to_json_file(file_path, data_from_file)

# The JSON files were successfully stored in the Data Processed folder hierarchy (country/city/term/category)

# Question2-part2
Read all the JSON files in the folder called Data. Read only the JSON files which contain the key called “restaurants”. Each (or most) of the JSON files contains a key called “open”, which holds the details of the restaurant's operation (timings). For each JSON file, read the timings of the restaurant. The operation timings are present for each day of the week. Extract this data and write it to an Excel sheet.

In [13]:
def add_rows(file, details):
    """Build one output row per opening-hours entry of a restaurant record.

    Each row has the form
    ``[name, city, country, day, start[:2], start[2:], end[:2], end[2:]]``,
    where the city has its whitespace runs collapsed to single spaces.

    Args:
        file: Parsed JSON record; read via ``file["name"]`` and
            ``file["location"]["city"]`` / ``["country"]``.
        details: Iterable of entries with ``'day'``, ``'start'`` and ``'end'``
            keys. ``start`` and ``end`` are split after their second
            character (presumably "HHMM" strings - confirm against the data).

    Returns:
        list: A list of rows, empty when ``details`` is empty.
    """
    return [
        [
            file["name"],
            ' '.join(file["location"]["city"].split()),
            file["location"]["country"],
            slot['day'],
            slot['start'][:2],
            slot['start'][2:],
            slot['end'][:2],
            slot['end'][2:],
        ]
        for slot in details
    ]

def write_to_csv(file_name, restaurant_timings):
    """Write the restaurant timing rows to a comma-separated file.

    A fixed header row is written first, followed by one line per entry of
    ``restaurant_timings``. Fields are never quoted; delimiter characters
    inside a field are escaped with a backslash instead.

    Args:
        file_name: Path of the CSV file to create (overwritten if present).
        restaurant_timings: Iterable of rows, each an iterable of field values.
    """
    header = ("Name of Restaurant", 'City', 'Country Code', 'Day of Week',
              'Start Time Hour', 'Start Time Minutes', 'End Time Hour', 'End Time Minutes')

    # newline='' hands line-ending control to the csv writer, so the
    # lineterminator below is written as-is on every platform.
    with open(file_name, 'w', encoding='utf-8', newline='') as csv_output:
        writer = csv.writer(csv_output, delimiter=',', quoting=csv.QUOTE_NONE, lineterminator='\n', escapechar='\\')
        writer.writerow(header)
        writer.writerows(restaurant_timings)
    # The with-block closes the file; no explicit close() is needed.

In [14]:
#Reads all the json files and collects the opening hours of every restaurant
# NOTE(review): the input location is an absolute, machine-specific path.
restaurant_timings = []
for filename in glob.glob(r'C:\Users\rajas\Downloads\DataAnalysis4Python_Spring17-master\Assignment 2\Data\Data\*.json'):

    # Get data from json file; the file is closed before it is processed
    with open(filename) as f:
        data_from_file = json.load(f)

    if data_from_file.get("term") != 'restaurants':      # only restaurant files are of interest
        continue

    # Only the first entry of "hours" is used. When "hours" is absent or empty,
    # or that entry has no "open" list, a single dummy slot is substituted so
    # the restaurant still appears in the output.
    hours = data_from_file.get("hours")
    first_entry = hours[0] if hours else {}
    open_slots = first_entry.get("open", [{"day": "NA", "start": "NANA", "end": "NANA"}])

    restaurant_timings.extend(add_rows(data_from_file, open_slots))              #Populate the data in a list

write_to_csv('restaurant_timings.csv', restaurant_timings)