 ## Install necessary libraries

 Faker
 Pandas
 

#### Importing Libraries

In [1]:
import pandas as pd
import random
from faker import Faker
import json

## Initialize Faker to generate mock data

In [2]:

# Module-level Faker instance; generate_mock_data() calls it for every fake value.
fake = Faker()

def generate_mock_data(num_entries):
    """Build a list of fake escape-room records.

    Every record is a dictionary of contact details, opening hours, review
    counts and business metadata. Text fields come from the module-level
    ``fake`` Faker instance; numeric fields and choices come from ``random``.
    Social media URLs are nested under the 'Links of Social Media' key.

    Args:
        num_entries: Number of records to create.

    Returns:
        A list containing ``num_entries`` record dictionaries.
    """

    def _hours_range():
        # The opening hour is drawn before the closing hour.
        opens_at = random.randint(9, 11)
        closes_at = random.randint(5, 11)
        return f'{opens_at} AM - {closes_at} PM'

    def _social_links():
        links = {}
        for platform in ('Facebook', 'Twitter', 'Instagram'):
            links[platform] = fake.url()
        return links

    def _make_record():
        # Fields are filled in a fixed order so the sequence of random/Faker
        # draws, and therefore the CSV column order, stays stable.
        record = {}
        record['Escape Room Name'] = fake.company()
        record['Address'] = fake.address()
        record['Phone Number'] = fake.phone_number()
        record['URL of Escape Room'] = fake.url()
        record['Hours of Operation'] = _hours_range()
        record['Reviews'] = random.randint(0, 500)
        record['Links of Social Media'] = _social_links()
        # Salary is rendered as a dollar string with thousands separators.
        record['Average Salary'] = f'${random.randint(20000, 60000):,}'
        record['Number of Employees'] = random.randint(5, 50)
        record['Opening Year'] = random.randint(2000, 2023)
        record['Website'] = fake.url()
        record['Email'] = fake.email()
        record['Google Reviews'] = random.randint(0, 1000)
        record['Price Level'] = random.choice(
            ['Free', 'Inexpensive', 'Moderate', 'Expensive', 'Very Expensive']
        )
        record['Rating'] = round(random.uniform(1, 5), 1)
        record['Type of Business'] = random.choice(
            ['Escape Room', 'Adventure Park', 'Puzzle Room']
        )
        record['Business Hours'] = _hours_range()
        record['Google Business URL'] = fake.url()
        return record

    return [_make_record() for _ in range(num_entries)]

def save_to_csv(data, filename):
    """Flatten the mock records and write them to a CSV file.

    The nested 'Links of Social Media' dictionary of each record is replaced
    by one top-level column per platform. Records that have no such key, or
    whose value is ``None``, are written unchanged instead of raising. The
    input records themselves are not modified.

    Args:
        data: Iterable of record dictionaries.
        filename: Path of the CSV file to write.
    """
    flattened_data = []
    for entry in data:
        # Work on a shallow copy so the caller's record keeps its nested dict.
        entry_flat = dict(entry)
        # A missing or None value contributes no extra columns.
        social_links = entry_flat.pop('Links of Social Media', None) or {}
        entry_flat.update(social_links)
        flattened_data.append(entry_flat)

    df = pd.DataFrame(flattened_data)
    df.to_csv(filename, index=False)
    print(f'Data saved to {filename}')



#### Extract the columns Escape Room Name, Address, Phone Number, URL of Escape Room, Hours of Operation, Reviews, and the social media links, and store them in the extract_data.csv file

In [3]:
import pandas as pd

def extract_columns_to_csv(csv_filename, new_csv_filename):
    """Copy a fixed subset of columns from one CSV file into another.

    Errors are reported on stdout rather than raised, so the function returns
    normally even when the input is missing, empty, malformed or lacks one of
    the selected columns. No output file is written in those cases.

    Args:
        csv_filename: Path of the CSV file to read.
        new_csv_filename: Path of the CSV file to write.
    """
    try:
        # Read the CSV file into a DataFrame
        df = pd.read_csv(csv_filename)

        # Columns copied into the new CSV, in output order
        selected_columns = ['Escape Room Name', 'Address', 'Phone Number', 'URL of Escape Room',
                            'Hours of Operation', 'Reviews', 'Facebook', 'Twitter', 'Instagram']

        # Check if all required columns are present
        missing_columns = [col for col in selected_columns if col not in df.columns]
        if missing_columns:
            raise ValueError(f"Missing columns: {', '.join(missing_columns)}")

        # Keep only the selected columns and save them
        df_filtered = df[selected_columns]
        df_filtered.to_csv(new_csv_filename, index=False)
        print(f'Data saved to {new_csv_filename}')

    except FileNotFoundError:
        print(f"Error: The file {csv_filename} was not found.")
    # EmptyDataError and ParserError both subclass ValueError, so they must be
    # handled before the generic ValueError clause or they are never reached.
    except pd.errors.EmptyDataError:
        print("Error: The CSV file is empty.")
    except pd.errors.ParserError:
        print("Error: There was a problem parsing the CSV file.")
    except ValueError as e:
        print(f"Error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")



### Implement basic data validation checks:
- All required columns are present
- No missing values in the required columns
- Additional validation where needed (e.g., URL format, phone number format)
- Phone numbers are not empty


In [4]:

def validate_data(df):
    """Perform basic data validation checks on an escape-room DataFrame.

    Checks, in order: every required column exists, no required column holds
    a missing value, every 'URL of Escape Room' value starts with 'http://'
    or 'https://', and no 'Phone Number' value is blank.

    Args:
        df: DataFrame to validate.

    Returns:
        True when every check passes.

    Raises:
        ValueError: On the first check that fails.
    """
    required_columns = ['Escape Room Name', 'Address', 'Phone Number', 'URL of Escape Room',
                        'Hours of Operation', 'Reviews', 'Facebook', 'Twitter', 'Instagram']

    # Check if all required columns are present
    missing_columns = [col for col in required_columns if col not in df.columns]
    if missing_columns:
        raise ValueError(f"Missing columns: {', '.join(missing_columns)}")

    # Check for missing values in the required columns
    if df[required_columns].isnull().any().any():
        raise ValueError("Data contains missing values in required columns.")

    # Cast to str before using the .str accessor: pandas may infer a numeric
    # dtype (e.g. all-digit phone numbers), on which .str raises AttributeError.
    urls = df['URL of Escape Room'].astype(str)
    if not urls.str.startswith(('http://', 'https://')).all():
        raise ValueError("Invalid URLs found in 'URL of Escape Room' column.")

    # Whitespace-only phone numbers count as empty.
    phone_numbers = df['Phone Number'].astype(str)
    if phone_numbers.str.strip().eq('').any():
        raise ValueError("Empty phone numbers found.")

    return True



#### Convert the extracted CSV data into a JSON file with at least 100 entries, handling exceptions and errors gracefully.

In [5]:
def extract_columns_to_json(csv_filename, json_filename):
    """Extract specified columns from a CSV file and save them as JSON.

    The CSV is read from ``csv_filename``, checked with ``validate_data`` and
    reduced to the selected columns before being written. Errors are reported
    on stdout rather than raised, and no output file is written when reading
    or validation fails.

    Args:
        csv_filename: Path of the CSV file to read.
        json_filename: Path of the JSON file to write.
    """
    try:
        # Read the file the caller asked for, not a hard-coded path.
        df = pd.read_csv(csv_filename)

        # Validate data
        validate_data(df)

        # Select the specific columns to include in the JSON
        selected_columns = ['Escape Room Name', 'Address', 'Phone Number', 'URL of Escape Room',
                            'Hours of Operation', 'Reviews', 'Facebook', 'Twitter', 'Instagram']
        df_filtered = df[selected_columns]

        # orient='records' with lines=True emits one JSON object per line
        # (newline-delimited JSON), not a single JSON array.
        json_data = df_filtered.to_json(orient='records', lines=True)

        # Save the JSON data to a file
        with open(json_filename, 'w') as f:
            f.write(json_data)
        print(f'Data saved to {json_filename}')

    except FileNotFoundError:
        print(f"Error: The file {csv_filename} was not found.")
    # EmptyDataError and ParserError both subclass ValueError, so they must be
    # handled before the generic ValueError clause or they are never reached.
    except pd.errors.EmptyDataError:
        print("Error: The CSV file is empty.")
    except pd.errors.ParserError:
        print("Error: There was a problem parsing the CSV file.")
    except ValueError as e:
        print(f"Data validation error: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")




##### Create the main function and call all of the functions defined above from inside it

In [6]:
def main():
    """Run the pipeline: generate mock data, extract columns, export JSON.

    Writes 100 generated records to 'mock_escape_rooms.csv', calls
    extract_columns_to_csv to produce 'extract_data.csv' from it, then calls
    extract_columns_to_json with 'extract_data.csv' and 'escape_rooms.json'.
    """
    full_csv = 'mock_escape_rooms.csv'
    extracted_csv = 'extract_data.csv'
    json_output = 'escape_rooms.json'

    # Step 1: generate 100 mock escape-room entries with Faker and save them.
    records = generate_mock_data(100)
    save_to_csv(records, full_csv)

    # Step 2: extract a subset of columns from the full CSV.
    extract_columns_to_csv(full_csv, extracted_csv)

    # Step 3: convert the extracted CSV into the JSON output file.
    extract_columns_to_json(extracted_csv, json_output)


if __name__ == '__main__':
    main()


Data saved to mock_escape_rooms.csv
Data saved to extract_data.csv
Data saved to escape_rooms.json


##### Important Note: Originally, I planned to use GMB (Google My Business), but when I tried to access the Google Places API, there was a billing error. Google indicated that I would need to purchase the API for $200 USD. Consequently, I used the Faker library to generate mock escape room data instead. I then created a Python script that extracts columns such as room name, phone number, address, and so forth from the generated data and stores the results in a separate file called extract_data.csv. Next, I ran a few validation checks, such as looking for missing values and empty phone numbers. Once the validation process is over, I convert the data into JSON format as a sequence of records.

##### I've added a new file, requirements.txt, that lists the installed dependencies and their versions for this project.