In [None]:
import requests
import json
import feedparser
from bs4 import BeautifulSoup

# Function to remove HTML tags
def remove_html_tags(text):
    """Return *text* with its HTML markup stripped.

    Args:
        text: HTML (or plain) text to clean. May be ``None`` or empty, e.g.
            when a JSON field holding the description is ``null``.

    Returns:
        The text content produced by BeautifulSoup's ``html.parser``, or an
        empty string when *text* is ``None`` or empty.
    """
    # Nothing to parse; also avoids handing None to BeautifulSoup.
    if not text:
        return ""
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()

def jobicy(url, headers, timeout=30):
    """Fetch an RSS job feed and return its entries as a list of dicts.

    Args:
        url: Address of the RSS feed.
        headers: HTTP headers passed to ``requests.get``.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call forever.

    Returns:
        A list with one dict per feed entry, using the keys ``FT_Title``,
        ``FT_Published_Date``, ``FT_Summary`` and ``FT_Link``. A field the
        entry does not carry is reported as ``'NA'``. When the request
        fails, the error is printed and an empty list is returned, so the
        result can always be iterated.
    """
    try:
        # Fetch the RSS data with headers
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Check for any errors in the request
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("Error:", err)
    else:
        # Parse RSS data
        feed = feedparser.parse(response.text)

        # Entries are dict-like; .get() avoids an AttributeError when a
        # feed item omits one of the optional fields.
        return [
            {
                'FT_Title': entry.get('title', 'NA'),
                'FT_Published_Date': entry.get('published', 'NA'),
                'FT_Summary': entry.get('summary', 'NA'),
                'FT_Link': entry.get('link', 'NA'),
            }
            for entry in feed.entries
        ]

    # Reached only after one of the handlers above reported a failure.
    return []

def remote_ok(url, timeout=30):
    """Fetch a JSON job list and normalise each item into a flat dict.

    Args:
        url: Address of the JSON endpoint. The response is iterated
            directly, so it is expected to be a JSON array.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call forever.

    Returns:
        A list of dicts keyed by the ``FT ...`` field names below, with
        ``'NA'`` for any field an item lacks. The first element of the
        response is dropped. When the request fails or the body is not
        valid JSON, the error is printed and an empty list is returned.
    """
    try:
        # Fetch the JSON data
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Check for any errors in the request

        # Load JSON data
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers JSON decoding failures on requests versions
        # whose decode error is not a RequestException.
        print("Error:", err)
    else:
        # Extracting relevant data from JSON
        jobs = [
            {
                'FT Title': job.get('slug', 'NA'),
                'FT Job-Description': remove_html_tags(job.get('description', 'NA')),
                'FT Compnay-Name': job.get('company', 'NA'),
                # Original note: category is not provided in this API;
                # the 'position' field is used in its place.
                'FT Job-Category': job.get('position', 'NA'),
                'FT Job-Type': job.get('job_type', 'NA'),
                'FT Job-Location': job.get('location', 'NA'),
                'FT Job-Tags': job.get('tags', 'NA'),
                'FT Job-Salary-min': job.get('salary_min', 'NA'),
                'FT Job-Salary-max': job.get('salary_max', 'NA'),
                'FT Apply-Url': job.get('apply_url', 'NA'),
                'FT Job-Published-Date': job.get('date', 'NA'),
            }
            for job in data
        ]

        # The first output is deliberately excluded (presumably it is not a
        # job posting -- confirm against the API response).
        return jobs[1:]

    # Reached only after one of the handlers above reported a failure.
    return []

def remotive(url, timeout=30):
    """Fetch a JSON job listing and normalise each job into a flat dict.

    Args:
        url: Address of the JSON endpoint. The response must be an object
            with a ``jobs`` array.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call forever.

    Returns:
        A list of dicts keyed by the ``FT ...`` field names below. Location
        and salary fall back to ``'NA'`` when absent. When the request
        fails or the body is not valid JSON, the error is printed and an
        empty list is returned.

    Raises:
        KeyError: If the response has no ``jobs`` key or a job lacks one of
            the fields read with ``job[...]``.
    """
    try:
        # Fetch the JSON data
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Check for any errors in the request

        # Load JSON data
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers JSON decoding failures on requests versions
        # whose decode error is not a RequestException.
        print("Error:", err)
    else:
        # Extracting relevant data from JSON
        return [
            {
                'FT Title': job['title'],
                'FT Job-Description': remove_html_tags(job['description']),
                'FT Compnay-Name': job['company_name'],
                'FT Job-Category': job['category'],
                'FT Job-Type': job['job_type'],
                'FT Job-Location': job.get('job_location', 'NA'),  # Using get() with a default value
                'FT Job-Tags': job['tags'],
                'FT Job-Salary-min': job.get('salary_min', 'NA'),
                'FT Job-Salary-max': job.get('salary_max', 'NA'),
                'FT Apply-Url': job['url'],
                'FT Job-Published-Date': job['publication_date'],
            }
            for job in data['jobs']
        ]

    # Reached only after one of the handlers above reported a failure.
    return []

def try_remotely(url, headers, timeout=30):
    """Fetch a JSON job listing with custom headers and flatten each job.

    Args:
        url: Address of the JSON endpoint. The response must be an object
            with a ``jobs`` array.
        headers: HTTP headers passed to ``requests.get``.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled server cannot block the call forever.

    Returns:
        A list of dicts keyed by the ``FT ...`` field names below. When the
        request fails or the body is not valid JSON, the error is printed
        and an empty list is returned.

    Raises:
        KeyError: If the response has no ``jobs`` key or a job lacks one of
            the fields read below.
    """
    try:
        # Fetch the JSON data with headers
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Check for any errors in the request

        # Load JSON data
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except (requests.exceptions.RequestException, ValueError) as err:
        # ValueError covers JSON decoding failures on requests versions
        # whose decode error is not a RequestException.
        print("Error:", err)
    else:
        # Extracting relevant data from JSON
        return [
            {
                'FT Title': job['title'],
                'FT Job-Description': remove_html_tags(job['description']),
                'FT Compnay-Name': job['companyName'],
                'FT Job-Category': job['mainCategory'],
                'FT Job-Type': job['jobType'],
                'FT Job-Location': job['locations'],
                'FT Job-Tags': job['tags'],
                'FT Job-Salary-min': job['minSalary'],
                'FT Job-Salary-max': job['maxSalary'],
                'FT Apply-Url': job['applicationLink'],
                'FT Job-Published-Date': job['pubDate'],
            }
            for job in data['jobs']
        ]

    # Reached only after one of the handlers above reported a failure.
    return []

# Call the functions one by one with their respective URLs and headers
if __name__ == "__main__":
    # Each result is guarded with `or []`: a fetcher may hand back None
    # (e.g. after a failed request), and iterating None would raise
    # TypeError and stop the remaining sources from being processed.

    # Jobicy: RSS feed, requested with a browser-like User-Agent.
    rss_url = "https://jobicy.com/?feed=job_feed"
    headers4 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
    }
    job_data = jobicy(rss_url, headers4) or []
    for job in job_data:
        print(job)

    # Remote OK: JSON API, no custom headers.
    json_url3 = "https://remoteok.com/api"
    formatted_data3 = remote_ok(json_url3) or []
    for job in formatted_data3:
        print(job)

    # Remotive: JSON API, no custom headers.
    json_url2 = "https://remotive.com/api/remote-jobs"
    formatted_data2 = remotive(json_url2) or []
    for job in formatted_data2:
        print(job)

    # Try Remotely: JSON API, requested with a browser-like User-Agent.
    json_url1 = "https://tryremotely.com/api/v1"
    headers1 = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
    }
    formatted_data1 = try_remotely(json_url1, headers1) or []
    for job in formatted_data1:
        print(job)