# In [None]:
import feedparser
import requests
from datetime import datetime, timezone
from bs4 import BeautifulSoup

def remove_html_tags(text):
    """Return the plain-text content of *text* with HTML markup removed.

    Args:
        text: An HTML fragment or plain string. ``None`` and the empty string
            are accepted, because feed/API payloads may carry null or blank
            descriptions and BeautifulSoup cannot parse ``None``.

    Returns:
        The text produced by ``BeautifulSoup(...).get_text()``, or ``""`` when
        there is nothing to parse.
    """
    if not text:
        # Short-circuit so a missing description never reaches the parser.
        return ""
    return BeautifulSoup(text, "html.parser").get_text()

def jobicy(url, headers, start_date, end_date, timeout=30):
    """Fetch an RSS job feed and return the entries published in a date window.

    Args:
        url: URL of the RSS feed.
        headers: HTTP headers sent with the request.
        start_date: Inclusive lower bound. A naive ``datetime`` is interpreted
            as UTC; an aware one is used unchanged.
        end_date: Inclusive upper bound, same convention as ``start_date``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of dicts with the keys ``FT_Title``, ``FT_Published_Date``,
        ``FT_Summary`` (markup stripped via ``remove_html_tags``) and
        ``FT_Link``. The list is empty when the request fails; the error is
        printed rather than raised, so callers can always iterate the result.
    """
    # Function-scope import: the RFC 2822 parser accepts numeric offsets
    # ("+0000") as well as zone names ("GMT"), which a fixed strptime
    # pattern with %z does not.
    from email.utils import parsedate_to_datetime

    def _as_aware(moment):
        # Attach UTC only to naive values; replace() on an aware datetime
        # would relabel its zone and thereby shift the instant it denotes.
        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    job_list = []
    try:
        # Fetch the RSS data with headers
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("Error:", err)
    else:
        feed = feedparser.parse(response.text)

        start_datetime = _as_aware(start_date)
        end_datetime = _as_aware(end_date)

        for entry in feed.entries:
            published = entry.get('published')
            if not published:
                # Without a publication date the entry cannot be placed in
                # the window, so it is skipped.
                continue
            try:
                entry_published = _as_aware(parsedate_to_datetime(published))
            except (TypeError, ValueError):
                # Malformed date string: skip this entry instead of
                # aborting the whole feed.
                continue

            if start_datetime <= entry_published <= end_datetime:
                job_list.append({
                    'FT_Title': entry.title,
                    'FT_Published_Date': entry.published,
                    'FT_Summary': remove_html_tags(entry.summary),
                    'FT_Link': entry.link,
                })

    return job_list

def remote_ok(url, start_date=None, end_date=None, timeout=30):
    """Fetch a JSON list of jobs and normalise each one into an ``FT ...`` dict.

    Args:
        url: URL returning a JSON array of job objects.
        start_date: Optional inclusive lower bound. Naive values are
            interpreted as UTC.
        end_date: Optional inclusive upper bound, same convention. Filtering
            is applied only when both bounds are given.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of job dicts. Fields missing from a job are reported as
        ``'NA'``. The list is empty when the request fails; the error is
        printed rather than raised, so callers can always iterate the result.
    """
    def _as_aware(moment):
        # Job dates are parsed with an explicit offset (aware), so naive
        # bounds must be made aware before they can be compared.
        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    jobs = []
    try:
        # Fetch and decode the JSON data
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("Error:", err)
    else:
        apply_window = start_date is not None and end_date is not None
        if apply_window:
            lower = _as_aware(start_date)
            upper = _as_aware(end_date)

        for job in data:
            # A missing, null or blank date is treated like the 'NA' marker.
            job_published_date_str = job.get('date') or 'NA'

            if job_published_date_str == 'NA':
                # No date available: default to the current time.
                job_published_date = datetime.now(timezone.utc)
            else:
                job_published_date = datetime.strptime(
                    job_published_date_str, '%Y-%m-%dT%H:%M:%S%z'
                )

            # Skip jobs published outside the requested window.
            if apply_window and not (lower <= job_published_date <= upper):
                continue

            jobs.append({
                # NOTE(review): the title is taken from 'slug' — confirm this
                # is the intended field.
                'FT Title': job.get('slug', 'NA'),
                'FT Job-Description': remove_html_tags(job.get('description', 'NA')),
                'FT Compnay-Name': job.get('company', 'NA'),
                # The 'position' field is used as the category here.
                'FT Job-Category': job.get('position', 'NA'),
                'FT Job-Type': job.get('job_type', 'NA'),
                'FT Job-Location': job.get('location', 'NA'),
                'FT Job-Tags': job.get('tags', 'NA'),
                'FT Job-Salary-min': job.get('salary_min', 'NA'),
                'FT Job-Salary-max': job.get('salary_max', 'NA'),
                'FT Apply-Url': job.get('apply_url', 'NA'),
                'FT Job-Published-Date': job_published_date.strftime('%Y-%m-%d %H:%M:%S %Z'),
            })

    return jobs

def remotive(url, start_date=None, end_date=None, timeout=30):
    """Fetch jobs from a JSON endpoint and normalise them into ``FT ...`` dicts.

    The response is expected to be an object with a ``jobs`` array whose items
    carry a ``publication_date`` formatted as ``YYYY-MM-DDTHH:MM:SS``.

    Args:
        url: URL of the JSON endpoint.
        start_date: Optional inclusive lower bound.
        end_date: Optional inclusive upper bound. Filtering is applied only
            when both bounds are given.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of job dicts. The list is empty when the request fails; the
        error is printed rather than raised, so callers can always iterate
        the result.
    """
    def _as_naive_utc(moment):
        # publication_date is parsed without an offset, so comparisons must
        # use naive values. Aware bounds are converted to UTC first.
        # NOTE(review): this presumes the API reports UTC times — confirm.
        if moment.tzinfo is None:
            return moment
        return moment.astimezone(timezone.utc).replace(tzinfo=None)

    jobs = []
    try:
        # Fetch and decode the JSON data
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("Error:", err)
    else:
        apply_window = start_date is not None and end_date is not None
        if apply_window:
            lower = _as_naive_utc(start_date)
            upper = _as_naive_utc(end_date)

        for job in data['jobs']:
            job_published_date = datetime.strptime(
                job['publication_date'], '%Y-%m-%dT%H:%M:%S'
            )

            # Skip jobs published outside the requested window.
            if apply_window and not (lower <= job_published_date <= upper):
                continue

            jobs.append({
                'FT Title': job['title'],
                'FT Job-Description': remove_html_tags(job['description']),
                'FT Compnay-Name': job['company_name'],
                'FT Job-Category': job['category'],
                'FT Job-Type': job['job_type'],
                # Optional fields fall back to 'NA' when absent.
                'FT Job-Location': job.get('job_location', 'NA'),
                'FT Job-Tags': job['tags'],
                'FT Job-Salary-min': job.get('salary_min', 'NA'),
                'FT Job-Salary-max': job.get('salary_max', 'NA'),
                'FT Apply-Url': job['url'],
                'FT Job-Published-Date': job_published_date.strftime('%Y-%m-%d %H:%M:%S'),
            })

    return jobs

def try_remotely(url, headers, start_date=None, end_date=None, timeout=30):
    """Fetch jobs from a JSON endpoint and normalise them into ``FT ...`` dicts.

    The response is expected to be an object with a ``jobs`` array. Each job
    may carry an ``updated_at`` Unix timestamp, which drives the date filter,
    and a ``pubDate`` that is either a Unix timestamp or an ISO-like string
    with a UTC offset.

    Args:
        url: URL of the JSON endpoint.
        headers: HTTP headers sent with the request.
        start_date: Optional inclusive lower bound on ``updated_at``. Naive
            values are interpreted as UTC.
        end_date: Optional inclusive upper bound, same convention. Filtering
            is applied only when both bounds are given; jobs without an
            ``updated_at`` value are never filtered out.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of job dicts. Timestamp-based publication dates are rendered
        in UTC. The list is empty when the request fails; the error is printed
        rather than raised, so callers can always iterate the result.
    """
    def _as_aware(moment):
        # Timestamps are converted to aware UTC datetimes below, so naive
        # bounds must be made aware before they can be compared.
        if moment.tzinfo is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment

    def _from_epoch(seconds):
        # Pin the conversion to UTC; without tz= the result would be in the
        # machine's local zone and vary from host to host.
        return datetime.fromtimestamp(seconds, tz=timezone.utc)

    jobs = []
    try:
        # Fetch and decode the JSON data with headers
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Turn 4xx/5xx responses into HTTPError
        data = response.json()
    except requests.exceptions.HTTPError as errh:
        print("HTTP Error:", errh)
    except requests.exceptions.ConnectionError as errc:
        print("Error Connecting:", errc)
    except requests.exceptions.Timeout as errt:
        print("Timeout Error:", errt)
    except requests.exceptions.RequestException as err:
        print("Error:", err)
    else:
        apply_window = start_date is not None and end_date is not None
        if apply_window:
            lower = _as_aware(start_date)
            upper = _as_aware(end_date)

        for job in data['jobs']:
            # Convert the Unix timestamp in 'updated_at', when present.
            updated_at_timestamp = job.get('updated_at')
            updated_at_date = (
                _from_epoch(updated_at_timestamp) if updated_at_timestamp else None
            )

            # 'pubDate' is either a numeric timestamp or a formatted string.
            pub_date = job['pubDate']
            if isinstance(pub_date, (int, float)):
                job_published_date = _from_epoch(pub_date)
            else:
                job_published_date = datetime.strptime(pub_date, '%Y-%m-%dT%H:%M:%S%z')

            # Skip jobs whose last update falls outside the requested window.
            if apply_window and updated_at_date is not None:
                if not (lower <= updated_at_date <= upper):
                    continue

            jobs.append({
                'FT Title': job['title'],
                'FT Job-Description': remove_html_tags(job['description']),
                'FT Compnay-Name': job['companyName'],
                'FT Job-Category': job['mainCategory'],
                'FT Job-Type': job['jobType'],
                'FT Job-Location': job['locations'],
                'FT Job-Tags': job['tags'],
                'FT Job-Salary-min': job['minSalary'],
                'FT Job-Salary-max': job['maxSalary'],
                'FT Apply-Url': job['applicationLink'],
                'FT Job-Published-Date': job_published_date.strftime('%Y-%m-%d %H:%M:%S %Z'),
            })

    return jobs

def get_all_jobs(start_date=None, end_date=None):
    """Fetch jobs from every configured source and print them one per line.

    The sources are queried in this order: jobicy, remote_ok, remotive,
    try_remotely. A source that fails (and therefore yields nothing) is
    skipped without stopping the remaining ones.

    Args:
        start_date: Inclusive lower bound of the date window. Intended to be
            the previous day at midnight. Defaults to ``2024-03-14 00:00:00``.
        end_date: Inclusive upper bound of the date window. Intended to be the
            current day at midnight. Defaults to ``2024-03-15 00:00:00``.
            Aware values for either bound are converted to UTC.

    Returns:
        None. The job dicts are written to standard output.
    """
    # Browser-like User-Agent shared by the sources that take headers.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36'
    }

    if start_date is None:
        start_date = datetime(2024, 3, 14, 0, 0, 0)
    if end_date is None:
        end_date = datetime(2024, 3, 15, 0, 0, 0)

    def _naive_utc(moment):
        # Reduce every bound to a naive UTC value so one window can be handed
        # to all sources.
        if moment.tzinfo is None:
            return moment
        return moment.astimezone(timezone.utc).replace(tzinfo=None)

    def _print_jobs(found):
        # A fetcher may hand back None after a failed request; treat that as
        # "no jobs" instead of crashing on iteration.
        for job in found or []:
            print(job)

    naive_start = _naive_utc(start_date)
    naive_end = _naive_utc(end_date)
    # remote_ok parses dates with a UTC offset, so its bounds must be aware.
    aware_start = naive_start.replace(tzinfo=timezone.utc)
    aware_end = naive_end.replace(tzinfo=timezone.utc)

    # Jobicy RSS feed
    rss_url = "https://jobicy.com/?feed=job_feed"
    _print_jobs(jobicy(rss_url, headers, naive_start, naive_end))

    # RemoteOK JSON API
    remote_ok_url = "https://remoteok.com/api"
    _print_jobs(remote_ok(remote_ok_url, aware_start, aware_end))

    # Remotive JSON API
    remotive_url = "https://remotive.com/api/remote-jobs"
    _print_jobs(remotive(remotive_url, naive_start, naive_end))

    # TryRemotely JSON API
    try_remotely_url = "https://tryremotely.com/api/v1"
    _print_jobs(try_remotely(try_remotely_url, headers, naive_start, naive_end))

# Entry point: runs at module level, so the jobs from every source are fetched
# and printed as soon as this file is executed (or imported).
get_all_jobs()
