# Travel Advisory Data

## Import Packages

In [1]:
!pip install xmltodict



In [2]:
!pip install beautifulsoup4



In [3]:
import json
import re
import xmltodict
import pandas as pd
from bs4 import BeautifulSoup

## Load Datasets

In [4]:
def load_json_data(json_file_path):
    """Read a JSON file from disk and return the decoded Python object.

    Args:
        json_file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # JSON text is UTF-8 by specification; being explicit keeps the result
    # independent of the platform's default text encoding.
    with open(json_file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

def load_xml_data(xml_file_path):
    """Read an XML file, repair known HTML-isms in it, and parse it.

    The file content is patched as text before parsing so that constructs
    which are legal in HTML but not in XML do not make the parser fail:

    * ``&nbsp;`` is replaced by the numeric reference ``&#160;``.
    * ``<br>`` and ``<br _rte_temp_br="brEOB">`` are self-closed.
    * ``<img ...>`` tags that are not already self-closed get a closing ``/``.

    Args:
        xml_file_path: Path of the XML file to read.

    Returns:
        The structure returned by ``xmltodict.parse`` for the repaired text.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Explicit encoding so decoding does not depend on the platform default.
    with open(xml_file_path, 'r', encoding='utf-8') as f:
        xml_content = f.read()

    # Replace &nbsp; with its numeric reference (XML does not define &nbsp;).
    xml_content = xml_content.replace("&nbsp;", "&#160;")

    # Fix unclosed <br> tags.
    xml_content = xml_content.replace('<br _rte_temp_br="brEOB">', '<br _rte_temp_br="brEOB" />')
    xml_content = xml_content.replace('<br>', '<br />')

    # Self-close <img ...> tags that lack a trailing "/".
    # [^>]* keeps the match inside a single tag: with ".*?" an already
    # self-closed <img .../> made the match run on to the next ">" and
    # rewrote an unrelated following tag. [^>] also spans line breaks, so
    # multi-line <img> tags are handled. \b avoids touching other tag names
    # that merely start with "img".
    xml_content = re.sub(r'<img\b([^>]*?)(?<!/)>', r'<img\1/>', xml_content)

    return xmltodict.parse(xml_content)

In [5]:
# Read both raw inputs from the notebook's working directory: the per-country
# travel information (JSON) and the travel advisory feed (XML).
country_travel_info_json_data = load_json_data('./country_travel_info.json')
travel_advisory_xml_data = load_xml_data('./travel_advisory.xml')

## Extract Data

In [6]:
def clean_html(html_content):
    """Return the text of an HTML fragment with all markup removed.

    The fragment is parsed with BeautifulSoup's ``html.parser`` backend and
    its text is extracted with ``get_text(separator=' ', strip=True)``.

    Args:
        html_content: HTML markup to convert to plain text.

    Returns:
        The extracted text as a string.
    """
    return BeautifulSoup(html_content, 'html.parser').get_text(separator=' ', strip=True)

def extract_json_data(json_data):
    """Build one flat record per country from the travel-info JSON.

    For every item in ``json_data`` the country name is read from
    ``'geopoliticalarea'`` and three HTML fields are converted to plain text
    with ``clean_html``. A field that is absent, or present with the value
    ``None`` (JSON ``null``), is reported as ``'No info available'``.

    Args:
        json_data: Iterable of dict-like country records.

    Returns:
        A list of dicts with the keys ``'Country'``, ``'Safety and Security'``,
        ``'Health'`` and ``'Local Laws'``, in that order.
    """
    placeholder = 'No info available'
    # Output column -> key holding the HTML in the source record.
    html_fields = {
        'Safety and Security': 'safety_and_security',
        'Health': 'health',
        'Local Laws': 'local_laws_and_special_circumstances',
    }

    extracted_data = []
    for country_info in json_data:
        record = {'Country': country_info.get('geopoliticalarea')}
        for column, source_key in html_fields.items():
            raw_html = country_info.get(source_key)
            # dict.get's default only covers a missing key; an explicit null
            # would otherwise reach clean_html as None and fail there.
            if raw_html is None:
                raw_html = placeholder
            record[column] = clean_html(raw_html)
        extracted_data.append(record)
    return extracted_data

def _as_list(value):
    """Wrap a lone item in a list; map None to an empty list."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def extract_xml_data(xml_data):
    """Build one record per feed entry with its country and advisory level.

    The country name is the part of the entry title before the first
    ``' - '``. The advisory level is the ``'@term'`` of the first category
    whose ``'@label'`` is ``'Threat-Level'``; it is ``''`` when no such
    category exists.

    xmltodict represents a repeated child element as a list but a single
    child as a bare dict (and an attribute-less text element as a plain
    string), so entries, categories and the title are normalised before use.

    Args:
        xml_data: Parsed feed, as returned by ``xmltodict.parse``; must have a
            top-level ``'feed'`` key.

    Returns:
        A list of dicts with the keys ``'Country'`` and ``'Advisory Level'``.

    Raises:
        KeyError: If ``xml_data`` has no ``'feed'`` key.
    """
    feed = xml_data['feed'] or {}  # an empty <feed/> element parses to None

    extracted_data = []
    for entry in _as_list(feed.get('entry')):
        title = entry.get('title')
        if isinstance(title, dict):
            title_text = title.get('#text') or ''
        else:
            title_text = title or ''
        country_name = title_text.split(' - ')[0].strip()

        advisory_level = ''
        for category in _as_list(entry.get('category')):
            # Skip categories without attributes (parsed as plain strings).
            if isinstance(category, dict) and category.get('@label') == 'Threat-Level':
                advisory_level = category.get('@term', '')
                break

        extracted_data.append({
            'Country': country_name,
            'Advisory Level': advisory_level,
        })
    return extracted_data

In [7]:
# Flatten the raw JSON and XML structures into lists of per-country records.
country_info_extracted_data = extract_json_data(country_travel_info_json_data)
advisory_extracted_data = extract_xml_data(travel_advisory_xml_data)

In [8]:
# One DataFrame per source; each extracted record becomes a row.
df_country_info = pd.DataFrame(country_info_extracted_data)
df_advisory = pd.DataFrame(advisory_extracted_data)

## Save Data to CSV

In [None]:
# Write both tables as CSV files into the working directory; index=False
# leaves the DataFrame row index out of the files.
df_country_info.to_csv('country_info_extracted_data.csv', index=False)
df_advisory.to_csv('advisory_extracted_data.csv', index=False)

## Data Exploration & Preparation

In [9]:
# Preview the first rows of the country information table.
df_country_info.head()

Unnamed: 0,Country,Safety and Security,Health,Local Laws
0,Afghanistan,The security situation is extremely unstable a...,"For emergency services in Afghanistan, dial 11...",Criminal Penalties: U.S. citizens in Afghanist...
1,Albania,Terrorism: Some terrorist groups and those ins...,"For emergency services in Albania, dial 112 fo...",Criminal Penalties: You are subject to local l...
2,Algeria,Terrorism: Terrorist groups and individuals in...,"For emergency services in Algeria, dial 021711...",Criminal Penalties: You are subject to local l...
3,Andorra,Terrorism: Terrorist groups and those inspired...,Good medical care is available in Andorra. Reg...,Criminal Penalties: You are subject to local l...
4,Angola,Both spontaneous and planned civil disturbance...,"Medical facilities and services, including eme...",Criminal Penalties: You are subject to local l...


In [10]:
# Preview the first rows of the advisory table.
df_advisory.head()

Unnamed: 0,Country,Advisory Level
0,Turkey,Level 2: Exercise Increased Caution
1,The Gambia,Level 2: Exercise Increased Caution
2,Egypt,Level 3: Reconsider Travel
3,Canada,Level 1: Exercise Normal Precautions
4,Rwanda,Level 3: Reconsider Travel
