# CSCE 470 Project Checkpoint 2 Core Algorithm

## Import Packages

In [2]:
!pip install textblob

Collecting textblob
  Obtaining dependency information for textblob from https://files.pythonhosted.org/packages/02/07/5fd2945356dd839974d3a25de8a142dc37293c21315729a41e775b5f3569/textblob-0.18.0.post0-py3-none-any.whl.metadata
  Downloading textblob-0.18.0.post0-py3-none-any.whl.metadata (4.5 kB)
Downloading textblob-0.18.0.post0-py3-none-any.whl (626 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m626.3/626.3 kB[0m [31m12.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: textblob
Successfully installed textblob-0.18.0.post0


In [62]:
import re
import pandas as pd
import requests
import os
import json
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from textblob import TextBlob
from dotenv import load_dotenv
# from forex_python.converter import CurrencyRates

load_dotenv()

GOOGLE_PLACES_API_KEY = os.environ.get('GOOGLE_PLACES_API_KEY')
EXCHANGE_RATE_API_KEY = os.environ.get('EXCHANGE_RATE_API_KEY')

## Authenticate with Amadeus

In [54]:
def get_amadeus_token(client_id, client_secret):
    url = "https://test.api.amadeus.com/v1/security/oauth2/token"
    headers = {'Content-Type': 'application/x-www-form-urlencoded'}
    body = {
        'grant_type': 'client_credentials',
        'client_id': client_id,
        'client_secret': client_secret
    }
    
    response = requests.post(url, headers=headers, data=body)
    
    if response.status_code == 200:
        token = response.json()['access_token']
        print("Successfully authenticated.")
        return token
    else:
        print(f"Error: {response.status_code}, {response.text}")
        return None

client_id = os.environ.get('AMADEUS_CLIENT_ID')
client_secret = os.environ.get('AMADEUS_CLIENT_SECRET')

AMADEUS_TOKEN = get_amadeus_token(client_id, client_secret)

Successfully authenticated.


## Currency Converter

In [49]:
# def convert_currency(amount, base_currency, target_currency):
#     currency_rates = CurrencyRates()
#     converted_amount = currency_rates.convert(base_currency, target_currency, amount)
#     return converted_amount

In [59]:
def convert_currency(amount, base_currency, target_currency, api_key):
    url = f"https://v6.exchangerate-api.com/v6/{api_key}/pair/{base_currency}/{target_currency}/{amount}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()['conversion_result']
    return 0.0

## Load Datasets

`country_info_extracted_data.csv` contains four main columns:
- Country: Name of the country.
- Safety and Security: Descriptions of safety and security levels in each country.
- Health: Information on health services, including emergency response availability.
- Local Laws: Information on local laws and possible penalties.

In [6]:
country_data = pd.read_csv('country_info_extracted_data.csv')
country_data.head()

Unnamed: 0,Country,Safety and Security,Health,Local Laws
0,Afghanistan,The security situation is extremely unstable a...,"For emergency services in Afghanistan, dial 11...",Criminal Penalties: U.S. citizens in Afghanist...
1,Albania,Terrorism: Some terrorist groups and those ins...,"For emergency services in Albania, dial 112 fo...",Criminal Penalties: You are subject to local l...
2,Algeria,Terrorism: Terrorist groups and individuals in...,"For emergency services in Algeria, dial 021711...",Criminal Penalties: You are subject to local l...
3,Andorra,Terrorism: Terrorist groups and those inspired...,Good medical care is available in Andorra. Reg...,Criminal Penalties: You are subject to local l...
4,Angola,Both spontaneous and planned civil disturbance...,"Medical facilities and services, including eme...",Criminal Penalties: You are subject to local l...


`advisory_extracted_data.csv` contains two main columns:
- Country: Name of the country.
- Advisory Level: A rating given to a country that indicates the relative risk of travel to that country.

In [7]:
advisory_data = pd.read_csv('advisory_extracted_data.csv')
advisory_data.head()

Unnamed: 0,Country,Advisory Level
0,Turkey,Level 2: Exercise Increased Caution
1,The Gambia,Level 2: Exercise Increased Caution
2,Egypt,Level 3: Reconsider Travel
3,Canada,Level 1: Exercise Normal Precautions
4,Rwanda,Level 3: Reconsider Travel


## Preference Match

In [8]:
def get_google_places_data(location, api_key, place_type="tourist attraction"):
    """Fetch places from Google Places API for a specific location and place type."""
    url = f"https://maps.googleapis.com/maps/api/place/textsearch/json?query={place_type}+in+{location}&key={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json()['results']
    return []

In [9]:
def get_google_reviews(place_id, api_key):
    """Retrieve user reviews for a specific place using its place_id."""
    url = f"https://maps.googleapis.com/maps/api/place/details/json?place_id={place_id}&fields=reviews&key={api_key}"
    response = requests.get(url)
    if response.status_code == 200:
        return response.json().get('result', {}).get('reviews', [])
    return []

In [None]:
# def calculate_preference_match(user_preferences, google_data, api_key):
#     """
#     Calculate preference match score using reviews based on user preferences.
#     User preferences contain keywords like 'adventure', 'culture', etc.
#     """
#     total_score = 0
#     count = 0
#     for place in google_data:
#         place_id = place.get('place_id')
#         reviews = get_google_reviews(place_id, api_key)

#         for review in reviews:
#             review_text = review.get('text', '').lower()
#             review_sentiment = TextBlob(review_text).sentiment.polarity  # Sentiment range from -1 to 1
            
#             # Check for keyword relevance
#             keyword_matches = sum(1 for keyword in user_preferences["activities"] if keyword in review_text)
            
#             # Score weighted by keyword relevance and sentiment
#             if keyword_matches > 0:
#                 review_score = (review_sentiment + 1) * keyword_matches * 10  # Scale sentiment for 0-20 score per match
#                 total_score += review_score
#                 count += 1

#     # Normalize to a score out of 100
#     preference_match_score = (total_score / count) if count > 0 else 0
#     return min(100, preference_match_score)  # Cap score at 100

In [20]:
# Calculate preference match using Cosine Similarity

def calculate_preference_match(user_preferences, google_data, api_key):
    """
    Calculate preference match score using cosine similarity on reviews based on user preferences.
    User preferences contain keywords like 'adventure', 'culture', etc.
    """
    # Define user preferences as a single string for vectorization
    user_pref_text = " ".join(user_preferences.split()).lower()
    
    # Initialize total score and review count
    total_score = 0
    count = 0

    # Vectorizer to create keyword presence vectors for cosine similarity
    vectorizer = TfidfVectorizer().fit([user_pref_text])
    user_pref_vector = vectorizer.transform([user_pref_text]).toarray()

    for place in google_data:
        place_id = place.get('place_id')
        reviews = get_google_reviews(place_id, api_key)
        
        for review in reviews:
            review_text = review.get('text', '').lower()
            review_sentiment = TextBlob(review_text).sentiment.polarity  # Sentiment range from -1 to 1

            # Transform review into vector space using the same vectorizer
            review_vector = vectorizer.transform([review_text]).toarray()
            
            # Calculate cosine similarity between user preferences and review text
            similarity = cosine_similarity(user_pref_vector, review_vector)[0][0]
            
            # Weight similarity by sentiment to emphasize positive reviews
            weighted_similarity_score = similarity * (review_sentiment + 1)  # Scale sentiment

            # Accumulate the weighted similarity score
            if similarity > 0:  # Only consider relevant reviews with similarity > 0
                total_score += weighted_similarity_score * 100  # Scale up for clarity
                count += 1

    # Average and cap the preference match score to 0-100
    preference_match_score = (total_score / count) if count > 0 else 0
    return min(100, preference_match_score)

## Safety Score

To create a safety score for solo travelers:
1. Analyze Safety Descriptions: Assign scores based on keywords like "unstable," "terrorism," or "civil disturbance," where more concerning keywords reduce the score.
2. Health Services: Factor in whether emergency health services are mentioned. Limited or no emergency services would reduce the score.
3. Local Laws: Keywords suggesting stricter local laws or heightened penalties could slightly impact the score for travelers.

In [None]:
# def calculate_safety_score(row):
#     # Initial score
#     score = 100  # Start with 100 (safer)

#     # Analyze "Safety and Security" descriptions
#     safety_text = row['Safety and Security'].lower()
#     if re.search(r"unstable|civil disturbance|violent|high crime|terrorism|unsafe", safety_text):
#         score -= 30  # Major safety concerns reduce score more heavily
#     elif re.search(r"low crime|safe|stable", safety_text):
#         score += 10  # Positive safety indicators increase score slightly

#     # Analyze "Health" descriptions
#     health_text = row['Health'].lower()
#     if re.search(r"limited|no emergency services|poor medical care", health_text):
#         score -= 20  # Lack of emergency services is a negative indicator
#     elif re.search(r"good medical care|emergency services available", health_text):
#         score += 10  # Availability of health services is a positive indicator

#     # Analyze "Local Laws" descriptions
#     laws_text = row['Local Laws'].lower()
#     if re.search(r"strict|harsh penalties|unpredictable", laws_text):
#         score -= 10  # Strict or harsh laws slightly decrease score for travelers
#     elif re.search(r"standard|clear guidelines|tourist friendly", laws_text):
#         score += 5  # Positive legal environment indicator

#     # Ensure the score is within the range 0 to 100
#     return max(0, min(100, score))

In [None]:
# # Apply the scoring function to each row
# country_data['Safety Score'] = country_data.apply(calculate_safety_score, axis=1)

# # Display a preview of the data with the new Safety Score column
# country_data[['Country', 'Safety Score']].head()

In [None]:
# # Save data with new Safety Score column to CSV
# country_data.to_csv('country_safety_score_data.csv', index=False)

In [11]:
# Calculate Safety Score with Sentiment Analysis
def calculate_safety_score(row):
    """Analyze safety sentiment and return a score based on safety description."""
    safety_text = row['Safety and Security']
    blob = TextBlob(safety_text)
    sentiment = blob.sentiment.polarity  # Range from -1 (negative) to 1 (positive)
    base_score = 100 + (sentiment * 50)  # Center at 100, modify by sentiment
    return max(0, min(100, base_score))

## Budget Fit

In [42]:
def get_amadeus_hotel_data(latitude, longitude, token):
    """Fetch hotel data from Amadeus API for a specific location."""
    url = f"https://test.api.amadeus.com/v1/reference-data/locations/hotels/by-geocode?latitude={latitude}&longitude={longitude}"
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        return [hotel['hotelId'] for hotel in response.json()['data'][:20]]
    else:
        print(f"Error fetching hotel data: {response.status_code}, {response.text}")
        return []

In [63]:
def get_hotel_prices(hotel_ids, token):
    """Retrieve hotel prices for a list of hotels using their hotel_ids."""
    url = f"https://test.api.amadeus.com/v3/shopping/hotel-offers?hotelIds={hotel_ids}"
    headers = {'Authorization': f'Bearer {token}'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        # Extracting the base price for each hotel
        base_prices = []
        for hotel_offer in response.json()['data']:
            if 'offers' in hotel_offer:
                for offer in hotel_offer['offers']:
                    if 'base' in offer['price']:
                        base_price = offer['price']['base']  # Accessing the base price
                        base_prices.append(base_price)
#         currency = response.json()['data'][0]['offers'][0]['price']['currency']
        return base_prices
    else:
        print(f"Error fetching hotel prices: {response.status_code}, {response.text}")
        return []

In [64]:
def calculate_budget_fit(hotel_ids, user_budget, token):
    """Compare the user's budget with average hotel costs."""
    prices = get_hotel_prices(hotel_ids, token)

    budget_fit_score = 0
    # converted_user_budget = convert_currency(user_budget, 'USD', currency, EXCHANGE_RATE_API_KEY)
    if prices:
        avg_cost = sum(float(price) for price in prices) / len(prices)
        budget_fit_score = max(0, min(100, (user_budget / avg_cost) * 100))  # Normalize to 100
    return budget_fit_score

## Social Score

In [15]:
def calculate_social_score(places):
    """
    Calculate a social score based on Google Places ratings and user ratings count.
    The higher the rating and number of ratings, the higher the social score.
    """
    total_score = 0
    count = 0
    for place in places:
        rating = place.get('rating', 0)
        user_ratings_total = place.get('user_ratings_total', 0)

        # Score contribution from rating and popularity (user ratings count)
        place_social_score = rating * (user_ratings_total / 10)  # Scale by count

        # Normalize to cap each place's impact, then add to total
        total_score += min(place_social_score, 100)  # Cap score to prevent outliers
        count += 1

    # Normalize to 0-100 if places are found, else return 0
    return min(100, (total_score / count) if count > 0 else 0)

## Rank Destinations

In [69]:
# Multi-Criteria Scoring
def rank_destinations(user_preferences, countries, coordinates, currencies, google_api_key, amadeus_api_key, weights):
    """
    Rank destinations based on weighted criteria using user preferences.
    weights = (preference_weight, safety_weight, social_weight, budget_weight)
    """
    ranked_destinations = []
    for country, coordinate, currency in zip(countries, coordinates, currencies):
        row = country_data[country_data['Country'] == country].iloc[0]

        # Get data from Google Places for the country
        google_data = get_google_places_data(country, google_api_key)

        # Get data from Amadeus
        amadeus_data = get_amadeus_hotel_data(coordinate[0], coordinate[1], amadeus_api_key)
        
        # Calculate individual scores
        pref_match_score = calculate_preference_match(user_preferences["activities"], google_data, google_api_key)
        safety_score = calculate_safety_score(row)
        user_budget = convert_currency(user_preferences["budget"], 'USD', currency, EXCHANGE_RATE_API_KEY)
        budget_fit_score = calculate_budget_fit(amadeus_data, user_budget, amadeus_api_key)
        social_score = calculate_social_score(google_data)

        # Compute final score using the weighted criteria
        final_score = (
            weights[0] * pref_match_score +
            weights[1] * safety_score +
            weights[2] * social_score +
            weights[3] * budget_fit_score
        ) / sum(weights)  # Normalize by total weight

        ranked_destinations.append((country, final_score, {
            "Preference Match Score": pref_match_score,
            "Safety Score": safety_score,
            "Social Score": social_score,
            "Budget Fit Score": budget_fit_score
        }))

    # Sort by final score, descending
    ranked_destinations.sort(key=lambda x: x[1], reverse=True)
    return ranked_destinations

In [70]:
# Usage example:
user_preferences = {
    "activities": "adventure culture relaxation",
    "budget": 200, # currently budget for one night hotel
    "safety": "medium",
    "social_level": "medium",
    "trip_duration": 7
}

# Define user weighting for each criterion
weights = (0.4, 0.3, 0.2, 0.1)  # Example weights for Preference Match, Safety, Social, Budget

# List of country names to evaluate
countries = ["Singapore", "Japan", "Thailand"]
# Coordinates of the countries
coordinates = [["1.352100", "103.819800"], ["35.6764", "139.6500"], ["13.7563", "100.5018"]]
# Currency of the countries
currencies = ["SGD", "JPY", "THB"]

# Rank the destinations
ranked_results = rank_destinations(user_preferences, countries, coordinates, currencies, GOOGLE_PLACES_API_KEY, AMADEUS_TOKEN, weights)

# Display ranked results
for destination in ranked_results:
    print(f"{destination[0]}: Final Score {destination[1]}")
    print("Details:", destination[2])


SGD
JPY
THB
Singapore: Final Score 84.91287033915717
Details: {'Preference Match Score': 72.80591008212713, 'Safety Score': 100, 'Social Score': 99.171, 'Budget Fit Score': 59.56306306306306}
Thailand: Final Score 83.2077323914423
Details: {'Preference Match Score': 76.14483097860571, 'Safety Score': 100, 'Social Score': 63.749000000000024, 'Budget Fit Score': 100}
Japan: Final Score 81.00913810099337
Details: {'Preference Match Score': 73.45470015735285, 'Safety Score': 100, 'Social Score': 90.97, 'Budget Fit Score': 34.332580380522124}
