# RICE-VIRT-DATA-PT-05-2022-U-B-MW Module 6 Challenge

# Deliverable 2: Create a Customer Travel Destinations Map
## Code Summary
- **Purpose  :** Filter the collected city weather data by the customer's preferred maximum-temperature range, find a nearby hotel for each matching city, and plot the resulting travel destinations on a map
- **Created  :** 2022 Jun 20 05:55:00 UTC (Meghan E. Hull)
- **Modified :** 2022 Jun 22 07:59:44 UTC (Meghan E. Hull)

## Dependencies

In [1]:
# Dependencies and Setup
import html
import time

import gmaps
import numpy as np
import pandas as pd
import requests

# Add the directory above current directory to list of directories where Python will look for modules
import sys; sys.path.insert(0, '..')

## API Key

In [2]:
# Import the Google API key from the config module in the main (parent) directory;
# the import resolves because '..' was inserted into sys.path in the setup cell.
from config import g_key

# Configure gmaps with the API key before any map objects are created.
gmaps.configure(api_key=g_key)

# Inputs
## Import / Export File Names

In [3]:
# Input: weather database read in step 1 (path is relative to this notebook).
weather_csv_file = "../Weather_Database/WeatherPy_database.csv"

# Outputs: CSV of cities with hotels (written in step 8) and the map image name.
output_data_file = "WeatherPy_vacation.csv"
output_png_file = "WeatherPy_vacation_map.png"

## 1. Import the WeatherPy_database.csv file.

In [4]:
# Load the weather database into a DataFrame.
# Only empty fields are treated as missing: with pandas' default NA markers the
# two-letter country code "NA" (Namibia) would be parsed as NaN, and those cities
# would then be discarded as "empty rows" by the clean-up in step 4.
city_data_df = pd.read_csv(weather_csv_file, keep_default_na=False, na_values=[""])
city_data_df.head()

Unnamed: 0,City_ID,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description
0,0,Bulgan,MN,48.8125,103.5347,72.9,42,52,5.46,broken clouds
1,1,Codrington,AU,-38.2667,141.9667,54.19,80,100,20.4,overcast clouds
2,2,Rikitea,PF,-23.1203,-134.9692,71.58,58,76,19.71,broken clouds
3,3,Alofi,NU,-19.0595,-169.9187,76.89,94,75,4.61,broken clouds
4,4,Hermanus,ZA,-34.4187,19.2345,44.8,84,0,13.2,clear sky


## 2. Prompt the user to enter minimum and maximum temperature criteria 

In [5]:
# Ask for the inclusive range of maximum temperatures (°F) the customer wants.
# float() raises ValueError if the entry is not a number.
temp_min=float(input("Please enter the lowest Max Temperature (F):  "))
temp_max=float(input("Please enter the highest Max Temperature (F): "))

# If the bounds were entered in reverse order the filter in step 3 would match
# no cities at all, so put them back in ascending order and say so.
if temp_min > temp_max:
    print(f"Lowest temperature ({temp_min}) is above the highest ({temp_max}); swapping the two values.")
    temp_min, temp_max = temp_max, temp_min

Please enter the lowest Max Temperature (F):  65
Please enter the highest Max Temperature (F): 85


## 3. Filter City Dataframe 

In [6]:
# 3. Keep only the cities whose "Max Temp" lies between temp_min and temp_max
#    (both bounds inclusive), selecting the matching rows with the loc method.
max_temps = city_data_df["Max Temp"]
at_or_above_min = max_temps >= temp_min
at_or_below_max = max_temps <= temp_max
city_search_df = city_data_df.loc[at_or_above_min & at_or_below_max]
city_search_df.head(10)

Unnamed: 0,City_ID,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description
0,0,Bulgan,MN,48.8125,103.5347,72.9,42,52,5.46,broken clouds
2,2,Rikitea,PF,-23.1203,-134.9692,71.58,58,76,19.71,broken clouds
3,3,Alofi,NU,-19.0595,-169.9187,76.89,94,75,4.61,broken clouds
7,7,Airai,TL,-8.9266,125.4092,72.23,43,65,4.29,broken clouds
8,8,Cairns,AU,-16.9167,145.7667,83.07,67,100,16.11,overcast clouds
11,11,Anloga,GH,5.7947,0.8973,77.32,87,52,7.96,broken clouds
15,15,Diffa,NE,13.3154,12.6113,81.05,53,48,11.01,scattered clouds
16,16,Geraldton,AU,-28.7667,114.6,68.41,64,0,5.75,clear sky
18,18,Maragogi,BR,-9.0122,-35.2225,73.56,90,28,3.38,scattered clouds
19,19,Awbari,LY,26.5921,12.7805,79.86,19,0,9.69,clear sky


## 4. Clean Up City Dataframe 

In [7]:
# 4a. Count missing values per column. Empty strings are converted to NaN first
#     so that they are counted as missing too; city_search_df itself is untouched.
clean_df = city_search_df.replace("", np.nan)
clean_df.isnull().sum()

City_ID                0
City                   0
Country                0
Lat                    0
Lng                    0
Max Temp               0
Humidity               0
Cloudiness             0
Wind Speed             0
Current Description    0
dtype: int64

In [8]:
# 4b. Remove every row that contains a missing value, then renumber the
#     remaining rows from 0 so the index is contiguous again.
clean_df = clean_df.dropna().reset_index(drop=True)
clean_df.head(10)

Unnamed: 0,City_ID,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Current Description
0,0,Bulgan,MN,48.8125,103.5347,72.9,42,52,5.46,broken clouds
1,2,Rikitea,PF,-23.1203,-134.9692,71.58,58,76,19.71,broken clouds
2,3,Alofi,NU,-19.0595,-169.9187,76.89,94,75,4.61,broken clouds
3,7,Airai,TL,-8.9266,125.4092,72.23,43,65,4.29,broken clouds
4,8,Cairns,AU,-16.9167,145.7667,83.07,67,100,16.11,overcast clouds
5,11,Anloga,GH,5.7947,0.8973,77.32,87,52,7.96,broken clouds
6,15,Diffa,NE,13.3154,12.6113,81.05,53,48,11.01,scattered clouds
7,16,Geraldton,AU,-28.7667,114.6,68.41,64,0,5.75,clear sky
8,18,Maragogi,BR,-9.0122,-35.2225,73.56,90,28,3.38,scattered clouds
9,19,Awbari,LY,26.5921,12.7805,79.86,19,0,9.69,clear sky


# Find Nearby Hotels
## 5. Create Dataframe for Hotel 

In [9]:
# 5a. Build hotel_df from the columns needed for the hotel search and the map:
#     city, country, max temp, weather description, and coordinates.
hotel_columns = ["City", "Country", "Max Temp", "Current Description", "Lat", "Lng"]
hotel_df = clean_df.loc[:, hotel_columns].copy()

# 5b. Add an empty "Hotel Name" column; it is filled in by the search in step 6.
hotel_df["Hotel Name"] = ""
hotel_df.head(10)

Unnamed: 0,City,Country,Max Temp,Current Description,Lat,Lng,Hotel Name
0,Bulgan,MN,72.9,broken clouds,48.8125,103.5347,
1,Rikitea,PF,71.58,broken clouds,-23.1203,-134.9692,
2,Alofi,NU,76.89,broken clouds,-19.0595,-169.9187,
3,Airai,TL,72.23,broken clouds,-8.9266,125.4092,
4,Cairns,AU,83.07,overcast clouds,-16.9167,145.7667,
5,Anloga,GH,77.32,broken clouds,5.7947,0.8973,
6,Diffa,NE,81.05,scattered clouds,13.3154,12.6113,
7,Geraldton,AU,68.41,clear sky,-28.7667,114.6,
8,Maragogi,BR,73.56,scattered clouds,-9.0122,-35.2225,
9,Awbari,LY,79.86,clear sky,26.5921,12.7805,


## 6. Use Google Places API (Nearby Search) to find a nearby hotel

In [10]:
# 6a. Set the search parameters: lodging within 5000 meters, results in English,
#     authenticated with the Google API key.
params = dict(
    language="en",
    radius=5000,
    type="lodging",
    key=g_key,
)

In [11]:
# Print the beginning of the logging.
print("Beginning Data Retrieval for " + str(len(hotel_df)) + " Cities")
print("--------------------------------------------------")

# Endpoint of the Google Places "Nearby Search" API (JSON output).
places_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# Set start time (seconds)
t_o=time.time()

# Create counters.
record_count = 1
set_count = 1
skip_count = 0


# 6b. Iterate through the hotel DataFrame.
for i, city in hotel_df.iterrows():

    # Group cities in sets of 50 for logging purposes and pause for 60 seconds
    # between sets (each pause adds a full minute to the total run time).
    # NOTE: relies on hotel_df having a 0..n-1 integer index.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Log the record and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city['City']}")

    # Add 1 to the record count.
    record_count += 1

    # 6c. Get latitude and longitude from DataFrame
    lat=city["Lat"]
    lng=city["Lng"]

    # 6d. Build the query for the Places Nearby Search request. Passing the values
    #     through `params=` lets requests URL-encode them (the comma in the
    #     location becomes %2C) instead of assembling the URL by hand.
    query = {"location": f"{lat},{lng}", **params}

    # 6e/6f. Make the request, read the JSON, and store the first hotel's name.
    #        The request itself is inside the try block so that a network failure,
    #        timeout, HTTP error, or malformed response skips this one city instead
    #        of aborting the whole run. (Earlier variants that used only try/except
    #        or only the status field had comparable run times.)
    try:
        response = requests.get(places_url, params=query, timeout=30)
        response.raise_for_status()
        mysearch = response.json()

        if mysearch['status']=='OK':
            hotel_df.at[i,'Hotel Name']=mysearch['results'][0]['name']
        elif mysearch['status']=='ZERO_RESULTS':
            skip_count += 1
            print("Hotel not found near city. Skipping...")
        else:
            skip_count += 1
            print(f"Error processing search for city ({mysearch['status']}). Skipping...")
    except (requests.exceptions.RequestException, ValueError, KeyError, IndexError, TypeError):
        # RequestException: connection/timeout/HTTP errors; ValueError: body is not JSON;
        # KeyError/IndexError/TypeError: JSON does not have the expected structure.
        skip_count += 1
        print("Runtime error during processing. Re-executing may fix the issue. Skipping...")


# Set end time (seconds)
t_f=time.time()

# Find elapsed time (seconds) and split it into hours, minutes, and seconds.
t_del=t_f-t_o
t_del_hr=int(t_del/3600)
t_del_min=int(t_del/60)-t_del_hr*60
t_del_sec=round(t_del-t_del_hr*3600-t_del_min*60,2)
t_del_str=str(t_del_hr) + ":" + str(t_del_min) + ":" + str(t_del_sec) + " (" + str(round(t_del,2)) + " sec)"

# Indicate that Data Loading is complete.
print("--------------------------------------------------")
print("Data Retrieval Completed     ")
print("--------------------------------------------------")
print("Time Elapsed: " + t_del_str)
print("Hotels found for " + str(len(hotel_df)-skip_count) + " of " + str(len(hotel_df)) + " cities")
print("--------------------------------------------------")


Beginning Data Retrieval for 318 Cities
--------------------------------------------------
Processing Record 1 of Set 1 | Bulgan
Processing Record 2 of Set 1 | Rikitea
Processing Record 3 of Set 1 | Alofi
Processing Record 4 of Set 1 | Airai
Hotel not found near city. Skipping...
Processing Record 5 of Set 1 | Cairns
Processing Record 6 of Set 1 | Anloga
Processing Record 7 of Set 1 | Diffa
Processing Record 8 of Set 1 | Geraldton
Processing Record 9 of Set 1 | Maragogi
Processing Record 10 of Set 1 | Awbari
Processing Record 11 of Set 1 | Kinshasa
Processing Record 12 of Set 1 | Bodrum
Processing Record 13 of Set 1 | Luneville
Processing Record 14 of Set 1 | Kita
Processing Record 15 of Set 1 | Kapaa
Processing Record 16 of Set 1 | Hithadhoo
Processing Record 17 of Set 1 | Siocon
Processing Record 18 of Set 1 | Thompson
Processing Record 19 of Set 1 | Puerto Ayora
Processing Record 20 of Set 1 | Sovetskiy
Processing Record 21 of Set 1 | Atuona
Processing Record 22 of Set 1 | Robertspo

Processing Record 37 of Set 4 | Richards Bay
Processing Record 38 of Set 4 | Kiunga
Processing Record 39 of Set 4 | Belle Fourche
Processing Record 40 of Set 4 | Nijar
Processing Record 41 of Set 4 | Guayaramerin
Processing Record 42 of Set 4 | Yulara
Processing Record 43 of Set 4 | Ensley
Processing Record 44 of Set 4 | Puerto Carreno
Processing Record 45 of Set 4 | Mount Isa
Processing Record 46 of Set 4 | Ust-Koksa
Processing Record 47 of Set 4 | San Cristobal
Processing Record 48 of Set 4 | Pachino
Processing Record 49 of Set 4 | Kudahuvadhoo
Processing Record 50 of Set 4 | Rumonge
Processing Record 1 of Set 5 | Mayumba
Processing Record 2 of Set 5 | Bowen
Processing Record 3 of Set 5 | Baykit
Hotel not found near city. Skipping...
Processing Record 4 of Set 5 | Dodge City
Processing Record 5 of Set 5 | Hongjiang
Processing Record 6 of Set 5 | Saint-Pierre
Processing Record 7 of Set 5 | Shiyan
Processing Record 8 of Set 5 | Samarai
Processing Record 9 of Set 5 | Teahupoo
Processing

## 7. Drop the rows where there is no Hotel Name.

In [12]:
# 7a. Work on an independent copy so hotel_df keeps the raw search results.
clean_hotel_df = hotel_df.copy(deep=True)
print(f"clean_hotel_df has {clean_hotel_df.shape[0]} rows")
clean_hotel_df.tail(10)

clean_hotel_df has 318 rows


Unnamed: 0,City,Country,Max Temp,Current Description,Lat,Lng,Hotel Name
308,Cabanas,CU,75.52,light rain,22.9758,-82.9219,Villa Luis Montesino y Anabel
309,Sangar,RU,66.07,overcast clouds,63.9242,127.4739,
310,Amapa,BR,69.4,scattered clouds,1.0,-52.0,
311,Sinnamary,GF,76.64,overcast clouds,5.3833,-52.95,ALETHEIA Appartement Sinnamary
312,Kabardinka,RU,76.98,few clouds,44.6514,37.9428,База отдыха Орленок
313,Poum,NC,72.95,few clouds,-20.2333,164.0167,
314,Vari,GR,79.02,clear sky,37.8312,23.8081,The Margi
315,Akhmim,EG,82.6,clear sky,26.5622,31.745,برج الماسة السكني
316,Tecoanapa,MX,76.89,overcast clouds,16.5167,-98.75,Hotel Manglares
317,Brownsville,US,82.9,clear sky,25.9018,-97.4975,Best Western Hotel Plaza Matamoros


In [13]:
# 7b. Replace empty hotel names with NaN.
#     The result is assigned back to the column rather than calling
#     replace(..., inplace=True) on the selected column: that form is chained
#     assignment, which recent pandas versions warn about and which leaves the
#     DataFrame unchanged when Copy-on-Write is enabled.
clean_hotel_df["Hotel Name"] = clean_hotel_df["Hotel Name"].replace("", np.nan)
print(f"clean_hotel_df has {clean_hotel_df['Hotel Name'].isnull().sum()} cities with no hotel.")
clean_hotel_df.tail(10)

clean_hotel_df has 29 cities with no hotel.


Unnamed: 0,City,Country,Max Temp,Current Description,Lat,Lng,Hotel Name
308,Cabanas,CU,75.52,light rain,22.9758,-82.9219,Villa Luis Montesino y Anabel
309,Sangar,RU,66.07,overcast clouds,63.9242,127.4739,
310,Amapa,BR,69.4,scattered clouds,1.0,-52.0,
311,Sinnamary,GF,76.64,overcast clouds,5.3833,-52.95,ALETHEIA Appartement Sinnamary
312,Kabardinka,RU,76.98,few clouds,44.6514,37.9428,База отдыха Орленок
313,Poum,NC,72.95,few clouds,-20.2333,164.0167,
314,Vari,GR,79.02,clear sky,37.8312,23.8081,The Margi
315,Akhmim,EG,82.6,clear sky,26.5622,31.745,برج الماسة السكني
316,Tecoanapa,MX,76.89,overcast clouds,16.5167,-98.75,Hotel Manglares
317,Brownsville,US,82.9,clear sky,25.9018,-97.4975,Best Western Hotel Plaza Matamoros


In [14]:
# 7c. Drop the cities for which no hotel was found and renumber the rows from 0.
#     The drop is restricted to the "Hotel Name" column so that a missing value in
#     any other column cannot remove a city that does have a hotel.
clean_hotel_df = clean_hotel_df.dropna(subset=["Hotel Name"]).reset_index(drop=True)
print(f"Hotels found nearby {len(clean_hotel_df)} cities.")
clean_hotel_df.tail(10)

Hotels found nearby 289 cities.


Unnamed: 0,City,Country,Max Temp,Current Description,Lat,Lng,Hotel Name
279,Sabang,ID,82.67,broken clouds,5.8933,95.3214,Hotel Citra
280,Verkh-Usugli,RU,69.35,light rain,52.7,115.1667,"Gostinitsa ""Sever"""
281,Mogadishu,SO,80.47,scattered clouds,2.0371,45.3438,Hotel Juba
282,Cabanas,CU,75.52,light rain,22.9758,-82.9219,Villa Luis Montesino y Anabel
283,Sinnamary,GF,76.64,overcast clouds,5.3833,-52.95,ALETHEIA Appartement Sinnamary
284,Kabardinka,RU,76.98,few clouds,44.6514,37.9428,База отдыха Орленок
285,Vari,GR,79.02,clear sky,37.8312,23.8081,The Margi
286,Akhmim,EG,82.6,clear sky,26.5622,31.745,برج الماسة السكني
287,Tecoanapa,MX,76.89,overcast clouds,16.5167,-98.75,Hotel Manglares
288,Brownsville,US,82.9,clear sky,25.9018,-97.4975,Best Western Hotel Plaza Matamoros


## 8. Export the data

In [15]:
# 8a. The output file name (output_data_file) is set in the Inputs section.
# 8b. Write the cities that have a hotel to CSV; the row index is saved as the
#     first column under the header "City_ID".
clean_hotel_df.to_csv(path_or_buf=output_data_file, index=True, index_label="City_ID")

# Create Map
## 9. Create template for info box

In [16]:
# 9. Info box template for each hotel marker: hotel name, city name, country code,
#    weather description, and maximum temperature. The placeholders are the
#    DataFrame column names, so a row can be passed in with .format(**row).
info_box_template = """
<dl>
<dt>Hotel Name</dt><dd>{Hotel Name}</dd>
<dt>City</dt><dd>{City}</dd>
<dt>Country</dt><dd>{Country}</dd>
<dt>Weather</dt><dd>{Current Description}</dd>
<dt>Max Temp</dt><dd>{Max Temp} °F</dd>
</dl>
"""


## 10. Add data for hotel markers

In [17]:
# 10a. Fill the info box template with the data from each row and store the
#      resulting HTML snippets in a list. Values are HTML-escaped first because
#      the hotel names come from an external API and may contain characters such
#      as &, <, > or quotes that would otherwise be interpreted as markup.
hotel_info = [
    info_box_template.format(**{column: html.escape(str(value)) for column, value in row.items()})
    for _, row in clean_hotel_df.iterrows()
]

# 10b. Get the latitude and longitude from each row and store in a new DataFrame.
locations = clean_hotel_df[["Lat", "Lng"]]

## 11. Create map 

In [18]:
# Create the base map with a fixed center (latitude, longitude) and zoom level.
map_center = (30.0, 31.0)
fig = gmaps.figure(zoom_level=1.5, center=map_center)

# 11a. Add one marker per city; each marker shows its info box content when used.
marker_layer = gmaps.marker_layer(locations, info_box_content=hotel_info)
fig.add_layer(marker_layer)

# 11b. Display the figure (last expression of the cell)
fig

Figure(layout=FigureLayout(height='420px'))