In [1]:
# Import the dependencies.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# Use the citipy module to determine city based on latitude and longitude.The citipy module finds the nearest city to 
# the latitude and longitude pair with a population of 500 or more.
from citipy import citipy

# Import the requests library.
import requests

# Import the API key.
from config import weather_api_key

# Import the datetime module from the datetime library.
from datetime import datetime

In [2]:
# Create a set of random latitude and longitude combinations. lats = lattitudes, lngs =  longitudes. zip saves it as a zip
# object which pachs each pair of lats and lngs having the same index in their respective array into a tuple.  If there are
# 1,500 lats and lngs, there will be 1,500 tuples of paired lats and lngs, where each lats and lngs in a tuple can be
# accessed by the index of 0 and 1, respectively. 
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x2cbaa0ca2c8>

In [3]:
# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs)

In [4]:
# Create a list for holding the cities.
cities = []
# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in cities:
        cities.append(city)
# Print the city count to confirm sufficient count.
len(cities)

# Some of this code should look familiar, but let's break it down:

# We create a cities list to store city names.
# We iterate through the coordinates, as in our practice, and retrieve the nearest city using the lats and longs pair.
# We add a decision statement with the logical operator not in to determine whether the found city is already in the cities
# list. If not, then we'll use the append() function to add it. We are doing this because among the 1,500 latitudes and 
# longitudes, there might be duplicates, which will retrieve duplicate cities, and we want to be sure we capture only the 
# unique cities.

607

In [5]:
# For each city in our lats_lngs list, we need to retrieve the following data and add it to a DataFrame:

    # City, country, and date
    # Latitude and longitude
    # Maximum temperature
    # Humidity
    # Cloudiness
    # Wind speed

# Let's use pseudocode to map out, at a high level, how we will get the weather data for each city for the website.

# We will need to do the following:

# import our dependencies and initialize counters and an empty list that will hold the weather data. Dependencies will be added above
# Loop through the cities list.
# Group the cities in sets of 50 to log the process as we find the weather data for each city.
# Two counters will be needed here: one to log the city count from 1 to 50, and another for the sets.
# Build the city_url or endpoint for each city.
# Log the URL and the record and set numbers.
# Make an API request for each city.
# Parse the JSON weather data for the following:
# City, country, and date
# Latitude and longitude
# Maximum temperature
# Humidity
# Cloudiness
# Wind speed
# Add the data to a list in a dictionary format and then convert the list to a DataFrame.

# First create teh basic URL for the OpenWeatherMap
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [6]:
# Create an empty list to hold the weather data.
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

# Loop through all the cities in our list. This creates the URL for each one and groups our records in sets of 50.
# enumerate can be used instead of creating to for loops
for i, city in enumerate(cities):
    
    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the URL, record, and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1
    
# Let's break down the code so we understand fully before continuing:

    # We create the for loop with the enumerate() method and reference the index and the city in the list.
    # In the conditional statement, we check if the remainder of the index divided by 50 is equal to 0 and if the index 
    # is greater than or equal to 50. If the statement is true, then the set_count and the record_count are incremented by 1.
    # Inside the conditional statement, we create the URL endpoint for each city, as before. However, we are removing the 
    # blank spaces in the city name and concatenating the city name with, city.replace(" ","+"). This will find the 
    # corresponding weather data for the city instead of finding the weather data for the first part of the city name.
    # Also, we add a print statement that tells us the record count and set count, and the city that is being processed.
    # Then we add one to the record count before the next city is processed.
    
# Run an API request for each of the cities. If there is not data for the city, i.e., a response 404 then there is not 
# weather to retrieve and city not found. skipping is printed. Pass is a general purpose statem to handle errors and 
# allow the program to continue. Usually we dont want to use this and instead add another try block to print the error. 

    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                    "Lat": city_lat,
                    "Lng": city_lng,
                    "Max Temp": city_max_temp,
                    "Humidity": city_humidity,
                    "Cloudiness": city_clouds,
                    "Wind Speed": city_wind,
                    "Country": city_country,
                    "Date": city_date})

    # If an error is experienced, skip the city.
    except:
        print("City not found. Skipping...")
        pass

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | hilo
Processing Record 2 of Set 1 | punta arenas
Processing Record 3 of Set 1 | bengkulu
Processing Record 4 of Set 1 | port hardy
Processing Record 5 of Set 1 | bandarbeyla
Processing Record 6 of Set 1 | chanasma
Processing Record 7 of Set 1 | lujan
Processing Record 8 of Set 1 | sentyabrskiy
City not found. Skipping...
Processing Record 9 of Set 1 | bambous virieux
Processing Record 10 of Set 1 | albany
Processing Record 11 of Set 1 | manaure
Processing Record 12 of Set 1 | bathsheba
Processing Record 13 of Set 1 | jamestown
Processing Record 14 of Set 1 | ushuaia
Processing Record 15 of Set 1 | kahului
Processing Record 16 of Set 1 | benguela
Processing Record 17 of Set 1 | mar del plata
Processing Record 18 of Set 1 | samarai
Processing Record 19 of Set 1 | vallenar
Processing Record 20 of Set 1 | kruisfontein
Processing Record 21 of Set 1 | gayny
Processing Record 22 of Set 1 | slave lake
Pr

Processing Record 184 of Set 4 | sao joao do piaui
Processing Record 185 of Set 4 | avarua
Processing Record 186 of Set 4 | alice springs
Processing Record 187 of Set 4 | oranjemund
Processing Record 188 of Set 4 | ladario
Processing Record 189 of Set 4 | charyshskoye
Processing Record 190 of Set 4 | mykolayiv
Processing Record 191 of Set 4 | filimonovo
Processing Record 192 of Set 4 | alekseyevsk
Processing Record 193 of Set 4 | miri
Processing Record 194 of Set 4 | itoman
Processing Record 195 of Set 4 | umzimvubu
City not found. Skipping...
Processing Record 196 of Set 4 | kobojango
City not found. Skipping...
Processing Record 197 of Set 4 | alvaraes
Processing Record 198 of Set 4 | attawapiskat
City not found. Skipping...
Processing Record 199 of Set 4 | virginia beach
Processing Record 200 of Set 4 | isangel
Processing Record 201 of Set 5 | villazon
Processing Record 202 of Set 5 | guerrero negro
Processing Record 203 of Set 5 | la ronge
Processing Record 204 of Set 5 | ulaanbaat

Processing Record 368 of Set 8 | oussouye
Processing Record 369 of Set 8 | baturaja
Processing Record 370 of Set 8 | kelvington
Processing Record 371 of Set 8 | dolbeau
City not found. Skipping...
Processing Record 372 of Set 8 | coquimbo
Processing Record 373 of Set 8 | faya
Processing Record 374 of Set 8 | clyde river
Processing Record 375 of Set 8 | palabuhanratu
City not found. Skipping...
Processing Record 376 of Set 8 | rio gallegos
Processing Record 377 of Set 8 | berdigestyakh
Processing Record 378 of Set 8 | tabiauea
City not found. Skipping...
Processing Record 379 of Set 8 | kloulklubed
Processing Record 380 of Set 8 | kupang
Processing Record 381 of Set 8 | hamilton
Processing Record 382 of Set 8 | buala
Processing Record 383 of Set 8 | antofagasta
Processing Record 384 of Set 8 | hanyang
Processing Record 385 of Set 8 | mannarakkat
Processing Record 386 of Set 8 | meulaboh
Processing Record 387 of Set 8 | jieshi
Processing Record 388 of Set 8 | linguere
Processing Record 3

Processing Record 554 of Set 12 | barentsburg
City not found. Skipping...
Processing Record 555 of Set 12 | lukoyanov
Processing Record 556 of Set 12 | catumbela
Processing Record 557 of Set 12 | knysna
Processing Record 558 of Set 12 | haibowan
City not found. Skipping...
Processing Record 559 of Set 12 | concepcion del oro
Processing Record 560 of Set 12 | ewa beach
Processing Record 561 of Set 12 | orcopampa
Processing Record 562 of Set 12 | bababe
City not found. Skipping...
Processing Record 563 of Set 12 | dongsheng
Processing Record 564 of Set 12 | port moresby
Processing Record 565 of Set 12 | kodinsk
Processing Record 566 of Set 12 | qaqortoq
Processing Record 567 of Set 12 | kathu
Processing Record 568 of Set 12 | pitsunda
Processing Record 569 of Set 12 | tyazhinskiy
Processing Record 570 of Set 12 | usinsk
Processing Record 571 of Set 12 | eenhana
Processing Record 572 of Set 12 | kedrovyy
Processing Record 573 of Set 12 | saravena
Processing Record 574 of Set 12 | gizo
Pro

In [7]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.head(10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Hilo,19.7297,-155.09,75.61,94,90,8.05,US,2021-07-26 02:27:58
1,Punta Arenas,-53.15,-70.9167,33.91,93,0,12.66,CL,2021-07-26 02:24:20
2,Bengkulu,-3.8004,102.2655,81.16,63,0,2.51,ID,2021-07-26 02:23:08
3,Port Hardy,50.6996,-127.4199,62.78,82,75,2.3,CA,2021-07-26 02:27:59
4,Bandarbeyla,9.4942,50.8122,76.57,79,80,23.89,SO,2021-07-26 02:27:59
5,Chanasma,23.7167,72.1167,79.25,90,100,15.05,IN,2021-07-26 02:28:00
6,Lujan,-34.5703,-59.105,55.15,96,0,7.23,AR,2021-07-26 02:28:00
7,Bambous Virieux,-20.3428,57.7575,70.05,64,20,9.22,MU,2021-07-26 02:28:00
8,Albany,42.6001,-73.9662,72.39,94,0,4.09,US,2021-07-26 02:23:22
9,Manaure,11.7751,-72.4445,79.3,86,100,7.02,CO,2021-07-26 02:28:01


In [8]:
# Change the column order
new_column_order  = ['City','Country','Date', 'Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed',]

# Assign district summary df the new column order.
city_data_df = city_data_df[new_column_order]
city_data_df.head(10)


Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Hilo,US,2021-07-26 02:27:58,19.7297,-155.09,75.61,94,90,8.05
1,Punta Arenas,CL,2021-07-26 02:24:20,-53.15,-70.9167,33.91,93,0,12.66
2,Bengkulu,ID,2021-07-26 02:23:08,-3.8004,102.2655,81.16,63,0,2.51
3,Port Hardy,CA,2021-07-26 02:27:59,50.6996,-127.4199,62.78,82,75,2.3
4,Bandarbeyla,SO,2021-07-26 02:27:59,9.4942,50.8122,76.57,79,80,23.89
5,Chanasma,IN,2021-07-26 02:28:00,23.7167,72.1167,79.25,90,100,15.05
6,Lujan,AR,2021-07-26 02:28:00,-34.5703,-59.105,55.15,96,0,7.23
7,Bambous Virieux,MU,2021-07-26 02:28:00,-20.3428,57.7575,70.05,64,20,9.22
8,Albany,US,2021-07-26 02:23:22,42.6001,-73.9662,72.39,94,0,4.09
9,Manaure,CO,2021-07-26 02:28:01,11.7751,-72.4445,79.3,86,100,7.02


In [10]:
#Save dataframe as csv
pd.DataFrame.to_csv(city_data_df, 'cities.csv',)
city_data_df.head()

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Hilo,US,2021-07-26 02:27:58,19.7297,-155.09,75.61,94,90,8.05
1,Punta Arenas,CL,2021-07-26 02:24:20,-53.15,-70.9167,33.91,93,0,12.66
2,Bengkulu,ID,2021-07-26 02:23:08,-3.8004,102.2655,81.16,63,0,2.51
3,Port Hardy,CA,2021-07-26 02:27:59,50.6996,-127.4199,62.78,82,75,2.3
4,Bandarbeyla,SO,2021-07-26 02:27:59,9.4942,50.8122,76.57,79,80,23.89
