In [1]:
# Import the dependencies.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Create a set of random latitude and longitude combinations.
#The zip object packs each pair of lats and lngs having the same index in their respective array into a tuple. 
#If there are 1,500 latitudes and longitudes, there will be 1,500 tuples of paired latitudes and longitudes, 
#where each latitude and longitude in a tuple can be accessed by the index of 0 and 1, respectively.

lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)
lat_lngs


<zip at 0x7fd428d09960>

In [3]:
# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs)

In [4]:
# Use the print() function to display the latitude and longitude combinations.
for coordinate in coordinates:
    print(coordinate[0], coordinate[1])

-0.08992892356395998 -23.04453433891021
68.42612163252579 -137.53929803009942
67.11758997218868 70.49115240594531
19.22192910587492 90.99574200713596
39.28850121414686 1.5564792071513978
39.786535966205406 -124.32041538292887
84.07425542654809 172.43948447574166
32.10391833177721 11.826418495501201
-36.319956932733476 -68.41489909669677
69.1626865021675 -72.63866189600341
-17.50147970582411 -12.100709274033989
-56.060818610475 -111.76263409870731
-10.531190955095084 94.05958802897317
46.27792169800392 77.81874320881036
-10.471960144890346 -146.20723660983714
-2.2223486167440853 38.58965486088317
36.65141366796698 -52.55664035489757
-63.40272172402446 42.15959166563553
-51.27712101116123 -102.86273992537726
-74.28881116242759 -106.23683953200765
77.25337902063069 -99.2588465477823
-31.020878887993263 -74.79585676382355
4.303907197705328 -124.9413378527583
54.01256581642386 162.23335118645286
79.44128255770485 -137.97750969945056
56.65255806349876 -42.35977915367505
7.632218826657137 129

-11.731728932999118 -149.56571377817485
65.34764082102677 132.94280193822163
-85.64380443411973 -147.96145489444157
5.837949813228036 -40.42303401147123
-83.72791798606504 73.69163180074844
44.32029423598988 -99.02025438011711
2.327565933731293 93.04552235701158
85.98941179373412 75.07552907170259
10.66573709587027 -143.74228752764856
23.030393859461427 -69.56270980758671
-83.69456632398774 159.49210955490526
-26.712325618208034 -13.238398171598817
-46.86523324684822 21.092874533736676
58.78735555955927 -25.43776584144203
-53.857304062453586 117.64612068541862
-75.2997652454977 50.596860537352626
-88.34551226641766 157.32303225134729
-30.326351586033894 -13.924208394369685
23.718694616365894 137.84041545100035
-28.83644540517009 -83.77005664742339
55.98114189161731 109.62844501765966
-68.87641107243947 46.45653340177998
-1.5095348197809528 -178.82327145227435
1.0087768476009131 -18.1733908107569
72.22939040101733 -122.05942630097093
-8.957892737436964 129.71910293886333
-31.51726764427

In [5]:
from citipy import citipy

In [7]:
# Create a list for holding the cities.
cities = []
# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in cities:
        cities.append(city)
# Print the city count to confirm sufficient count.
len(cities)


611

In [8]:
# Import requests library and weather_api_key
import requests
from config import weather_api_key


In [9]:
# Starting URL for Weather Map API Call.
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key
print(url)


http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=d0b2725963c21a3632b9c5be9983b808


In [10]:
# Import the time library and the datetime module from the datetime library 
import time
from datetime import datetime

In [14]:
# Create an empty list to hold the weather data. (6.2.6)
#In the code block, we have initialized the counters at 1 because we want the first iteration
#of the logging for each recorded response and the set to start at 1.

# List of city data
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list
for i, city in enumerate(cities):
        
    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Create endpoint URL with each city
    city_url = url + "&q=" + city.replace(" ", "+")
    
    # Log the url, record, and set numbers
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Parse the JSON and retrieve data
        city_weather = requests.get(city_url).json()

        # Parse out the max temp, humidity, and cloudiness
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S') 

        # Append the City information into city_data list
        city_data.append({"City": city.title(), 
                          "Lat": city_lat, 
                          "Lng": city_lng, 
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If an error is experienced, skip the city
    except:
        print("City not found. Skipping...")
        pass
              

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")



Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | georgetown
Processing Record 2 of Set 1 | aklavik
Processing Record 3 of Set 1 | yar-sale
Processing Record 4 of Set 1 | teknaf
Processing Record 5 of Set 1 | santa eulalia del rio
City not found. Skipping...
Processing Record 6 of Set 1 | fortuna
Processing Record 7 of Set 1 | pevek
Processing Record 8 of Set 1 | jadu
Processing Record 9 of Set 1 | san rafael
Processing Record 10 of Set 1 | clyde river
Processing Record 11 of Set 1 | jamestown
Processing Record 12 of Set 1 | rikitea
Processing Record 13 of Set 1 | bengkulu
Processing Record 14 of Set 1 | ushtobe
Processing Record 15 of Set 1 | atuona
Processing Record 16 of Set 1 | kitui
Processing Record 17 of Set 1 | saint-pierre
Processing Record 18 of Set 1 | port alfred
Processing Record 19 of Set 1 | castro
Processing Record 20 of Set 1 | punta arenas
Processing Record 21 of Set 1 | yellowknife
Processing Record 22 of Set 1 | coquimbo
Proc

Processing Record 35 of Set 4 | basay
Processing Record 36 of Set 4 | gamba
Processing Record 37 of Set 4 | puri
Processing Record 38 of Set 4 | tasiilaq
Processing Record 39 of Set 4 | monte alegre
Processing Record 40 of Set 4 | sevierville
Processing Record 41 of Set 4 | sisimiut
Processing Record 42 of Set 4 | yining
Processing Record 43 of Set 4 | guarapari
Processing Record 44 of Set 4 | road town
Processing Record 45 of Set 4 | saint george
Processing Record 46 of Set 4 | hamilton
Processing Record 47 of Set 4 | karachi
Processing Record 48 of Set 4 | luwuk
Processing Record 49 of Set 4 | mys shmidta
City not found. Skipping...
Processing Record 50 of Set 4 | sursk
Processing Record 1 of Set 5 | kosai
Processing Record 2 of Set 5 | kiama
Processing Record 3 of Set 5 | maturin
Processing Record 4 of Set 5 | lisakovsk
Processing Record 5 of Set 5 | saleaula
City not found. Skipping...
Processing Record 6 of Set 5 | dingle
Processing Record 7 of Set 5 | port said
Processing Record 

Processing Record 29 of Set 8 | longyearbyen
Processing Record 30 of Set 8 | fiumicino
Processing Record 31 of Set 8 | marcona
City not found. Skipping...
Processing Record 32 of Set 8 | margate
Processing Record 33 of Set 8 | jasper
Processing Record 34 of Set 8 | port macquarie
Processing Record 35 of Set 8 | pahrump
Processing Record 36 of Set 8 | azacualpa
Processing Record 37 of Set 8 | kulhudhuffushi
Processing Record 38 of Set 8 | yenangyaung
Processing Record 39 of Set 8 | acapulco
Processing Record 40 of Set 8 | buraydah
Processing Record 41 of Set 8 | meulaboh
Processing Record 42 of Set 8 | mehamn
Processing Record 43 of Set 8 | richards bay
Processing Record 44 of Set 8 | luderitz
Processing Record 45 of Set 8 | itoigawa
Processing Record 46 of Set 8 | brae
Processing Record 47 of Set 8 | salcininkai
Processing Record 48 of Set 8 | valparaiso
Processing Record 49 of Set 8 | alihe
Processing Record 50 of Set 8 | rio grande
Processing Record 1 of Set 9 | madang
Processing Rec

Processing Record 17 of Set 12 | katol
Processing Record 18 of Set 12 | muravlenko
Processing Record 19 of Set 12 | vredendal
Processing Record 20 of Set 12 | clinton
Processing Record 21 of Set 12 | klaksvik
Processing Record 22 of Set 12 | conde
Processing Record 23 of Set 12 | stabat
Processing Record 24 of Set 12 | saravena
Processing Record 25 of Set 12 | olinda
Processing Record 26 of Set 12 | ituni
City not found. Skipping...
Processing Record 27 of Set 12 | broken hill
Processing Record 28 of Set 12 | cayenne
Processing Record 29 of Set 12 | ostrovnoy
Processing Record 30 of Set 12 | chipinge
Processing Record 31 of Set 12 | fethiye
Processing Record 32 of Set 12 | maxixe
Processing Record 33 of Set 12 | guanica
Processing Record 34 of Set 12 | ayia galini
Processing Record 35 of Set 12 | gat
Processing Record 36 of Set 12 | rafaela
Processing Record 37 of Set 12 | axim
Processing Record 38 of Set 12 | beisfjord
Processing Record 39 of Set 12 | mari-turek
Processing Record 40 o

In [15]:
len(city_data)

567

In [16]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.head(10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Georgetown,5.4112,100.3354,84.13,78,20,0.0,MY,2022-10-18 02:18:28
1,Aklavik,68.2191,-135.0107,26.51,91,100,4.47,CA,2022-10-18 02:22:42
2,Yar-Sale,66.8333,70.8333,34.32,93,71,13.76,RU,2022-10-18 02:22:43
3,Teknaf,20.8624,92.3058,79.12,86,98,3.94,BD,2022-10-18 02:22:43
4,Fortuna,40.5982,-124.1573,61.93,82,0,8.05,US,2022-10-18 02:22:44
5,Pevek,69.7008,170.3133,4.77,93,70,5.06,RU,2022-10-18 02:22:44
6,Jadu,31.953,12.0261,58.66,74,0,5.68,LY,2022-10-18 02:22:45
7,San Rafael,-34.6177,-68.3301,53.56,85,100,14.03,AR,2022-10-18 02:22:45
8,Clyde River,70.4692,-68.5914,23.23,86,100,6.91,CA,2022-10-18 02:22:46
9,Jamestown,42.097,-79.2353,40.59,68,75,18.41,US,2022-10-18 02:22:46


In [17]:
# Reorder the columns in the order you want them to appear.
new_column_order = ["City", "Country", "Date", "Lat",
                    "Lng", "Max Temp", "Humidity", "Cloudiness", "Wind Speed"] 

# Assign a new or the same DataFrame the new column order.
city_data_df = city_data_df[new_column_order]
city_data_df.head(10)

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Georgetown,MY,2022-10-18 02:18:28,5.4112,100.3354,84.13,78,20,0.0
1,Aklavik,CA,2022-10-18 02:22:42,68.2191,-135.0107,26.51,91,100,4.47
2,Yar-Sale,RU,2022-10-18 02:22:43,66.8333,70.8333,34.32,93,71,13.76
3,Teknaf,BD,2022-10-18 02:22:43,20.8624,92.3058,79.12,86,98,3.94
4,Fortuna,US,2022-10-18 02:22:44,40.5982,-124.1573,61.93,82,0,8.05
5,Pevek,RU,2022-10-18 02:22:44,69.7008,170.3133,4.77,93,70,5.06
6,Jadu,LY,2022-10-18 02:22:45,31.953,12.0261,58.66,74,0,5.68
7,San Rafael,AR,2022-10-18 02:22:45,-34.6177,-68.3301,53.56,85,100,14.03
8,Clyde River,CA,2022-10-18 02:22:46,70.4692,-68.5914,23.23,86,100,6.91
9,Jamestown,US,2022-10-18 02:22:46,42.097,-79.2353,40.59,68,75,18.41


In [18]:
# Create the output File (CSV)
output_data_file = "weather_data/clean_cities.csv"
# Export the City_Data into a csv
city_data_df.to_csv(output_data_file, index_label="City_ID")

# Show Record Count
city_data_df.count()


City          567
Country       567
Date          567
Lat           567
Lng           567
Max Temp      567
Humidity      567
Cloudiness    567
Wind Speed    567
dtype: int64