In [1]:
# Import dependencies and API key

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import requests

from datetime import datetime
from citipy import citipy

from config import weather_api_key

In [2]:
# Create a set of random latitude and longitude combinations

lats = np.random.uniform(low=-90, high=90, size=1500)
lngs = np.random.uniform(low=-180, high=180, size=1500)

lat_lngs = zip(lats, lngs)

lat_lngs

<zip at 0x1e957733cc0>

In [3]:
# Add the latitudes and longitudes to a list

coordinates = list(lat_lngs)

In [4]:
# Create a list to hold the cities

cities = []

# Find the nearest city for each set of coordinates

for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name
    
    # Add the city to the cities list if it's not already in there
    if city not in cities:
        cities.append(city)

# Print the city count

cities[35]

'udachnyy'

In [5]:
# Create starting URL for API call
# Format from OpenWeather:
# https://api.openweathermap.org/data/2.5/weather?q={city name}&appid={API key}

url = "https://api.openweathermap.org/data/2.5/weather?units=imperial&appid=" + weather_api_key

In [6]:
# Create an empty list to hold the weather data

city_data = []

# Print the beginning of the log

print("Beginning data retrieval               ")
print("---------------------------------------")

# Create counters

record_count = 1
set_count = 1

Beginning data retrieval               
---------------------------------------


In [7]:
test_url = url + "&q=" + "athabasca"

In [8]:
test_weather = requests.get(test_url).json()

test_lat = test_weather["coord"]["lat"]
test_max = test_weather["main"]["temp_max"]

In [9]:
test_date = datetime.utcfromtimestamp(
    test_weather["dt"]).strftime("%Y-%M-%D %H:%M:%S")

In [11]:
# Loop through all our cities
for i, city in enumerate(cities):
    
    # Group cities in sets of 50 for logging purposes
    # If i is at least 50 and is divisible by 50, then we have 
    # started a new set.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        #After looping through 50 cities, we need to pause for a 
        # minute to avoid making too many API calls/minute.
        time.sleep(60)
        
    # Create endpoint URL for a city
    # If a city name has a space, we need to replace it with a 
    # plus sign in the URl
    city_url = url + "&q=" + city.replace(" ", "+")

    # Log the URl, the record and set numbers, and the city
    print(
        f"Processing record {record_count} of set {set_count} | {city}"
    )

    # Add one to the record count
    record_count += 1

    # Run an API request for each city
    try:
        #Retrieve data; parse JSON
        city_weather = requests.get(city_url).json()

        #Get the data points we need
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]

        # Convert the date to ISO standard
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime("%Y-%M-%D %H:%M:%S")

        # Append the data to the city_data list
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    except:
        print("City not found. Skipping...")
        pass    

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Processing record 1 of set 1 | bambous virieux
Processing record 2 of set 1 | phan thiet
Processing record 3 of set 1 | avarua
Processing record 4 of set 1 | nizhneyansk
City not found. Skipping...
Processing record 5 of set 1 | vaitupu
City not found. Skipping...
Processing record 6 of set 1 | kushima
Processing record 7 of set 1 | taolanaro
City not found. Skipping...
Processing record 8 of set 1 | mount isa
Processing record 9 of set 1 | provideniya
Processing record 10 of set 1 | raudeberg
Processing record 11 of set 1 | chifeng
Processing record 12 of set 1 | geraldton
Processing record 13 of set 1 | sentyabrskiy
City not found. Skipping...
Processing record 14 of set 1 | busselton
Processing record 15 of set 1 | merauke
Processing record 16 of set 1 | butaritari
Processing record 17 of set 1 | monrovia
Processing record 18 of set 1 | taoudenni
Processing record 19 of set 1 | cadillac
Processing record 20 of set 1 | viedma
Processing record 21 of set 1 | cabo san lucas
Processing 

City not found. Skipping...
Processing record 40 of set 4 | luwuk
Processing record 41 of set 4 | chachapoyas
Processing record 42 of set 4 | armacao dos buzios
City not found. Skipping...
Processing record 43 of set 4 | topeka
Processing record 44 of set 4 | san felipe
Processing record 45 of set 4 | ust-ishim
Processing record 46 of set 4 | datong
Processing record 47 of set 4 | jamestown
Processing record 48 of set 4 | farafangana
Processing record 49 of set 4 | alberton
Processing record 50 of set 4 | sur
Processing record 1 of set 5 | dunedin
Processing record 2 of set 5 | east london
Processing record 3 of set 5 | upernavik
Processing record 4 of set 5 | moundou
Processing record 5 of set 5 | saskylakh
Processing record 6 of set 5 | port hardy
Processing record 7 of set 5 | las vegas
Processing record 8 of set 5 | vilhena
Processing record 9 of set 5 | manta
Processing record 10 of set 5 | korla
Processing record 11 of set 5 | micheweni
Processing record 12 of set 5 | tiksi
Proce

Processing record 31 of set 8 | chuy
Processing record 32 of set 8 | tuatapere
Processing record 33 of set 8 | nalhati
Processing record 34 of set 8 | ostrovnoy
Processing record 35 of set 8 | barinitas
Processing record 36 of set 8 | kaeo
Processing record 37 of set 8 | adrar
Processing record 38 of set 8 | hualmay
Processing record 39 of set 8 | sataua
City not found. Skipping...
Processing record 40 of set 8 | ancud
Processing record 41 of set 8 | clyde river
Processing record 42 of set 8 | dudinka
Processing record 43 of set 8 | puerto escondido
Processing record 44 of set 8 | morehead
Processing record 45 of set 8 | taurage
Processing record 46 of set 8 | jiayuguan
Processing record 47 of set 8 | kamenka
Processing record 48 of set 8 | camapua
Processing record 49 of set 8 | huarmey
Processing record 50 of set 8 | mariestad
Processing record 1 of set 9 | yanan
City not found. Skipping...
Processing record 2 of set 9 | faanui
Processing record 3 of set 9 | kontagora
Processing reco

Processing record 19 of set 12 | flin flon
Processing record 20 of set 12 | luderitz
Processing record 21 of set 12 | sistranda
Processing record 22 of set 12 | lagoa
Processing record 23 of set 12 | kulykivka
Processing record 24 of set 12 | montepuez
Processing record 25 of set 12 | aykhal
Processing record 26 of set 12 | kupang
Processing record 27 of set 12 | wewak
Processing record 28 of set 12 | wattegama
Processing record 29 of set 12 | nushki
Processing record 30 of set 12 | beloha
Processing record 31 of set 12 | nouakchott
Processing record 32 of set 12 | mizpe ramon
City not found. Skipping...
Processing record 33 of set 12 | soe
Processing record 34 of set 12 | turukhansk
Processing record 35 of set 12 | dodge city
Processing record 36 of set 12 | moron
Processing record 37 of set 12 | tual
Processing record 38 of set 12 | toliary
City not found. Skipping...
Processing record 39 of set 12 | akdepe
Processing record 40 of set 12 | praia da vitoria
Processing record 41 of set

In [12]:
len(city_data)

598

In [13]:
# Save the data in a dataframe

city_data_df = pd.DataFrame(city_data)
city_data_df.head(10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Bambous Virieux,-20.3428,57.7575,72.21,78,75,10.36,MU,2022-20-09/19/22 15:20:51
1,Phan Thiet,10.9333,108.1,79.18,81,79,8.46,VN,2022-16-09/19/22 15:16:55
2,Avarua,-21.2078,-159.775,73.45,83,100,2.3,CK,2022-20-09/19/22 15:20:29
3,Kushima,31.4583,131.2333,71.2,89,100,9.19,JP,2022-20-09/19/22 15:20:55
4,Mount Isa,-20.7333,139.5,73.17,46,30,0.0,AU,2022-20-09/19/22 15:20:55
5,Provideniya,64.3833,-173.3,40.14,84,100,0.38,RU,2022-20-09/19/22 15:20:56
6,Raudeberg,61.9875,5.1352,52.47,67,68,21.07,NO,2022-20-09/19/22 15:20:58
7,Chifeng,42.2683,118.9636,46.62,31,92,3.91,CN,2022-20-09/19/22 15:20:58
8,Geraldton,-28.7667,114.6,59.41,63,100,11.5,AU,2022-20-09/19/22 15:20:48
9,Busselton,-33.65,115.3333,55.44,71,99,9.6,AU,2022-21-09/19/22 15:21:00


In [15]:
# Change order of dataframe columns

new_col_order = [
    "City", "Country", "Date", "Lat", "Lng", "Max Temp", "Humidity", "Cloudiness", "Wind Speed"]

In [16]:
city_data_df = city_data_df[new_col_order]

In [17]:
city_data_df.head()

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Bambous Virieux,MU,2022-20-09/19/22 15:20:51,-20.3428,57.7575,72.21,78,75,10.36
1,Phan Thiet,VN,2022-16-09/19/22 15:16:55,10.9333,108.1,79.18,81,79,8.46
2,Avarua,CK,2022-20-09/19/22 15:20:29,-21.2078,-159.775,73.45,83,100,2.3
3,Kushima,JP,2022-20-09/19/22 15:20:55,31.4583,131.2333,71.2,89,100,9.19
4,Mount Isa,AU,2022-20-09/19/22 15:20:55,-20.7333,139.5,73.17,46,30,0.0


In [18]:
# Create the output CSV file

output_data_file = "weather_data/cities.csv"

# Export the city_data into the csv

city_data_df.to_csv(output_data_file, index_label="City_ID")