In [1]:
import os
import time
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from citipy import citipy

from config import weather_api_key

In [2]:
# Base endpoint for the OpenWeatherMap current-weather API. The query string
# requests Imperial units and carries the API key as the APPID parameter.
# HTTPS is used so the API key is not sent over the network in cleartext.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key


In [3]:
# Draw 1,500 random latitudes in [-90, 90) and 1,500 random longitudes in
# [-180, 180) from a uniform distribution.
lats = np.random.uniform(-90.000, 90.000, size=1500)
lngs = np.random.uniform(-180.000, 180.000, size=1500)

# Pair each latitude with the longitude at the same position. zip() is lazy:
# the pairs can be consumed only once.
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x202f8074b80>

In [4]:
# Materialise the zipped (latitude, longitude) pairs into a list so they can
# be iterated more than once.
coordinates = [*lat_lngs]

In [5]:
# Show every latitude/longitude pair, one pair per line.
for lat, lng in coordinates:
    print(lat, lng)

43.58137708749254 24.28945095251794
-24.8794421913519 149.87845719917283
-68.12846104274284 -74.15745456085732
86.8436137055337 135.2898599715067
54.58938846619375 -142.4318315786975
-23.424387835881234 146.0184275305498
67.16320104127925 -112.7892636602598
-89.56270492767428 -131.25024119396005
60.316547148109976 -49.46228867420783
-78.45872264855996 141.36483397675556
72.92877809028172 -103.38554123571537
22.320080085390174 150.92776916910321
-55.04282252975579 -166.37475502866826
-39.80407012141185 -167.49466108524018
-83.1396226165137 21.056710666875915
87.04794446719859 -14.440848800250308
73.50829933730293 -11.53019708547589
-81.08153770250381 94.10207152728941
-56.92859119951059 120.72794265298813
52.80636404980885 103.46266939892178
-67.8106306007223 50.91461373039252
-10.69538368580271 132.60288393339715
-84.45117768957623 139.90377930913735
-28.24683512452343 -22.865531640566275
-78.10818148096254 167.36057556521354
68.36296350694374 -69.20955937394058
72.84767508194312 -157.

In [6]:
# Identify the nearest city for each lat/long combination and keep every city
# name exactly once, in the order it was first encountered.
# dict.fromkeys() de-duplicates in a single pass while preserving insertion
# order, which avoids re-scanning a growing list for every coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in coordinates
))

# Print the city count to confirm sufficient count
len(cities)

638

In [7]:
# Weather records are collected in this list as the retrieval progresses.
city_data = list()

# Announce the start of the retrieval log.
print("Beginning Data Retrieval       ", "-------------------------------", sep="\n")

# The record counter and the set counter both start at 1.
record_count = set_count = 1

Beginning Data Retrieval       
-------------------------------


In [8]:
# Retrieve the current weather for every city in `cities` and append one
# dictionary per successful lookup to `city_data`. Progress is logged as
# numbered records grouped into sets of 50.

# Seconds to wait for the API on a single request, so that one stalled
# connection cannot hang the whole retrieval.
REQUEST_TIMEOUT_SECONDS = 10

# Loop through all of the cities in the list
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes. After each full set the
    # loop pauses for 60 seconds (presumably to stay under the API rate limit).
    if i % 50 == 0 and i >= 50:
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Log the record and set numbers and the city
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities.
    try:
        # Pass the city name through `params` so that requests URL-encodes
        # names containing spaces or reserved characters such as "&" or "#".
        response = requests.get(
            url, params={"q": city}, timeout=REQUEST_TIMEOUT_SECONDS
        )
        # Parse the JSON and retrieve data.
        city_weather = response.json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the Unix timestamp to a UTC "YYYY-MM-DD HH:MM:SS" string.
        city_date = datetime.utcfromtimestamp(
            city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # If an error is experienced, skip the city. Only the expected failures
    # are caught: a response missing the needed fields (KeyError), a body that
    # is not valid JSON (ValueError), and connection errors or timeouts
    # (requests.RequestException). Anything else, including Ctrl-C, propagates.
    except (KeyError, ValueError, requests.RequestException):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")


Processing Record 1 of Set 1 | grojdibodu
Processing Record 2 of Set 1 | biloela
Processing Record 3 of Set 1 | ushuaia
Processing Record 4 of Set 1 | nizhneyansk
City not found. Skipping...
Processing Record 5 of Set 1 | sitka
Processing Record 6 of Set 1 | emerald
Processing Record 7 of Set 1 | yellowknife
Processing Record 8 of Set 1 | rikitea
Processing Record 9 of Set 1 | paamiut
Processing Record 10 of Set 1 | hobart
Processing Record 11 of Set 1 | katsuura
Processing Record 12 of Set 1 | avarua
Processing Record 13 of Set 1 | bredasdorp
Processing Record 14 of Set 1 | illoqqortoormiut
City not found. Skipping...
Processing Record 15 of Set 1 | husavik
Processing Record 16 of Set 1 | albany
Processing Record 17 of Set 1 | tayturka
Processing Record 18 of Set 1 | east london
Processing Record 19 of Set 1 | jabiru
City not found. Skipping...
Processing Record 20 of Set 1 | vila velha
Processing Record 21 of Set 1 | bluff
Processing Record 22 of Set 1 | clyde river
Processing Record

In [None]:
# Build a DataFrame from the collected list of per-city dictionaries.
city_data_df = pd.DataFrame(data=city_data)

# Preview the first ten rows.
city_data_df.head(n=10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Buraydah,26.326,43.975,99.05,11,5,10.51,SA,2022-06-27 17:36:34
1,Mehamn,71.0357,27.8492,64.35,48,8,12.66,NO,2022-06-27 17:36:34
2,Pevek,69.7008,170.3133,35.44,78,10,6.55,RU,2022-06-27 17:36:35
3,Albany,42.6001,-73.9662,78.03,59,99,1.99,US,2022-06-27 17:36:35
4,Mar Del Plata,-38.0023,-57.5575,60.1,60,0,14.97,AR,2022-06-27 17:33:58
5,Vilhena,-12.7406,-60.1458,84.42,39,0,8.05,BR,2022-06-27 17:36:36
6,Kawambwa,-9.7915,29.0791,60.58,47,1,8.5,ZM,2022-06-27 17:36:36
7,Khorramshahr,30.4397,48.1664,102.15,21,0,11.5,IR,2022-06-27 17:36:36
8,Souillac,-20.5167,57.5167,70.18,83,40,5.75,MU,2022-06-27 17:36:37
9,Poum,-20.2333,164.0167,74.61,78,100,14.94,NC,2022-06-27 17:36:37


In [None]:
# Rearrange the columns so the city, country and date come first, followed by
# the coordinates and then the weather measurements.
new_order = [
    "City", "Country", "Date",
    "Lat", "Lng",
    "Max Temp", "Humidity", "Cloudiness", "Wind Speed",
]
city_data_df = city_data_df.loc[:, new_order]
city_data_df.head()


Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Buraydah,SA,2022-06-27 17:36:34,26.326,43.975,99.05,11,5,10.51
1,Mehamn,NO,2022-06-27 17:36:34,71.0357,27.8492,64.35,48,8,12.66
2,Pevek,RU,2022-06-27 17:36:35,69.7008,170.3133,35.44,78,10,6.55
3,Albany,US,2022-06-27 17:36:35,42.6001,-73.9662,78.03,59,99,1.99
4,Mar Del Plata,AR,2022-06-27 17:33:58,-38.0023,-57.5575,60.1,60,0,14.97


In [None]:
# Create the output file (CSV)
output_data_file = "Weather_Data/cities.csv"

# to_csv() does not create missing folders, so make sure the target directory
# exists first. The guard skips makedirs when the path has no directory part.
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

# Export the City_Data into csv, labelling the index column "City_ID".
city_data_df.to_csv(output_data_file, index_label="City_ID")