In [1]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from citipy import citipy
import requests
from config import weather_api_key
import time
from datetime import datetime

### Generate Random Coordinates of World Cities

In [2]:
# Create a set of random latitude and longitude combinations.
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)
lat_lngs

<zip at 0x1d757aedfc0>

In [3]:
# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs)

In [4]:
# Display the latitude and longitude combinations.
for coordinate in coordinates:
    print(coordinate[0], coordinate[1])
# for lat, lon in coordinates:
#     print(lat, lon)

87.1736315998026 176.48948417996013
36.87600826145295 102.40196412328254
6.510013980930978 105.3241806803432
-52.96969525068611 -151.84818263301625
-53.739838257632464 168.02211406879638
3.5089728744906097 33.75892491166687
-43.62317380622872 -82.87841844779788
61.279767445489824 72.66088852324373
8.200208879180309 107.6311411119757
39.565770336226905 -171.02255273144664
56.25031606835677 -156.42841023776464
-33.12685938741268 160.14308382031425
-74.69838798582532 138.19480688145165
-62.122780949653524 15.183468206033012
-13.734322608734033 4.116319979212676
40.13742380584972 56.96399566015623
-2.865066037513685 80.75878971749529
-3.68612145476159 -57.136647055043
-30.143363489543034 -79.08358430490556
57.56017504091804 13.263737385099063
-53.79996721150124 -104.8213458550122
60.731831312496126 -68.5459734135941
-41.96324339566149 -40.67326849644911
-24.72755281679558 65.23297383497192
-35.381193232875475 -76.76611011670687
21.039142781157835 86.89190697570547
39.223832544741185 -157.4

In [5]:
# Create a list for holding the cities.
cities = []
# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in cities:
        cities.append(city)
# Print the city count to confirm sufficient count. (Above 500)
len(cities)

571

### Retrieve, Collect, and Clean Weather Data

In [6]:
# Starting URL for Weather Map API Call.
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [7]:
# Create an endpoint URL for a city.
city_url = url + "&q=" + "Boston"
city_weather = requests.get(city_url)
city_weather.json()

{'coord': {'lon': -71.0598, 'lat': 42.3584},
 'weather': [{'id': 802,
   'main': 'Clouds',
   'description': 'scattered clouds',
   'icon': '03n'}],
 'base': 'stations',
 'main': {'temp': 80.47,
  'feels_like': 84.31,
  'temp_min': 75.09,
  'temp_max': 84.45,
  'pressure': 1017,
  'humidity': 74},
 'visibility': 10000,
 'wind': {'speed': 10.36, 'deg': 210},
 'clouds': {'all': 40},
 'dt': 1659665845,
 'sys': {'type': 2,
  'id': 2013408,
  'country': 'US',
  'sunrise': 1659605988,
  'sunset': 1659657649},
 'timezone': -14400,
 'id': 4930956,
 'name': 'Boston',
 'cod': 200}

In [8]:
# Get the JSON data.
boston_data = city_weather.json()

In [9]:
lat = boston_data["coord"]["lat"]
lng = boston_data["coord"]["lon"]
max_temp = boston_data["main"]["temp_max"]
humidity = boston_data["main"]["humidity"]
clouds = boston_data["clouds"]["all"]
wind = boston_data["wind"]["speed"]
print(lat, lng, max_temp, humidity, clouds, wind)

42.3584 -71.0598 84.45 74 40 10.36


In [10]:
# Import the datetime module from the datetime library.
from datetime import datetime
# Get the date from the JSON file.
date = boston_data["dt"]
# Convert the UTC date to a date format with year, month, day, hours, minutes, and seconds.
datetime.utcfromtimestamp(date).strftime('%Y-%m-%d %H:%M:%S')

'2022-08-05 02:17:25'

In [11]:
# Create an empty list to hold the weather data.
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1

# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
        time.sleep(60)

    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the URL, record, and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1
    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})
    
    # If an error is experienced, skip the city.
    except:
        print("City not found. Skipping...")
        pass
    
    # Indicate that Data Loading is complete.
    print("-----------------------------")
    print("Data Retrieval Complete      ")
    print("-----------------------------")
        

Beginning Data Retrieval     
-----------------------------
Processing Record 1 of Set 1 | leningradskiy
-----------------------------
Data Retrieval Complete      
-----------------------------
Processing Record 2 of Set 1 | xining
-----------------------------
Data Retrieval Complete      
-----------------------------
Processing Record 3 of Set 1 | kuala terengganu
-----------------------------
Data Retrieval Complete      
-----------------------------
Processing Record 4 of Set 1 | mataura
-----------------------------
Data Retrieval Complete      
-----------------------------
Processing Record 5 of Set 1 | bluff
-----------------------------
Data Retrieval Complete      
-----------------------------
Processing Record 6 of Set 1 | kaabong
-----------------------------
Data Retrieval Complete      
-----------------------------
Processing Record 7 of Set 1 | castro
-----------------------------
Data Retrieval Complete      
-----------------------------
Processing Record 8 of Set

In [12]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.head(10)

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Leningradskiy,69.3833,178.4167,40.15,82,100,5.1,RU,2022-08-05 02:19:45
1,Xining,36.6167,101.7667,68.38,69,11,2.19,CN,2022-08-05 02:20:35
2,Kuala Terengganu,5.3302,103.1408,84.15,79,20,5.75,MY,2022-08-05 02:20:35
3,Mataura,-46.1927,168.8643,57.47,67,100,4.68,NZ,2022-08-05 02:19:41
4,Bluff,-46.6,168.3333,57.79,60,100,20.45,NZ,2022-08-05 02:20:36
5,Kaabong,3.4836,34.1492,63.0,86,100,4.59,UG,2022-08-05 02:20:36
6,Castro,-24.7911,-50.0119,67.21,49,99,6.82,BR,2022-08-05 02:18:01
7,Nefteyugansk,61.0998,72.6035,64.92,60,57,8.52,RU,2022-08-05 02:20:37
8,Tra Vinh,9.9347,106.3453,82.29,74,100,10.22,VN,2022-08-05 02:20:37
9,Kapaa,22.0752,-159.319,84.11,70,0,9.22,US,2022-08-05 02:19:42


In [14]:
new_column_order = ["City", "Country", "Date", "Lat", "Lng", "Max Temp", "Humidity", "Cloudiness", "Wind Speed"]
city_data_df = city_data_df[new_column_order]
city_data_df.head()

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Leningradskiy,RU,2022-08-05 02:19:45,69.3833,178.4167,40.15,82,100,5.1
1,Xining,CN,2022-08-05 02:20:35,36.6167,101.7667,68.38,69,11,2.19
2,Kuala Terengganu,MY,2022-08-05 02:20:35,5.3302,103.1408,84.15,79,20,5.75
3,Mataura,NZ,2022-08-05 02:19:41,-46.1927,168.8643,57.47,67,100,4.68
4,Bluff,NZ,2022-08-05 02:20:36,-46.6,168.3333,57.79,60,100,20.45


In [None]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Export the City_Data into a CSV.
city_data_df.to_csv(output_data_file, index_label="City_ID")

### Plot Weather Data

### Determine Correlations

### Use Google API to Create Heatmaps