In [1]:
### dependencies
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
import time
from citipy import citipy
from scipy.stats import linregress
from datetime import datetime
from config import (weather_api_key)


url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key


In [2]:
# create random latitude/longitude combinations
lats = np.random.uniform(low=-90.0, high=90.0, size=200)
lngs = np.random.uniform(low=-180.0, high=180.0, size=200)
lat_lngs=zip(lats, lngs)
lat_lngs

<zip at 0x20176c1f708>

In [3]:
# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs)

In [4]:
# create a list of cities near the random long/lat points
cities = []
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0],coordinate[1]).city_name
    if city not in cities:
        cities.append(city)
# Print the city count to confirm sufficient count
len(cities)

141

In [5]:
# create an empty list to hold the weather data
city_data = []
# Print beginning of the logging
print("Beginning Data Retrieval         ")
print("---------------------------------")

# Create counters
record_count = 1
set_count = 1

Beginning Data Retrieval         
---------------------------------


In [8]:
# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the URL, record, and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1
        
# Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        #city_country = city_weather["sys"]["country"]
        #city_description = city_weather["weather"][0]["description"]
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

# If an error is experienced, skip the city.
    except:
        print("City not found. Skipping...")
        pass

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Processing Record 42 of Set 3 | saint-philippe
City not found. Skipping...
Processing Record 43 of Set 3 | punta arenas
City not found. Skipping...
Processing Record 44 of Set 3 | kapaa
City not found. Skipping...
Processing Record 45 of Set 3 | ninghai
City not found. Skipping...
Processing Record 46 of Set 3 | mastic beach
City not found. Skipping...
Processing Record 47 of Set 3 | armidale
City not found. Skipping...
Processing Record 48 of Set 3 | hobart
City not found. Skipping...
Processing Record 49 of Set 3 | taolanaro
City not found. Skipping...
Processing Record 50 of Set 3 | albany
City not found. Skipping...
Processing Record 51 of Set 3 | attawapiskat
City not found. Skipping...
Processing Record 52 of Set 3 | bluff
City not found. Skipping...
Processing Record 53 of Set 3 | dundee
City not found. Skipping...
Processing Record 54 of Set 3 | tsihombe
City not found. Skipping...
Processing Record 55 of Set 3 | mataura
City not found. Skipping...
Processing Record 56 of Set 3

City not found. Skipping...
Processing Record 28 of Set 5 | eenhana
City not found. Skipping...
Processing Record 29 of Set 5 | auki
City not found. Skipping...
Processing Record 30 of Set 5 | sitio novo do tocantins
City not found. Skipping...
Processing Record 31 of Set 5 | kropotkin
City not found. Skipping...
Processing Record 32 of Set 5 | sokolo
City not found. Skipping...
Processing Record 33 of Set 5 | tumannyy
City not found. Skipping...
Processing Record 34 of Set 5 | grand river south east
City not found. Skipping...
Processing Record 35 of Set 5 | rawson
City not found. Skipping...
Processing Record 36 of Set 5 | houma
City not found. Skipping...
Processing Record 37 of Set 5 | cedral
City not found. Skipping...
Processing Record 38 of Set 5 | makakilo city
City not found. Skipping...
Processing Record 39 of Set 5 | cape town
City not found. Skipping...
Processing Record 40 of Set 5 | arraial do cabo
City not found. Skipping...
Processing Record 41 of Set 5 | maceio
City no

In [9]:
len(city_data)

0

In [None]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.to_csv("weather_data/cities.csv") 
city_data_df.head()

In [None]:
import time
today = time.time()
today

In [None]:
# extract relevant fields from the dataframe for plotting
lats = city_data_df["Lat"]
max_temps = city_data_df["Max Temp"]
humidity = city_data_df["Humidity"]
cloudiness = city_data_df["Cloudiness"]
wind_speed = city_data_df["Wind Speed"]

In [None]:
# build scatter plot, lat vs max temp
plt.scatter(lats,
           max_temps,
           edgecolor="black",linewidths=1,marker="o",
           alpha=0.8, label="Cities")
plt.title(f'City Latitude vs. Max Temp ' + time.strftime("%x"))
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig1.png")

# Show plot.
plt.show()
          

In [None]:
# build scatter plot, lat vs max humidity
plt.scatter(lats,
           humidity,
           edgecolor="black",linewidths=1,marker="o",
           alpha=0.8, label="Cities")
plt.title(f'City Latitude vs. humidity ' + time.strftime("%x"))
plt.ylabel("Ambient Wetness")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig2.png")

# Show plot.
plt.show()

In [None]:
# build scatter plot, lat vs max cloudicity
plt.scatter(lats,
           cloudiness,
           edgecolor="black",linewidths=1,marker="o",
           alpha=0.8, label="Cities")
plt.title(f'City Latitude vs. cloudfullness ' + time.strftime("%x"))
plt.ylabel("Cloudicity")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig3.png")

# Show plot.
plt.show()

In [None]:
# build scatter plot, lat vs wind speed
plt.scatter(lats,
           wind_speed,
           edgecolor="black",linewidths=1,marker="o",
           alpha=0.8, label="Cities")
plt.title(f'City Latitude vs. wind speed ' + time.strftime("%x"))
plt.ylabel("Wind Speed")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig4.png")

# Show plot.
plt.show()

In [None]:
# Create a function to create perform linear regression on the weather data
# and plot a regression line and the equation with the data.
def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):
    
    # Run regression on hemisphere weather data.
    (slope, intercept, r_value, p_value, std_err) = linregress(x_values, y_values)

    # Calculate the regression line "y values" from the slope and intercept.
    regress_values = x_values * slope + intercept
    # Get the equation of the line.
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    # Create a scatter plot and plot the regression line.
    plt.scatter(x_values,y_values)
    plt.plot(x_values,regress_values,"r")
    # Annotate the text for the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    title = title + '\n R-value = ' + str(round(r_value,2))
    plt.title(title)
    plt.show()

In [None]:
# create northern and southern hemisphere dataframes
northern_hemi_df = city_data_df.loc[city_data_df["Lat"] >= 0]
southern_hemi_df = city_data_df.loc[city_data_df["Lat"] <= 0]

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,'Linear Regression Northern Hemisphere \n for Maximum Temperature','Max Temp',(10,10))


In [None]:
# Linear regression on the Northern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,'Linear Regression Southern Hemisphere \n for Maximum Temperature','Max Temp',(-25,50))


In [None]:
# Linear regression on % cloudiness in northern hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression Northern Hemisphere \n for Cloudiness',
                       'Cloudiness',(10,10))

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \n for % Cloudiness',
                       '% Cloudiness',(-50,60))

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \n for Wind Speed', 
                       'Wind Speed',(30,35))

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \n for Wind Speed', 
                       'Wind Speed',(-50,35))