In [1]:
# Import the dependencies.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np 
import requests
from citipy import citipy 
import time 
from datetime import datetime
from config import api_key 


In [2]:
# Create a set of random latitude and longitude combinations.
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)
lat_lngs 

<zip at 0x7fda50841d70>

In [3]:
# Add the latitudes and longitudes to a list.
coordinates = list(lat_lngs) 
coordinates 

[(-27.3533287056747, -8.634242905055771),
 (-59.08661180757893, 89.09373082678229),
 (67.87052462391745, -108.39683114067581),
 (23.75790054430807, 131.05909340200202),
 (-63.80365071966375, 9.436076272904188),
 (68.9569358319566, 72.67749131299558),
 (-8.8672369439535, 102.92199637553381),
 (63.82730097123209, 133.4608559024211),
 (4.334363740275251, -40.9774925680797),
 (-35.31981921141063, -155.2485600741417),
 (-19.068183297241205, 85.47923435932591),
 (-88.5926469202052, 98.62452836916259),
 (-76.09036509652529, 65.277758635254),
 (8.467523652590302, -133.2022666432107),
 (-16.809935326072974, -117.5507819207503),
 (29.899946739902404, 29.371602874269428),
 (4.5493036155823035, 151.3211664831765),
 (55.25043560563691, 173.28904621849819),
 (-34.974857333402554, 144.05773100570428),
 (57.20810159165288, -86.9195262347738),
 (-9.687030997845795, -177.85344990159146),
 (-48.16563877026987, 140.55130575545138),
 (-84.89453513105195, 97.49298788860091),
 (18.767136786076406, 90.8980598

In [4]:
# Create a list for holding the cities.
cities = []
# Identify the nearest city for each latitude and longitude combination.
for coordinate in coordinates:
    city = citipy.nearest_city(coordinate[0], coordinate[1]).city_name

    # If the city is unique, then we will add it to the cities list.
    if city not in cities:
        cities.append(city) 
# Print the city count to confirm sufficient count.
len(cities) 

582

In [5]:
# Starting URL for Weather Map API Call.
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + api_key 

In [6]:
#6.2.6
# Create an empty list to hold the weather data.
city_data = []
# Print the beginning of the logging.
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters.
record_count = 1
set_count = 1 

Beginning Data Retrieval     
-----------------------------


In [7]:
# Loop through all the cities in our list.
for i in range(len(cities)):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each city.
    city_url = url + "&q=" + cities[i] 

In [8]:
# Loop through all the cities in the list.
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes.
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1
    # Create endpoint URL with each city.
    city_url = url + "&q=" + city.replace(" ","+")

    # Log the URL, record, and set numbers and the city.
    print(f"Processing Record {record_count} of Set {set_count} | {city}")
    # Add 1 to the record count.
    record_count += 1 
    # Run an API request for each of the cities.
    try:
        # Parse the JSON and retrieve data.
        city_weather = requests.get(city_url).json()
        # Parse out the needed data.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
         
        # Convert the date to ISO standard.
        city_date = datetime.utcfromtimestamp(city_weather["dt"]).strftime('%Y-%m-%d %H:%M:%S')
        # Append the city information into city_data list.
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

# If an error is experienced, skip the city.
    except:
        print("City not found. Skipping...")
        pass

# Indicate that Data Loading is complete.
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")

Processing Record 1 of Set 12 | jamestown
Processing Record 2 of Set 12 | busselton
Processing Record 3 of Set 12 | yellowknife
Processing Record 4 of Set 12 | nishihara
Processing Record 5 of Set 12 | hermanus
Processing Record 6 of Set 12 | yar-sale
Processing Record 7 of Set 12 | labuhan
Processing Record 8 of Set 12 | churapcha
Processing Record 9 of Set 12 | tutoia
Processing Record 10 of Set 12 | mataura
Processing Record 11 of Set 12 | hithadhoo
Processing Record 12 of Set 12 | albany
Processing Record 13 of Set 12 | taolanaro
City not found. Skipping...
Processing Record 14 of Set 12 | atuona
Processing Record 15 of Set 12 | rikitea
Processing Record 16 of Set 12 | alexandria
Processing Record 17 of Set 12 | kavieng
Processing Record 18 of Set 12 | nikolskoye
Processing Record 19 of Set 12 | swan hill
Processing Record 20 of Set 12 | attawapiskat
City not found. Skipping...
Processing Record 21 of Set 12 | vaitupu
City not found. Skipping...
Processing Record 22 of Set 12 | new

Processing Record 37 of Set 15 | lebu
Processing Record 38 of Set 15 | nuuk
Processing Record 39 of Set 15 | naze
Processing Record 40 of Set 15 | pullman
Processing Record 41 of Set 15 | half moon bay
Processing Record 42 of Set 15 | grand river south east
City not found. Skipping...
Processing Record 43 of Set 15 | namatanai
Processing Record 44 of Set 15 | muhororo
Processing Record 45 of Set 15 | lasa
Processing Record 46 of Set 15 | vila velha
Processing Record 47 of Set 15 | cockburn town
Processing Record 48 of Set 15 | port lincoln
Processing Record 49 of Set 15 | talnakh
Processing Record 50 of Set 15 | veraval
Processing Record 1 of Set 16 | ngukurr
City not found. Skipping...
Processing Record 2 of Set 16 | gat
Processing Record 3 of Set 16 | roswell
Processing Record 4 of Set 16 | kloulklubed
Processing Record 5 of Set 16 | shimoda
Processing Record 6 of Set 16 | mount isa
Processing Record 7 of Set 16 | achisay
City not found. Skipping...
Processing Record 8 of Set 16 | id

Processing Record 20 of Set 19 | amderma
City not found. Skipping...
Processing Record 21 of Set 19 | haines junction
Processing Record 22 of Set 19 | chicama
Processing Record 23 of Set 19 | pleshanovo
Processing Record 24 of Set 19 | mazagao
Processing Record 25 of Set 19 | kogon
Processing Record 26 of Set 19 | severo-kurilsk
Processing Record 27 of Set 19 | touros
Processing Record 28 of Set 19 | alyangula
Processing Record 29 of Set 19 | hambantota
Processing Record 30 of Set 19 | emerald
Processing Record 31 of Set 19 | bandarbeyla
Processing Record 32 of Set 19 | meulaboh
Processing Record 33 of Set 19 | morgan city
Processing Record 34 of Set 19 | kalmunai
Processing Record 35 of Set 19 | pokrovsk
Processing Record 36 of Set 19 | mayo
Processing Record 37 of Set 19 | andenes
Processing Record 38 of Set 19 | haimen
Processing Record 39 of Set 19 | minab
Processing Record 40 of Set 19 | muros
Processing Record 41 of Set 19 | bilibino
Processing Record 42 of Set 19 | miri
Processi

Processing Record 6 of Set 23 | komsomolskiy
Processing Record 7 of Set 23 | umm lajj
Processing Record 8 of Set 23 | gatton
Processing Record 9 of Set 23 | mehamn
Processing Record 10 of Set 23 | mbandaka
Processing Record 11 of Set 23 | biskamzha
Processing Record 12 of Set 23 | ixtapa
Processing Record 13 of Set 23 | tagab
Processing Record 14 of Set 23 | vrangel
Processing Record 15 of Set 23 | raymondville
Processing Record 16 of Set 23 | juifang
City not found. Skipping...
Processing Record 17 of Set 23 | wunsiedel
Processing Record 18 of Set 23 | iisaku
Processing Record 19 of Set 23 | tabou
Processing Record 20 of Set 23 | auxerre
Processing Record 21 of Set 23 | kuntaur
Processing Record 22 of Set 23 | jumla
Processing Record 23 of Set 23 | kushima
Processing Record 24 of Set 23 | talaya
Processing Record 25 of Set 23 | vidim
Processing Record 26 of Set 23 | petropavlovsk-kamchatskiy
Processing Record 27 of Set 23 | saint-georges
Processing Record 28 of Set 23 | yakuplu
Proces

In [9]:
# Convert the array of dictionaries to a Pandas DataFrame.
city_data_df = pd.DataFrame(city_data)
city_data_df.head(10) 

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Jamestown,42.1,-79.24,35.01,92,1,6.93,US,2020-11-29 00:24:09
1,Busselton,-33.65,115.33,59.11,61,92,4.79,AU,2020-11-29 00:16:44
2,Yellowknife,62.46,-114.35,3.0,84,90,12.75,CA,2020-11-29 00:16:45
3,Nishihara,26.18,127.76,66.2,68,75,10.29,JP,2020-11-29 00:17:59
4,Hermanus,-34.42,19.23,54.0,91,17,8.28,ZA,2020-11-29 00:16:49
5,Yar-Sale,66.83,70.83,6.3,93,80,10.36,RU,2020-11-29 00:16:56
6,Labuhan,-6.88,112.21,80.08,83,99,6.02,ID,2020-11-29 00:21:28
7,Churapcha,62.0,132.43,-24.92,86,5,1.9,RU,2020-11-29 00:24:10
8,Tutoia,-2.76,-42.27,80.44,80,32,11.32,BR,2020-11-29 00:24:10
9,Mataura,-46.19,168.86,55.0,82,61,8.01,NZ,2020-11-29 00:16:38


In [10]:
city_data_reordered = ['City', 'Country', 'Date', 'Lat', 'Lng', 'Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed']  
city_data_df = city_data_df[city_data_reordered] 
city_data_df 

Unnamed: 0,City,Country,Date,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed
0,Jamestown,US,2020-11-29 00:24:09,42.10,-79.24,35.01,92,1,6.93
1,Busselton,AU,2020-11-29 00:16:44,-33.65,115.33,59.11,61,92,4.79
2,Yellowknife,CA,2020-11-29 00:16:45,62.46,-114.35,3.00,84,90,12.75
3,Nishihara,JP,2020-11-29 00:17:59,26.18,127.76,66.20,68,75,10.29
4,Hermanus,ZA,2020-11-29 00:16:49,-34.42,19.23,54.00,91,17,8.28
...,...,...,...,...,...,...,...,...,...
527,Yakuplu,TR,2020-11-29 00:25:13,40.99,28.68,48.00,87,0,2.24
528,Prieska,ZA,2020-11-29 00:25:13,-29.66,22.75,67.35,51,0,16.62
529,Atasu,KZ,2020-11-29 00:21:39,48.68,71.64,11.71,96,100,11.61
530,Padre Paraiso,BR,2020-11-29 00:25:13,-17.07,-41.48,63.95,97,10,5.73


In [None]:
# Create the output file (CSV).
output_data_file = "weather_data/cities.csv"
# Export the City_Data into a CSV.
city_data_df.to_csv(output_data_file, index_label="City_ID") 

In [None]:
#6.3.1
# Extract relevant fields from the DataFrame for plotting.
lats = city_data_df["Lat"]
max_temps = city_data_df["Max Temp"]
humidity = city_data_df["Humidity"]
cloudiness = city_data_df["Cloudiness"]
wind_speed = city_data_df["Wind Speed"] 

In [None]:
# Get today's date in seconds.
today = time.time()
today 

In [None]:
# Build the scatter plot for latitude vs. max temperature.
plt.scatter(lats,
            max_temps,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Max Temperature "+ time.strftime("%x"))
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(True)

# Save the figure.
plt.savefig("weather_data/Fig1.png")

# Show plot.
plt.show() 

In [None]:
#6.3.2
# Build the scatter plots for latitude vs. humidity.
plt.scatter(lats,
            humidity,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Humidity "+ time.strftime("%x"))
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig2.png")
# Show plot.
plt.show() 

In [None]:
#6.3.3
# Build the scatter plots for latitude vs. cloudiness.
plt.scatter(lats,
            cloudiness,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Cloudiness (%) "+ time.strftime("%x"))
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig3.png")
# Show plot.
plt.show() 

In [None]:
#6.3.4
# Build the scatter plots for latitude vs. wind speed.
plt.scatter(lats,
            wind_speed,
            edgecolor="black", linewidths=1, marker="o",
            alpha=0.8, label="Cities")

# Incorporate the other graph properties.
plt.title(f"City Latitude vs. Wind Speed "+ time.strftime("%x"))
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)
# Save the figure.
plt.savefig("weather_data/Fig4.png")
# Show plot.
plt.show() 

In [None]:
# Create a function to create perform linear regression on the weather data
# and plot a regression line and the equation with the data.
#def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):


In [None]:
# Import linregress
from scipy.stats import linregress

# Create a function to create perform linear regression on the weather data
# and plot a regression line and the equation with the data.
def plot_linear_regression(x_values, y_values, title, y_label, text_coordinates):

    # Run regression on hemisphere weather data.
    (slope, intercept, r_value, p_value, std_err) = linregress(x_values, y_values)

    # Calculate the regression line "y values" from the slope and intercept.
    regress_values = x_values * slope + intercept
    # Get the equation of the line.
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    # Create a scatter plot and plot the regression line.
    plt.scatter(x_values,y_values)
    plt.plot(x_values,regress_values,"r")
    # Annotate the text for the line equation.
    plt.annotate(line_eq, text_coordinates, fontsize=15, color="red")
    plt.xlabel('Latitude')
    plt.ylabel(y_label)
    plt.show() 

In [None]:
index13 = city_data_df.loc[13] 
index13 

In [None]:
city_data_df['Lat'] >= 0 

In [None]:
city_data_df.loc[(city_data_df["Lat"] >= 0)].head() 

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
northern_hemi_df = city_data_df.loc[(city_data_df["Lat"] >= 0)]
southern_hemi_df = city_data_df.loc[(city_data_df["Lat"] < 0)] 

In [None]:
northern_hemi_df.head() 

In [None]:
southern_hemi_df.head() 

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \
                        for Maximum Temperature', 'Max Temp',(10,40)) 

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Max Temp"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \
                        for Maximum Temperature', 'Max Temp',(-50,90)) 

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Humidity"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \
                        for % Humidity', '% Humidity',(40,10)) 

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Humidity"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \
                        for % Humidity', '% Humidity',(-50,15)) 

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Cloudiness"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \
                        for % Cloudiness', '% Cloudiness',(40,10)) 

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Cloudiness"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \
                        for % Cloudiness', '% Cloudiness',(-50,60)) 

In [None]:
# Linear regression on the Northern Hemisphere
x_values = northern_hemi_df["Lat"]
y_values = northern_hemi_df["Wind Speed"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Northern Hemisphere \ 
                        for Wind Speed', 'Wind Speed',(40,35)) 

In [None]:
# Linear regression on the Southern Hemisphere
x_values = southern_hemi_df["Lat"]
y_values = southern_hemi_df["Wind Speed"]
# Call the function.
plot_linear_regression(x_values, y_values,
                       'Linear Regression on the Southern Hemisphere \
                        for Wind Speed', 'Wind Speed',(-50,35)) 