In [90]:
%load_ext lab_black
import pandas as pd
import json
import requests
import numpy as np
from config import weather_api_key
from config import g_key
from citipy import citipy
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

The lab_black extension is already loaded. To reload it, use:
  %reload_ext lab_black


# Part I : WeatherPy
----


## Generate Cities List

In [91]:
# Bounds for the random coordinates, written as (high, low).
lat_range = (90, -90)
lon_range = (180, -180)

# Draw 1400 random latitude/longitude pairs inside the bounds above.
# NOTE(review): no random seed is set, so every run produces a different
# city list; seed NumPy first if reproducible results are required.
lat = np.random.uniform(high=lat_range[0], low=lat_range[1], size=1400)
lon = np.random.uniform(high=lon_range[0], low=lon_range[1], size=1400)
lat_lon = list(zip(lat, lon))

# Unique city names, in the order they were first encountered.
city_list = []
# Set used only for fast "already seen?" checks while building city_list.
seen_cities = set()

# The loop variables are deliberately distinct from `lat_lon` so the list of
# coordinate pairs is not overwritten while it is being iterated.
for latitude, longitude in lat_lon:
    city = citipy.nearest_city(latitude, longitude).city_name

    # Appending only non-duplicated names to the city list.
    if city not in seen_cities:
        seen_cities.add(city)
        city_list.append(city)

In [92]:
# Display how many city names were collected in city_list.
len(city_list)

588

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


url = "http://api.openweathermap.org/data/2.5/weather"
units = "imperial"
parameters = {
    "q": city_list,
    "appid": weather_api_key
}

In [93]:
def extract_data(data):
    """Flatten one weather API response into a single-level record.

    Parameters
    ----------
    data : dict
        Parsed JSON response. The keys read are ``name``, ``coord`` (``lat``,
        ``lon``), ``main`` (``temp_max``, ``humidity``), ``clouds`` (``all``),
        ``wind`` (``speed``), ``sys`` (``country``) and ``dt``.

    Returns
    -------
    dict
        Record with the keys ``City``, ``Lat``, ``Lon``, ``Max Temp``,
        ``Humidity``, ``Cloudiness``, ``Wind Speed``, ``Country`` and ``Date``,
        in that order.

    Raises
    ------
    KeyError
        If any of the keys listed above is missing from ``data``.
    """
    record = {}
    record["City"] = data["name"]

    coordinates = data["coord"]
    record["Lat"] = coordinates["lat"]
    record["Lon"] = coordinates["lon"]

    readings = data["main"]
    record["Max Temp"] = readings["temp_max"]
    record["Humidity"] = readings["humidity"]

    record["Cloudiness"] = data["clouds"]["all"]
    record["Wind Speed"] = data["wind"]["speed"]
    record["Country"] = data["sys"]["country"]
    record["Date"] = data["dt"]
    return record

In [94]:
# Accumulator for the per-city weather records.
city_results = list()

In [97]:
# Start from an empty result list so that re-running this cell does not append
# duplicate rows to records left over from a previous run.
city_results = []

# Record counter starts at 1
processing_record = 1

print("Beginning Data Retrieval")
print("----------------------------------------")

for city in city_list:

    try:
        # HTTPS keeps the API key out of clear-text traffic; the timeout stops
        # one unresponsive request from hanging the whole loop.
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={"q": city, "appid": weather_api_key, "units": "imperial"},
            timeout=10,
        )
        data = response.json()

        # Raises KeyError when the response lacks the expected weather fields.
        city_result = extract_data(data)

    except KeyError:
        print("City not found. Skipping...")
        continue

    except (requests.RequestException, ValueError) as error:
        # Network failure, timeout, or a response body that is not valid JSON.
        print(f"Request failed for {city} ({error}). Skipping...")
        continue

    city_results.append(city_result)

    print(f"Processing Record {processing_record} | {city}")

    # Only successfully stored cities advance the counter.
    processing_record += 1

print("----------------------------------------")
print("Data Retrieval Complete")
print("----------------------------------------")

Beginning Data Retrieval
----------------------------------------
Processing Record 1 | kemalpasa
Processing Record 2 | oyama
Processing Record 3 | harper
Processing Record 4 | san policarpo
Processing Record 5 | northam
Processing Record 6 | gillette
Processing Record 7 | aklavik
Processing Record 8 | bose
Processing Record 9 | kiama
Processing Record 10 | kismayo
Processing Record 11 | lebu
Processing Record 12 | hilo
Processing Record 13 | esperance
Processing Record 14 | grindavik
Processing Record 15 | santander jimenez
Processing Record 16 | kingman
Processing Record 17 | maltahohe
Processing Record 18 | atuona
Processing Record 19 | busselton
Processing Record 20 | talnakh
Processing Record 21 | sazonovo
Processing Record 22 | hobart
Processing Record 23 | billings
Processing Record 24 | malindi
Processing Record 25 | puerto ayora
Processing Record 26 | nageshwari
Processing Record 27 | sechura
Processing Record 28 | sur
Processing Record 29 | paracuru
Processing Record 30 | kai

In [None]:
# One row per retrieved city; the columns come from the record keys.
df = pd.DataFrame(data=city_results)
# Preview the first two rows.
df.head(n=2)

### Creating Scatter Plot to showcase the relationship between Max Temperature and Latitude

In [None]:
# Pull the plotted columns out of the DataFrame as NumPy arrays:
# latitude on the x-axis, maximum temperature on the y-axis.
X = df["Lat"].to_numpy()
y = df["Max Temp"].to_numpy()

In [None]:
# Scatter of every city's latitude (X) against its maximum temperature (y).
plt.title("City Latitude vs. Max Temperature (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid()
plt.scatter(X, y)
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

Here we are examining the relationship between a city's latitude and its maximum temperature. As you can see, a city's latitude has a clear relationship to its max temperature, as we would expect. Temperatures appear to be higher the closer a city is to the equator (0 degrees latitude).

### Creating Scatter Plot to showcase the relationship between Humidity and Latitude 

In [None]:
# Latitude on the x-axis, humidity on the y-axis.
X = df["Lat"].to_numpy()
y = df["Humidity"].to_numpy()

In [None]:
# Scatter of every city's latitude (X) against its humidity (y).
plt.title("City Latitude vs. Humidity (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid()
plt.scatter(X, y)
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

This scatter plot displays the relationship between a city's latitude and its humidity level. Based on the plot, there does not appear to be a clear relationship between a city's humidity and its latitude. However, there does appear to be an indication that many cities that lie between around -30 degrees and 35 degrees latitude have lower humidity levels. There are basically no cities outside of that range that have humidity levels below 20%.

In [None]:
# Latitude on the x-axis, cloudiness on the y-axis.
X = df["Lat"].to_numpy()
y = df["Cloudiness"].to_numpy()

In [None]:
# Scatter of every city's latitude (X) against its cloudiness (y).
plt.title("City Latitude vs. Cloudiness (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid()
plt.scatter(X, y)
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

This scatter plot showcases the relationship between a city's latitude and its cloud cover. Latitude does not appear to have a major impact on a city's cloud cover. Cities with high, medium and low cloud cover appear to be evenly distributed across all latitudes.

In [None]:
# Latitude on the x-axis, wind speed on the y-axis.
X = df["Lat"].to_numpy()
y = df["Wind Speed"].to_numpy()

In [None]:
# Scatter of every city's latitude (X) against its wind speed (y).
plt.title("City Latitude vs. Wind Speed (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid()
plt.scatter(X, y)
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

With this plot, we can see that there is perhaps a small relationship between a city's latitude and its wind speed. Although wind speed does appear to be rather equally distributed, we do see that cities further away from the equator have the potential for higher wind speeds. If you'll notice, there is a slightly higher number of cities at latitudes further away from the equator reporting higher wind speeds.

## Linear Regression

### Temperature (F) vs. Latitude (Northern and Southern Hemisphere)

#### (Northern Hemisphere)

In [None]:
# Rows of df for cities strictly north of the equator (latitude > 0).
NH_df = df.loc[df["Lat"] > 0]

In [None]:
# Extract the NH_df columns used for the scatter plot and the regression.
X = NH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = NH_df["Max Temp"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(max_temp):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``max_temp`` is handed to the model as its input feature. The callers in
    this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[max_temp]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
max_temp_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_max_temp = model.predict(max_temp_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Northern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("NH City Latitude vs. Max Temperature (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid()
plt.scatter(X, y)
plt.plot(max_temp_range, predict_max_temp, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(5, 35, formula, color="red")
plt.text(5, 28, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

As you can see, the r squared value for this regression analysis is quite high. This indicates that there is a strong correlation between latitude and the maximum temperatures. This is easily seen on the plot displayed above. The further you get away from the equator (0), the lower the max temperature.

#### (Southern Hemisphere)

In [None]:
# Rows of df for cities strictly south of the equator (latitude < 0).
SH_df = df.loc[df["Lat"] < 0]

In [None]:
# Extract the SH_df columns used for the scatter plot and the regression.
X = SH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = SH_df["Max Temp"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(max_temp):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``max_temp`` is handed to the model as its input feature. The callers in
    this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[max_temp]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
max_temp_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_max_temp = model.predict(max_temp_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Southern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("SH City Latitude vs. Max Temperature (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid()
plt.scatter(X, y)
plt.plot(max_temp_range, predict_max_temp, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(-55, 83, formula, color="red")
plt.text(-55, 76, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

The r squared value for this linear regression is also quite high; although not quite as high as what was indicated in the Northern Hemisphere. Again, there is a strong correlation between latitude and maximum temperature in the Southern Hemisphere as well. This is easily explained by the fact that Earth is round, and as you get further and further away from the equator in both hemispheres, less of the sun's light makes it to the surface causing maximum temperatures to be lower. Yet another notch against the "Flat Earthers" out there.

### Humidity (%) vs. Latitude (Northern and Southern Hemisphere)

#### (Northern Hemisphere)

In [None]:
# Northern Hemisphere latitude (feature) and humidity (target).
X = NH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = NH_df["Humidity"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(humidity):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``humidity`` is handed to the model as its input feature. The callers in
    this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[humidity]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
humidity_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_humidity = model.predict(humidity_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Northern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("NH City Latitude vs. Humidity (%) (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Humidity %")
plt.grid()
plt.scatter(X, y)
plt.plot(humidity_range, predict_humidity, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(52, 20, formula, color="red")
plt.text(52, 13, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

The r squared value for this linear regression is quite low, suggesting that there is little correlation between latitude and humidity levels. However, as I mentioned above, one can see there is still something going on here. Although humidity levels do appear to be dispersed throughout, you can still see that most of the lower humidity levels are grouped together between about 10 and 35 degrees latitude.

#### (Southern Hemisphere)

In [None]:
# Southern Hemisphere latitude (feature) and humidity (target).
X = SH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = SH_df["Humidity"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(humidity):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``humidity`` is handed to the model as its input feature. The callers in
    this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[humidity]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
humidity_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_humidity = model.predict(humidity_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Southern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("SH City Latitude vs. Humidity (%) (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Humidity %")
plt.grid()
plt.scatter(X, y)
plt.plot(humidity_range, predict_humidity, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(-55, 20, formula, color="red")
plt.text(-55, 13, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

The same is true for the Southern Hemisphere. The r squared value for this linear regression is also low, suggesting that there is little correlation between latitude and humidity levels. Again, however, we do see there is still something going on with lower humidity levels. Although humidity levels as a whole do appear to be dispersed throughout, you can still see that most of the lower humidity levels are grouped together between about -25 and -10 degrees latitude. One would assume that most deserts are located at these latitudes all over the planet.

### Cloudiness (%) vs. Latitude (Northern and Southern Hemisphere)

#### (Northern Hemisphere)

In [None]:
# Northern Hemisphere latitude (feature) and cloudiness (target).
X = NH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = NH_df["Cloudiness"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(cloudiness):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``cloudiness`` is handed to the model as its input feature. The callers
    in this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[cloudiness]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
cloudiness_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_cloudiness = model.predict(cloudiness_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Northern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("NH City Latitude vs. Cloudiness (%) (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")
plt.grid()
plt.scatter(X, y)
plt.plot(cloudiness_range, predict_cloudiness, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(85, 20, formula, color="red")
plt.text(85, 13, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

The R squared value for this linear regression is the lowest we've seen so far. There is apparently little to no correlation between cloudiness and latitude in the Northern Hemisphere.

#### (Southern Hemisphere)

In [None]:
# Southern Hemisphere latitude (feature) and cloudiness (target).
X = SH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = SH_df["Cloudiness"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(cloudiness):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``cloudiness`` is handed to the model as its input feature. The callers
    in this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[cloudiness]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
cloudiness_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_cloudiness = model.predict(cloudiness_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Southern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("SH City Latitude vs. Cloudiness (%) (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness %")
plt.grid()
plt.scatter(X, y)
plt.plot(cloudiness_range, predict_cloudiness, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(5, 20, formula, color="red")
plt.text(5, 13, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

The R squared value for this linear regression is also quite low. Again, there is apparently little to no correlation between cloudiness and latitude in the Southern Hemisphere as well.

### Wind Speed (mph) vs. Latitude (Northern and Southern Hemisphere)

#### (Northern Hemisphere)

In [None]:
# Northern Hemisphere latitude (feature) and wind speed (target).
X = NH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = NH_df["Wind Speed"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(wind_speed):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``wind_speed`` is handed to the model as its input feature. The callers
    in this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[wind_speed]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
wind_speed_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_wind_speed_range = model.predict(wind_speed_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Northern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("NH City Latitude vs. Wind Speed (mph) (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid()
plt.scatter(X, y)
plt.plot(wind_speed_range, predict_wind_speed_range, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(5, 48, formula, color="red")
plt.text(5, 41, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

The data points here are quite spread out, and the R Squared value is low. This indicates that there is little to no correlation between latitude and wind speed. However, much like humidity, there are some data points that are of interest. You can see that a couple of cities located far to the north report far higher wind speeds than others in this group.

#### (Southern Hemisphere)

In [None]:
# Southern Hemisphere latitude (feature) and wind speed (target).
X = SH_df["Lat"].to_numpy()
# The regression is fit on a 2-D feature list: one single-element row per city.
X_array = [[latitude] for latitude in X]
y = SH_df["Wind Speed"].to_numpy()

In [None]:
# Fit a linear regression of y on the single feature held in X_array.
model = LinearRegression()
model.fit(X_array, y)

In [None]:
# Coefficient of the only feature, i.e. the slope of the fitted line.
slope = model.coef_[0]
slope

In [None]:
# Intercept of the fitted line (predicted y where x is 0).
intercept = model.intercept_
intercept

In [None]:
# Model score on the same data it was fit on; for LinearRegression this is R^2.
r_squared = model.score(X_array, y)
r_squared

In [None]:
def predict_function(wind_speed):
    """Return the notebook-level ``model``'s prediction for one input value.

    ``wind_speed`` is handed to the model as its input feature. The callers
    in this notebook pass x-axis values (latitudes), so despite the parameter
    name the argument is a latitude and the return value is the predicted
    y-value for it.
    """
    single_row = [[wind_speed]]
    predictions = model.predict(single_row)
    return predictions[0]

In [None]:
# x-values for the regression line: despite the variable name these are
# latitudes, stepping by 0.001 across the observed range of X.
wind_speed_range = np.arange(min(X), max(X), 0.001)
# Predict the whole range in one vectorized call instead of one
# model.predict() call per element, which is far slower for the same numbers.
predict_wind_speed_range = model.predict(wind_speed_range.reshape(-1, 1))

In [None]:
# Variables used for creating text labels on plot
# Render a negative slope as "a - bx" rather than "a + -bx".
slope_sign = "+" if slope >= 0 else "-"
formula = f"y = {round(intercept, 3)} {slope_sign} {round(abs(slope), 3)}x"
# Braces are doubled so the label is literally "$R^{2}$"; a single "{2}" is an
# f-string replacement field that only rendered correctly by coincidence.
r_squared_str = f"$R^{{2}}$={round(r_squared, 3)}"

In [None]:
# Southern Hemisphere scatter with the fitted regression line drawn in red.
plt.title("SH City Latitude vs. Wind Speed (mph) (09/21/2020)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid()
plt.scatter(X, y)
plt.plot(wind_speed_range, predict_wind_speed_range, color="red")
# Annotate with the fitted equation and R^2 at fixed data coordinates.
plt.text(-55, 22, formula, color="red")
plt.text(-55, 20, r_squared_str, color="red")
# show must be called; the bare attribute `plt.show` only echoes the function.
plt.show()

Although the R Squared value is very low, one can see a small trend. You can also see that, as in the Northern Hemisphere, there are some data anomalies that require further analysis.