In [None]:
# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

## Generate Cities List

In [1]:
# Dependencies and Setup
import json
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Import API key
from api_keys import weather_api_key

# Output File (CSV): relative path configured for the exported city data
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes: (min, max) bounds passed to the random
# coordinate draws in the city-generation cell
lat_range = (-90, 90)
lng_range = (-180, 180)

In [2]:
# Draw 1500 random coordinate pairs inside the configured bounds
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Resolve each pair to its nearest city, keeping the first occurrence of every name.
# The loop variable `city` stays bound after the loop; the query-URL cell reads it.
cities = []
for latitude, longitude in lat_lngs:
    city = citipy.nearest_city(latitude, longitude).city_name
    if city in cities:
        continue
    cities.append(city)

# Show how many distinct cities were found, to confirm the sample is large enough
len(cities)

611

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [3]:
# Base endpoint for OpenWeatherMap's current-weather API.
# HTTPS keeps the API key out of plaintext traffic.
url = "https://api.openweathermap.org/data/2.5/weather?"

# Build a probe query for one explicitly chosen city instead of relying on
# whatever value the city-generation loop happened to leave in `city`.
# units=imperial requests Fahrenheit and mph, matching the axis labels of the
# plots further down (the API's default units are Kelvin and m/s).
sample_city = cities[0]
query_url = url + "appid=" + weather_api_key + "&units=imperial&q=" + sample_city

In [4]:
# Smoke-test the API with the probe URL before looping over every city.
# The timeout stops the cell from hanging indefinitely if the service never answers.
weather_response = requests.get(query_url, timeout=10)
print(weather_response.status_code)

weather_json = weather_response.json()

# Uncomment to inspect the full payload of the probe response:
# print(json.dumps(weather_json, indent=4, sort_keys=True))

200


In [None]:
# Fetch current weather for every candidate city and collect one value per field.
# Cities the API cannot resolve (or whose payload is incomplete) are skipped, so
# every list below, including `found_cities`, ends up with the same length.

# Seconds to wait between requests so the loop stays under the API's rate limit.
# NOTE(review): 1 s assumes a limit of roughly 60 calls per minute - confirm for
# your plan, or set to 0 to disable the pause.
REQUEST_PAUSE_SECONDS = 1.0

found_cities = []
lat = []
lng = []
maxtemp = []
humid = []
cloud = []
wind = []
country = []
date = []

total_cities = len(cities)

# Loop through the cities to retrieve weather information
for record_number, city in enumerate(cities, start=1):
    # Print log for each city as it is processed (city number and city name)
    print("Processing record {} of {} | {}".format(record_number, total_cities, city))

    # Build the request from the base `url` and let requests encode the
    # parameters; `query_url` already ends with a city name and cannot be reused.
    try:
        response = requests.get(
            url,
            params={"appid": weather_api_key, "units": "imperial", "q": city},
            timeout=10,
        )
    except requests.exceptions.RequestException as error:
        # Only the exception type is printed: the message can contain the full
        # request URL, which includes the API key.
        print("Request failed ({}). Skipping...".format(type(error).__name__))
        continue

    if response.status_code != 200:
        print("City not found (HTTP {}). Skipping...".format(response.status_code))
        time.sleep(REQUEST_PAUSE_SECONDS)
        continue

    weather_json = response.json()

    # Read every field before appending anything, so a payload with a missing
    # key cannot leave the lists with different lengths.
    try:
        record = (
            weather_json['coord']['lat'],
            weather_json['coord']['lon'],
            weather_json['main']['temp_max'],
            weather_json['main']['humidity'],
            weather_json['clouds']['all'],
            weather_json['wind']['speed'],
            weather_json['sys']['country'],
            weather_json['dt'],
        )
    except KeyError as missing_key:
        print("Missing field {} in response. Skipping...".format(missing_key))
        time.sleep(REQUEST_PAUSE_SECONDS)
        continue

    found_cities.append(city)
    lat.append(record[0])
    lng.append(record[1])
    maxtemp.append(record[2])
    humid.append(record[3])
    cloud.append(record[4])
    wind.append(record[5])
    country.append(record[6])
    date.append(record[7])

    time.sleep(REQUEST_PAUSE_SECONDS)

# Keep `cities` aligned with the data lists: the DataFrame cell pairs `cities`
# with them column by column, which requires equal lengths.
cities = found_cities
print("Retrieved weather for {} of {} cities".format(len(cities), total_cities))

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the per-city lists into one table, one column per weather field
column_names = [
    "City", "Lat", "Lng", "Max Temp", "Humidity",
    "Cloudiness", "Wind Speed", "Country", "Date",
]
column_values = [cities, lat, lng, maxtemp, humid, cloud, wind, country, date]
weather_dict = dict(zip(column_names, column_values))

city_df = pd.DataFrame(data=weather_dict)

# Preview the first rows
city_df.head()

In [None]:
# Display pandas' descriptive statistics for the city_df data frame
city_df.describe()

In [None]:
# Export city data as a CSV to the path configured in the setup cell
# (`output_data_file`) instead of a second hard-coded location, creating the
# target folder first so the write does not fail on a fresh checkout.
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
city_df.to_csv(output_data_file, encoding="utf-8", index=False, header=True)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Collect the index labels of the rows in city_df whose humidity is over 100%.
# A boolean mask replaces the row loop: iterating a DataFrame yields its column
# names, and a whole-Series comparison cannot be used as an `if` condition.
high_humidity = city_df.index[city_df["Humidity"] > 100].tolist()

In [None]:
# Expose the list built in the previous cell under a second name and display it
# (intended to hold the indices of cities that have humidity over 100%).
highhumidity = high_humidity
highhumidity

In [None]:
# Make a new DataFrame from city_df with all humidity outliers (> 100%) dropped by index.
# DataFrame.drop returns a copy, so city_df itself is left untouched. The mask is
# computed here so this cell does not depend on the outlier list built earlier.
humidity_outliers = city_df.index[city_df["Humidity"] > 100]
clean_city_data = city_df.drop(index=humidity_outliers)

# Show summary statistics for the cleaned weather data
clean_city_data.describe()

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of City Latitude vs. Max Temp on 04/01/20
clat = clean_city_data['Lat']
fmax_temp = clean_city_data['Max Temp']

# Marker size is left at matplotlib's default: sizing markers by latitude
# (s=clat) gives negative sizes for every southern-hemisphere city.
plt.scatter(clat, fmax_temp, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)

# Set x and y limits; the x range reaches 90 so high-latitude cities are not cut off
plt.xlim(-60, 90)
plt.ylim(-20, 110)

plt.title("City Latitude vs. Max Temperature (04/01/20)")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")

plt.grid()
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of City Latitude vs. Humidity on 04/01/20
clat = clean_city_data['Lat']
chumidity = clean_city_data['Humidity']

# Marker size is left at matplotlib's default: sizing markers by latitude
# (s=clat) gives negative sizes for every southern-hemisphere city.
plt.scatter(clat, chumidity, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)

# Set x and y limits. The x range reaches 90 so high-latitude cities are not cut
# off, and the y range ends just above 100 so markers at 100% are drawn in full.
plt.xlim(-60, 90)
plt.ylim(0, 105)

plt.title("City Latitude vs. Humidity (04/01/20)")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

plt.grid()
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of City Latitude vs. Cloudiness on 04/01/20
clat = clean_city_data['Lat']
cclouds = clean_city_data['Cloudiness']

# Marker size is left at matplotlib's default: sizing markers by latitude
# (s=clat) gives negative sizes for every southern-hemisphere city.
plt.scatter(clat, cclouds, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)

# Set x and y limits
plt.xlim(-60, 90)
plt.ylim(0, 110)

plt.title("City Latitude vs. Cloudiness (04/01/20)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

plt.grid()
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of City Latitude vs. Wind Speed on 04/01/20
clat = clean_city_data['Lat']
cwind = clean_city_data['Wind Speed']

# Marker size is left at matplotlib's default: sizing markers by latitude
# (s=clat) gives negative sizes for every southern-hemisphere city.
plt.scatter(clat, cwind, marker="o", facecolors="blue", edgecolors="black", alpha=0.75)

# Set x and y limits. Wind speed has no natural upper bound, so only the lower
# limit is fixed and matplotlib chooses the top from the data.
plt.xlim(-60, 90)
plt.ylim(bottom=0)

plt.title("City Latitude vs. Wind Speed (04/01/20)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

plt.grid()
plt.show()

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of Max Temp on Latitude for the Northern Hemisphere (Lat >= 0).
# A boolean mask selects the rows; rows missing either value are dropped because
# linregress cannot fit NaNs.
north_temp = clean_city_data.loc[clean_city_data["Lat"] >= 0, ["Lat", "Max Temp"]].dropna()
nlat = north_temp["Lat"]
max_temp = north_temp["Max Temp"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(nlat, max_temp)
regress_values = nlat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(nlat, max_temp)
# Draw the fitted line against the same latitudes that were fitted
plt.plot(nlat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of Max Temp on Latitude for the Southern Hemisphere (Lat < 0).
# A strict `< 0` keeps this subset disjoint from the northern one (Lat >= 0);
# rows missing either value are dropped because linregress cannot fit NaNs.
south_temp = clean_city_data.loc[clean_city_data["Lat"] < 0, ["Lat", "Max Temp"]].dropna()
slat = south_temp["Lat"]
smax_temp = south_temp["Max Temp"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(slat, smax_temp)
regress_values = slat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(slat, smax_temp)
plt.plot(slat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible; the previous data
# position (x=18) lies outside the southern latitudes.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of Humidity (%) on Latitude for the Northern Hemisphere (Lat >= 0).
# A boolean mask selects the rows; rows missing either value are dropped because
# linregress cannot fit NaNs.
north_humidity = clean_city_data.loc[clean_city_data["Lat"] >= 0, ["Lat", "Humidity"]].dropna()
nlat = north_humidity["Lat"]
humidity = north_humidity["Humidity"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(nlat, humidity)
regress_values = nlat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(nlat, humidity)
# Draw the fitted line against the same latitudes that were fitted
plt.plot(nlat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of Humidity (%) on Latitude for the Southern Hemisphere (Lat < 0).
# A strict `< 0` keeps this subset disjoint from the northern one (Lat >= 0);
# rows missing either value are dropped because linregress cannot fit NaNs.
south_humidity = clean_city_data.loc[clean_city_data["Lat"] < 0, ["Lat", "Humidity"]].dropna()
slat = south_humidity["Lat"]
shumidity = south_humidity["Humidity"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(slat, shumidity)
regress_values = slat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(slat, shumidity)
plt.plot(slat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible; the previous data
# position (x=18) lies outside the southern latitudes.
plt.annotate(line_eq, (0.05, 0.05), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of Cloudiness (%) on Latitude for the Northern Hemisphere (Lat >= 0).
# A boolean mask selects the rows; rows missing either value are dropped because
# linregress cannot fit NaNs.
north_clouds = clean_city_data.loc[clean_city_data["Lat"] >= 0, ["Lat", "Cloudiness"]].dropna()
nlat = north_clouds["Lat"]
clouds = north_clouds["Cloudiness"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(nlat, clouds)
regress_values = nlat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(nlat, clouds)
# Draw the fitted line against the same latitudes that were fitted
plt.plot(nlat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of Cloudiness (%) on Latitude for the Southern Hemisphere (Lat < 0).
# A strict `< 0` keeps this subset disjoint from the northern one (Lat >= 0);
# rows missing either value are dropped because linregress cannot fit NaNs.
south_clouds = clean_city_data.loc[clean_city_data["Lat"] < 0, ["Lat", "Cloudiness"]].dropna()
slat = south_clouds["Lat"]
sclouds = south_clouds["Cloudiness"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(slat, sclouds)
regress_values = slat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(slat, sclouds)
plt.plot(slat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible; the previous data
# position (x=18) lies outside the southern latitudes.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of Wind Speed (mph) on Latitude for the Northern Hemisphere (Lat >= 0).
# A boolean mask selects the rows; rows missing either value are dropped because
# linregress cannot fit NaNs.
north_wind = clean_city_data.loc[clean_city_data["Lat"] >= 0, ["Lat", "Wind Speed"]].dropna()
nlat = north_wind["Lat"]
# Named `nwind` so the `wind` list that feeds the city DataFrame is not overwritten
nwind = north_wind["Wind Speed"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(nlat, nwind)
regress_values = nlat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(nlat, nwind)
# Draw the fitted line against the same latitudes that were fitted
plt.plot(nlat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible whatever the data range is
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of Wind Speed (mph) on Latitude for the Southern Hemisphere (Lat < 0).
# A strict `< 0` keeps this subset disjoint from the northern one (Lat >= 0);
# rows missing either value are dropped because linregress cannot fit NaNs.
south_wind = clean_city_data.loc[clean_city_data["Lat"] < 0, ["Lat", "Wind Speed"]].dropna()
slat = south_wind["Lat"]
swind = south_wind["Wind Speed"]

# linregress is imported directly from scipy.stats in the setup cell
(slope, intercept, rvalue, pvalue, stderr) = linregress(slat, swind)
regress_values = slat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.scatter(slat, swind)
plt.plot(slat, regress_values, "r-")
# Axes-fraction coordinates keep the equation visible; the previous data
# position (x=18) lies outside the southern latitudes.
plt.annotate(line_eq, (0.05, 0.9), xycoords="axes fraction", fontsize=15, color="red")

plt.title('Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.show()