# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from scipy.stats import linregress
from pprint import pprint
from colorama import Fore
from colorama import Style
import datetime
x = datetime.datetime.now()

# Automatically set Data Frames to 2 decimal places
pd.options.display.precision = 2

# Hide warning messages in notebook
import warnings
warnings.filterwarnings('ignore')

# print(x.strftime("%d""/""%m""/""%Y"))

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

628


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [3]:
# Specify the API URL and that we want to return the units of measurement to return the information in 
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
# cities = ["Perth", "Melbourne", "abcd", "Superman", "Karratha", "Brisbane"]

# Build partial query URL 
query_url = f"{url}&units={units}&appid={weather_api_key}&q="

# Set up lists to hold response information 
temperature = []
latitude = []
longitude = []
humidity = []
cloudiness = []
wind_speed = []
city_name = []

In [4]:
city_count = 1
total_cities = len(cities)
print("-----------------------------------")
print("Starting API Call - Retrieving Data")
print("-----------------------------------")

# Loop through the list of cities and perform a request for data on each city 
for city in cities:

# Call the API and if successful, append the information for each city into the relevant lists
    response = requests.get(query_url + city).json()
    
    print(f"Making request number: {city_count} of {total_cities} | {city}")
    
    try: 
        city_count = city_count + 1
        temperature.append(response["main"]["temp"])    
        latitude.append(response["coord"]["lat"])
        longitude.append(response["coord"]["lon"])
        humidity.append(response["main"]["humidity"])
        cloudiness.append(response["clouds"]["all"])
        wind_speed.append(response["wind"]["speed"])
        city_name.append(response["name"])

# If unsuccessful, do not append and print below statement

    except(KeyError, IndexError):
        print(f"{Fore.RED}Request number {city_count-1}: {city} - Not found. Skipping....{Style.RESET_ALL}")
         
    
print("-----------------------------------")
print("Data Retrieval Finalised")
print("-----------------------------------")

-----------------------------------
Starting API Call - Retrieving Data
-----------------------------------
Making request number: 1 of 628 | jamestown
Making request number: 2 of 628 | port elizabeth
Making request number: 3 of 628 | altay
Making request number: 4 of 628 | braslav
[31mRequest number 4: braslav - Not found. Skipping....[0m
Making request number: 5 of 628 | portland
Making request number: 6 of 628 | qaanaaq
Making request number: 7 of 628 | kapaa
Making request number: 8 of 628 | hilo
Making request number: 9 of 628 | arraial do cabo
Making request number: 10 of 628 | nanortalik
Making request number: 11 of 628 | hobart
Making request number: 12 of 628 | castro
Making request number: 13 of 628 | pangnirtung
Making request number: 14 of 628 | high level
Making request number: 15 of 628 | lidkoping
Making request number: 16 of 628 | mount gambier
Making request number: 17 of 628 | tsihombe
[31mRequest number 17: tsihombe - Not found. Skipping....[0m
Making request num

ConnectionError: ('Connection aborted.', TimeoutError(10060, 'A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond', None, 10060, None))

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Create a data frame to hold requested information
weather_df = pd.DataFrame({"City": city_name, "Temperature (f)": temperature, "Latitude": latitude, "Longitude": longitude, "Humidity (%)": humidity, "Cloudiness (%)": cloudiness, "Wind Speed (mph)": wind_speed})
weather_df 

In [None]:
# Export to CSV
weather_df.to_csv("../output_data/temperature_file.csv", index=False, header=True)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Find out if there are any values 100% of over for Humidity
weather_df.describe()

The max value for Humidity is 100 meaning that we need to drop some values

In [None]:
# Drop any values that are 100% or over for Humidity.
humidity_drop = weather_df.loc[weather_df["Humidity (%)"]<100]
humidity_drop

In [None]:
# Export the City_Data into a csv
humidity_drop.to_csv("../output_data/humidity_drop.csv", index=False, header=True)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Generate a scatter plot of latitude versus temperature
plt.scatter(humidity_drop["Latitude"], humidity_drop["Temperature (f)"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"City Latitude vs Temperature (F) on {date}")

# Save image
plt.tight_layout()
plt.savefig("../output_data/City_Latitude_vs_Temperature_(F).png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Generate a scatter plot of latitude versus humidity

plt.scatter(humidity_drop["Latitude"], humidity_drop["Humidity (%)"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"City Latitude vs Humidity (%) on {date}")

# Save image
plt.tight_layout()
plt.savefig("../output_data/City_Latitude_vs_Humidity_(%).png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Generate a scatter plot of latitude versus cloudiness

plt.scatter(humidity_drop["Latitude"], humidity_drop["Cloudiness (%)"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"City Latitude vs Cloudiness (%) on {date}")

# Save image
plt.tight_layout()
plt.savefig("../output_data/City_Latitude_vs_Cloudiness_(%).png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Generate a scatter plot of latitude versus wind speed

plt.scatter(humidity_drop["Latitude"], humidity_drop["Wind Speed (mph)"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"City Latitude vs Wind Speed (mph) on {date}")

# Save image
plt.tight_layout()
plt.savefig("../output_data/City_Latitude_vs_Wind_Speed_(mph).png")
plt.show()

## Linear Regression

In [None]:
# Create Northern Hemisphere Data Frames
northern = humidity_drop.loc[humidity_drop["Latitude"]>=0]
northern

In [None]:
# Create Southern Hemisphere Data Frames
southern = humidity_drop.loc[humidity_drop["Latitude"]<0]
southern

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Northern Hemisphere Temp versus Latitude

plt.scatter(northern["Temperature (f)"], northern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Temperature (f)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Northern Hemisphere Temperature (f) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(northern["Temperature (f)"], northern["Latitude"])
regress_values = (northern["Temperature (f)"]) * slope + intercept

plt.plot((northern["Temperature (f)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(35.8,25, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(northern["Temperature (f)"], northern["Latitude"])
print(f"\nThe correlation between Temperature (f) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Northern_Hemisphere_Temperature_(f)_vs_Latitude.png")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Southern Hemisphere Temp versus Latitude

plt.scatter(southern["Temperature (f)"], southern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Temperature (f)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Southern Hemisphere Temperature (f) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(southern["Temperature (f)"], southern["Latitude"])
regress_values = (southern["Temperature (f)"]) * slope + intercept

plt.plot((southern["Temperature (f)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(70,-50, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(southern["Temperature (f)"], southern["Latitude"])
print(f"\nThe correlation between Temperature (f) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Southern_Hemisphere_Temperature_(f)_vs_Latitude.png")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Northern Hemisphere Humidity versus Latitude

plt.scatter(northern["Humidity (%)"], northern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Humidity (%)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Northern Hemisphere Humidity (%) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(northern["Humidity (%)"], northern["Latitude"])
regress_values = (northern["Humidity (%)"]) * slope + intercept

plt.plot((northern["Humidity (%)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(5,5, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(northern["Humidity (%)"], northern["Latitude"])
print(f"\nThe correlation between Humidity (%) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Northern_Hemisphere_Humidity_(%)_vs_Latitude.png")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Southern Hemisphere Humidity versus Latitude

plt.scatter(southern["Humidity (%)"], southern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Humidity (%)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Southern Hemisphere Humidity (%) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(southern["Humidity (%)"], southern["Latitude"])
regress_values = (southern["Humidity (%)"]) * slope + intercept

plt.plot((southern["Humidity (%)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(20,-50, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(southern["Humidity (%)"], southern["Latitude"])
print(f"\nThe correlation between Humidity (%) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Southern_Hemisphere_Humidity_(%)_vs_Latitude.png")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Northern Hemisphere Cloudiness versus Latitude

plt.scatter(northern["Cloudiness (%)"], northern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Cloudiness (%)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Northern Hemisphere Cloudiness (%) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(northern["Cloudiness (%)"], northern["Latitude"])
regress_values = (northern["Cloudiness (%)"]) * slope + intercept

plt.plot((northern["Cloudiness (%)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(30,75, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(northern["Cloudiness (%)"], northern["Latitude"])
print(f"\nThe correlation between Cloudiness (%) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Northen_Hemisphere_Cloudiness_(%)_vs_Latitude.png")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Southern Hemisphere Cloudiness versus Latitude

plt.scatter(southern["Cloudiness (%)"], southern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Cloudiness (%)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Southern Hemisphere Cloudiness (%) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(southern["Cloudiness (%)"], southern["Latitude"])
regress_values = (southern["Cloudiness (%)"]) * slope + intercept

plt.plot((southern["Cloudiness (%)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(20,-50, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(southern["Cloudiness (%)"], southern["Latitude"])
print(f"\nThe correlation between Cloudiness (%) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Southern_Hemisphere_Cloudiness_(%)_vs_Latitude.png")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Northern Hemisphere Wind Speed versus Latitude

plt.scatter(northern["Wind Speed (mph)"], northern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Wind Speed (mph)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Northern Hemisphere Wind Speed (mph) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(northern["Wind Speed (mph)"], northern["Latitude"])
regress_values = (northern["Wind Speed (mph)"]) * slope + intercept

plt.plot((northern["Wind Speed (mph)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(20,45, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(northern["Wind Speed (mph)"], northern["Latitude"])
print(f"\nThe correlation between Wind Speed (mph) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Northen_Hemisphere_Wind_Speed_(mph)_vs_Latitude.png")
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Generate a scatter plot of Southern Hemisphere Wind Speed versus Latitude

plt.scatter(southern["Wind Speed (mph)"], southern["Latitude"], marker="o", facecolors="deepskyblue", edgecolors="black")

# # Set x label and y label
plt.xlabel("Wind Speed (mph)")
plt.ylabel("Latitude")

# Date of analysis
date = (x.strftime("%d""/""%m""/""%Y"))

# Set Title with date of analysis
plt.title(f"Southern Hemisphere Wind Speed (mph) vs Latitude on {date}")

# Calculate the linear regression and correlation
(slope, intercept,r_value,p_value,std_err) = st.linregress(southern["Wind Speed (mph)"], southern["Latitude"])
regress_values = (southern["Wind Speed (mph)"]) * slope + intercept

plt.plot((southern["Wind Speed (mph)"]),regress_values,"red");
line_annotation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.text(20,-50, line_annotation, fontsize = 10, color="red");

correlation = st.pearsonr(southern["Wind Speed (mph)"], southern["Latitude"])
print(f"\nThe correlation between Wind Speed (mph) and Latitude is {round(correlation[0],2)}\n")

# Save image
plt.tight_layout()
plt.savefig("../output_data/Southern_Hemisphere_Wind_Speed_(mph)_vs_Latitude.png")
plt.show()