# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
from pprint import pprint
import time
import scipy.stats as st
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this name is not referenced again in the visible notebook; the
# export cell writes to a different hard-coded path - confirm which is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (min, max) bounds in degrees, used as the limits for the random coordinate draw.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Unique city names, kept in first-seen order for the retrieval loop
cities = []
# Companion set so each uniqueness check is a constant-time lookup instead of
# a scan over the growing `cities` list
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination.
# `city` is deliberately left bound after the loop: the trial-request cell
# that follows reads it.
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

# OpenWeather Request

In [None]:
# Base endpoint for the OpenWeather current-weather API
url = "http://api.openweathermap.org/data/2.5/weather?"

# Trial request for one city (`city` is whatever the preceding cell last
# assigned) to inspect the response layout before looping over every city.
# Passing the query through `params` lets requests URL-encode the city name,
# and the timeout keeps the cell from hanging on a stalled connection.
weather_response = requests.get(
    url,
    params={"q": city, "appid": weather_api_key},
    timeout=10,
)

# Final URL that was actually requested, kept for debugging
query_url = weather_response.url
#print(query_url)

# Decode the JSON body of the response
weather_json = weather_response.json()

# Data from the response
pprint(weather_json)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Initialize the lists that become the DataFrame columns (one entry per city)
City = []
Lat = []
Lng = []
Max_Temp = []
Humidity = []
Cloudiness = []
Wind_Speed = []
Country = []
Date = []

# Log counters: record number within the current set, and the set number.
# A new set starts after every 50 successfully processed records.
i = 1
set_count = 1

# `city` is the query term for each request; `cities` is the list to examine

print("Beginning Data Retrieval")
print("-----------------------------")

for city in cities:
    # units=imperial asks the API for Fahrenheit temperatures and miles/hour
    # wind speeds, matching the "Max Temp (F)" column and the "(mph)" axis
    # labels used by the plots. Passing the query through `params` URL-encodes
    # the city name; the timeout stops one stalled request from hanging the run.
    try:
        weather_response = requests.get(
            url,
            params={"appid": weather_api_key, "q": city, "units": "imperial"},
            timeout=10,
        )
        weather_json = weather_response.json()
    except (requests.exceptions.RequestException, ValueError):
        # Network failure or a body that is not valid JSON: skip this city
        # rather than aborting the whole retrieval.
        print("Request failed for " + city + ". Skipping...")
        continue

    try:
        # Read every field before appending anything, so a response that is
        # missing a key cannot leave the column lists with unequal lengths.
        name = weather_json['name']
        latitude = weather_json['coord']['lat']
        longitude = weather_json['coord']['lon']
        max_temp = weather_json['main']['temp_max']
        humidity = weather_json['main']['humidity']
        cloudiness = weather_json['clouds']["all"]
        wind_speed = weather_json['wind']['speed']
        country = weather_json['sys']['country']
        timestamp = weather_json['dt']
    except (KeyError):
        print("'City not found. Skipping...''")
        continue

    pprint("Processing Record " + str(i) + " of Set " + str(set_count) + "| " + name)

    # Advance the log counters; roll over to the next set after record 50
    i += 1
    if i == 51:
        set_count = set_count + 1
        i = 1

    City.append(name)
    Lat.append(latitude)
    Lng.append(longitude)
    Max_Temp.append(max_temp)
    Humidity.append(humidity)
    Cloudiness.append(cloudiness)
    Wind_Speed.append(wind_speed)
    Country.append(country)
    Date.append(timestamp)

print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")
      

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Pair each output column label with the list filled during retrieval; the
# order of the labels fixes the column order of the frame.
column_labels = [
    "City", "Latitude", "Longitude", "Max Temp (F)", "Humidity",
    "Cloudiness", "Wind Speed", "Country", "Date",
]
column_values = [
    City, Lat, Lng, Max_Temp, Humidity,
    Cloudiness, Wind_Speed, Country, Date,
]
df = pd.DataFrame(dict(zip(column_labels, column_values)))

# Write the results to disk without the positional index column
df.to_csv("output_data/Weather_Data.csv", index=False)

# Render the frame as the cell output
df

In [None]:
# Number of distinct values in each column of the weather DataFrame
df.nunique()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#SKIPPED STEP

#Humidity_DF = df.loc[df["Humidity"]>100]
#Humidity_DF

In [None]:
#  Get the indices of cities that have humidity over 100%.
#Humidity_DF.index

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
#clean_city_data = df.drop(df[df['Humidity'] > 100].index, inplace = False)

In [None]:
# Extract relevant fields from the data frame


# Export the City_Data into a csv


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of max temperature against latitude for every retrieved city
plt.scatter(df["Latitude"], df["Max Temp (F)"], edgecolor="black")
plt.title("City Latitude vs Max Temperature 07/19/2020")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)

# Save the figure as a .png; this must happen before show(), which may
# release the figure and leave nothing to write.
plt.savefig("output_data/lat_vs_max_temp.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude for every retrieved city
plt.scatter(df["Latitude"], df["Humidity"], edgecolor="black")
plt.title("City Latitude vs Humidity 07/19/2020")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Save the figure as a .png; this must happen before show(), which may
# release the figure and leave nothing to write.
plt.savefig("output_data/lat_vs_humidity.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude for every retrieved city
plt.scatter(df["Latitude"], df["Cloudiness"], edgecolor="black")
plt.title("City Latitude vs Cloudiness 07/19/2020")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)

# Save the figure as a .png; this must happen before show(), which may
# release the figure and leave nothing to write.
plt.savefig("output_data/lat_vs_cloudiness.png")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude for every retrieved city
plt.scatter(df["Latitude"], df["Wind Speed"], edgecolor="black")
plt.title("City Latitude vs Wind Speed 07/19/2020")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)

# Save the figure as a .png; this must happen before show(), which may
# release the figure and leave nothing to write.
plt.savefig("output_data/lat_vs_wind_speed.png")
plt.show()

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

In [None]:
# Create Northern and Southern Hemisphere DataFrames

# Cities exactly on the equator (latitude 0) are grouped with the north so
# that no row is dropped from both subsets.
# drop=True discards the old row labels instead of inserting them as an extra
# "index" column in the new frames.
North_df = df[df["Latitude"] >= 0].reset_index(drop=True)
South_df = df[df["Latitude"] < 0].reset_index(drop=True)

# Only the last expression of a cell is rendered as output
South_df

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between max temperature and latitude
pearson_r = st.pearsonr(North_df["Max Temp (F)"], North_df["Latitude"])[0]
r_squared = pearson_r * pearson_r
print(f"The r-squared is: {r_squared}")

# Least-squares fit of max temperature on latitude (northern hemisphere)
latitudes = North_df["Latitude"]
max_temps = North_df["Max Temp (F)"]
fit = linregress(latitudes, max_temps)
fitted_temps = latitudes * fit.slope + fit.intercept
equation_label = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 2)}"

# Observations, fitted line in red, and the line equation as an on-plot label
figure, axes = plt.subplots()
axes.scatter(latitudes, max_temps)
axes.plot(latitudes, fitted_temps, "r-")
axes.annotate(equation_label, (0, 40), fontsize=15, color="red")
axes.set_title("Northern Hemisphere - Max Temp vs. Latitude")
axes.set_xlabel("Latitude")
axes.set_ylabel("Maximum Temperature (F)")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between max temperature and latitude
pearson_r = st.pearsonr(South_df["Max Temp (F)"], South_df["Latitude"])[0]
r_squared = pearson_r * pearson_r
print(f"The r-squared is: {r_squared}")

# Least-squares fit of max temperature on latitude (southern hemisphere)
latitudes = South_df["Latitude"]
max_temps = South_df["Max Temp (F)"]
fit = linregress(latitudes, max_temps)
fitted_temps = latitudes * fit.slope + fit.intercept
equation_label = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 2)}"

# Observations, fitted line in red, and the line equation as an on-plot label
figure, axes = plt.subplots()
axes.scatter(latitudes, max_temps)
axes.plot(latitudes, fitted_temps, "r-")
axes.annotate(equation_label, (-40, 30), fontsize=15, color="red")
axes.set_title("Southern Hemisphere - Max Temp vs. Latitude")
axes.set_xlabel("Latitude")
axes.set_ylabel("Maximum Temperature (F)")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between humidity and latitude
pearson_r = st.pearsonr(North_df["Humidity"], North_df["Latitude"])[0]
r_squared = pearson_r * pearson_r
print(f"The r-squared is: {r_squared}")

# Least-squares fit of humidity on latitude (northern hemisphere)
latitudes = North_df["Latitude"]
humidity = North_df["Humidity"]
fit = linregress(latitudes, humidity)
fitted_humidity = latitudes * fit.slope + fit.intercept
equation_label = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 2)}"

# Observations, fitted line in red, and the line equation as an on-plot label
figure, axes = plt.subplots()
axes.scatter(latitudes, humidity)
axes.plot(latitudes, fitted_humidity, "r-")
axes.annotate(equation_label, (45, 10), fontsize=15, color="red")
axes.set_title("Northern Hemisphere - Humidity (%) vs. Latitude")
axes.set_xlabel("Latitude")
axes.set_ylabel("Humidity (%)")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between humidity and latitude
pearson_r = st.pearsonr(South_df["Humidity"], South_df["Latitude"])[0]
r_squared = pearson_r * pearson_r
print(f"The r-squared is: {r_squared}")

# Least-squares fit of humidity on latitude (southern hemisphere)
latitudes = South_df["Latitude"]
humidity = South_df["Humidity"]
fit = linregress(latitudes, humidity)
fitted_humidity = latitudes * fit.slope + fit.intercept
# The intercept is shown to three decimals in this cell
equation_label = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 3)}"

# Observations, fitted line in red, and the line equation as an on-plot label
figure, axes = plt.subplots()
axes.scatter(latitudes, humidity)
axes.plot(latitudes, fitted_humidity, "r-")
axes.annotate(equation_label, (-55, 25), fontsize=15, color="red")
axes.set_title("Southern Hemisphere - Humidity (%) vs. Latitude")
axes.set_xlabel("Latitude")
axes.set_ylabel("Humidity (%)")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between cloudiness and latitude
correlation = st.pearsonr(North_df["Cloudiness"], North_df["Latitude"])
correlation = correlation[0]*correlation[0]
print(f"The r-squared is: {correlation}")

# Linear regression of cloudiness on latitude (northern hemisphere).
# The dependent variable must be the Cloudiness column so that the plot and
# fitted line agree with the title, axis label and r-squared above.
x_values = North_df["Latitude"]
y_values = North_df["Cloudiness"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,3))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Anchor the equation label at a positive latitude so that it falls inside the
# plotted (northern) range.
plt.annotate(line_eq,(45,30),fontsize=15,color="red")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Northern Hemisphere - Cloudiness (%) vs. Latitude")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between cloudiness and latitude
pearson_r = st.pearsonr(South_df["Cloudiness"], South_df["Latitude"])[0]
r_squared = pearson_r * pearson_r
print(f"The r-squared is: {r_squared}")

# Least-squares fit of cloudiness on latitude (southern hemisphere)
latitudes = South_df["Latitude"]
cloudiness = South_df["Cloudiness"]
fit = linregress(latitudes, cloudiness)
fitted_cloudiness = latitudes * fit.slope + fit.intercept
# The intercept is shown to three decimals in this cell
equation_label = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 3)}"

# Observations, fitted line in red, and the line equation as an on-plot label
figure, axes = plt.subplots()
axes.scatter(latitudes, cloudiness)
axes.plot(latitudes, fitted_cloudiness, "r-")
axes.annotate(equation_label, (-35, 55), fontsize=15, color="red")
axes.set_title("Southern Hemisphere - Cloudiness (%) vs. Latitude")
axes.set_xlabel("Latitude")
axes.set_ylabel("Cloudiness (%)")
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between wind speed and latitude
pearson_r = st.pearsonr(North_df["Wind Speed"], North_df["Latitude"])[0]
r_squared = pearson_r * pearson_r
print(f"The r-squared is: {r_squared}")

# Least-squares fit of wind speed on latitude (northern hemisphere)
latitudes = North_df["Latitude"]
wind_speeds = North_df["Wind Speed"]
fit = linregress(latitudes, wind_speeds)
fitted_wind = latitudes * fit.slope + fit.intercept
# The intercept is shown to three decimals in this cell
equation_label = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 3)}"

# Observations, fitted line in red, and the line equation as an on-plot label
figure, axes = plt.subplots()
axes.scatter(latitudes, wind_speeds)
axes.plot(latitudes, fitted_wind, "r-")
axes.annotate(equation_label, (45, 11), fontsize=15, color="red")
axes.set_title("Northern Hemisphere - Wind Speed (mph) vs. Latitude")
axes.set_xlabel("Latitude")
axes.set_ylabel("Wind Speed (mph)")
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# r-squared: square of Pearson's r between wind speed and latitude
pearson_r = st.pearsonr(South_df["Wind Speed"], South_df["Latitude"])[0]
r_squared = pearson_r * pearson_r
print(f"The r-squared is: {r_squared}")

# Least-squares fit of wind speed on latitude (southern hemisphere)
latitudes = South_df["Latitude"]
wind_speeds = South_df["Wind Speed"]
fit = linregress(latitudes, wind_speeds)
fitted_wind = latitudes * fit.slope + fit.intercept
# The intercept is shown to three decimals in this cell
equation_label = f"y = {round(fit.slope, 2)}x + {round(fit.intercept, 3)}"

# Observations, fitted line in red, and the line equation as an on-plot label
figure, axes = plt.subplots()
axes.scatter(latitudes, wind_speeds)
axes.plot(latitudes, fitted_wind, "r-")
axes.annotate(equation_label, (-50, 9), fontsize=15, color="red")
axes.set_title("Southern Hemisphere - Wind Speed (mph) vs. Latitude")
axes.set_xlabel("Latitude")
axes.set_ylabel("Wind Speed (mph)")
plt.show()