# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
#Dependencies
import requests
import json
from api_keys import weather_api_key
import numpy as np
import pandas as pd
from citipy import citipy
import matplotlib.pyplot as plt
import scipy.stats as st
from scipy.stats import linregress

# Base endpoint for OpenWeatherMap's current-weather API. HTTPS is used so the
# API key that accompanies every request is not sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"


## Generate Cities List

In [None]:
#Create variables for the random selection of latitudes and longitudes with limits
lat_range = (-90, 90)
lng_range = (-180, 180)

# Create a set of 1200 random latitude and longitude combinations
##NB: Many of these combinations will return the same city, so more are generated to end with 
## the minimum required 500 cities
# The bounds come from the range tuples above so the limits live in one place.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1200)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1200)

#Zip the lats and longs together to form coordinate pairs
# Materialised as a list so the pairs can still be inspected after the lookup below.
lat_lngs = list(zip(lats, lngs))

#Use citipy to locate the closest city to each coordinate set
# One name per coordinate pair, in the same order as lats/lngs; duplicates are
# expected here and are removed when the DataFrame is built.
cities = [citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs]



In [None]:
#Create a dataframe to hold city information
# One row per generated coordinate pair. The weather columns start out as
# empty strings and are filled in by the API calls.
city_columns = {
    "City Name": cities,
    "Latitude": lats,
    "Longitude": lngs,
}
for weather_column in ("Max Temp", "Humidity", "Cloudiness", "Wind Speed", "Country"):
    city_columns[weather_column] = ""

#Remove any duplicate rows based on city name
# The first row seen for each name is kept and the index is renumbered from 0.
df = pd.DataFrame(city_columns).drop_duplicates(subset=["City Name"], ignore_index=True)
df

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Query the weather API once for every city in the DataFrame and copy the
# fields of interest into that city's row.
## Rows whose lookup fails keep their empty placeholder values.
for index, row in df.iterrows():
    city = row["City Name"]

    #Print log of city as it is checked (before the request, so the log
    # reads in processing order and a failure message follows its own city)
    print(f"Checking weather at {city}, city number {index + 1}")

    try:
        # Passing the query as params lets requests URL-encode city names that
        # contain spaces or reserved characters; the timeout stops one stalled
        # connection from hanging the whole run.
        response = requests.get(
            url,
            params={"appid": weather_api_key, "q": city, "units": "metric"},
            timeout=10,
        )
        cities_info = response.json()

        # Read every field before writing any, so a missing key cannot leave
        # the row half-filled. The coordinates reported by the API replace the
        # random sample point, so that each row's latitude/longitude describe
        # the same place as its weather readings.
        weather = {
            "Latitude": cities_info["coord"]["lat"],
            "Longitude": cities_info["coord"]["lon"],
            "Max Temp": cities_info["main"]["temp_max"],
            "Humidity": cities_info["main"]["humidity"],
            "Cloudiness": cities_info["clouds"]["all"],
            "Wind Speed": cities_info["wind"]["speed"],
            "Country": cities_info["sys"]["country"],
        }
    except (KeyError, ValueError, requests.exceptions.RequestException):
        # KeyError: the response lacks the expected fields (e.g. city not found).
        # ValueError: the body was not valid JSON.
        # RequestException: connection problem or timeout.
        print(f"No data available for {city}")
        continue

    for column, value in weather.items():
        df.loc[index, column] = value


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
#Convert values from API to numerical values, in order to plot later
# Anything that cannot be parsed (such as the empty placeholder strings)
# becomes NaN because of errors="coerce".
for numeric_column in ("Max Temp", "Humidity", "Cloudiness", "Wind Speed"):
    df[numeric_column] = pd.to_numeric(df[numeric_column], errors="coerce")

# The CSV is written before the NaN rows are dropped, so it also contains the
# cities for which no weather values were stored.
df.to_csv("../Cities_Weather_Info.csv")

# Keep only complete rows for the analysis and preview the first 20
df = df.dropna()
df.head(20)


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#Check to see if there are any values of humidity over 100%
# Boolean mask first, then the matching rows
over_saturated = df['Humidity'].gt(100)
humid = df[over_saturated]
humid

#Repeated trials did not pull any data for this step

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of each city's maximum temperature against its latitude
plt.scatter(df['Latitude'], df['Max Temp'])
# Raw string: "\c" is not a valid Python escape, so the mathtext degree
# symbol must be written without backslash processing.
plt.title(r"Latitude vs Temperature ($^\circ$ C)")
plt.ylabel("Max Temp")
plt.xlabel("Latitude")

#Save the graph to 'Images' folder
plt.savefig("Images/Latitude_vs_Temperature.png")

While there are some outliers, this graph shows that as latitudes approach the equator (that is, 0 degrees latitude) temperature increases. 

## Latitude vs. Humidity Plot

In [None]:
# Scatter of each city's humidity against its latitude
latitudes = df['Latitude']
humidity = df['Humidity']
plt.scatter(latitudes, humidity)

# Axis labels and title
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Latitude vs Humidity (%)")

#Save the graph to 'Images' folder
plt.savefig("Images/Latitude_vs_Humidity.png")

In [None]:
#Correlation coefficient calculation
# pearsonr's result is indexed here for its first element, the coefficient.
correlation = st.pearsonr(df['Latitude'], df['Humidity'])
# The message names the two variables that were actually correlated.
print(f"The correlation between latitude and humidity is {round(correlation[0],2)}")

The above graph of latitude vs humidity appears to show little or no correlation between humidity and latitude. This is supported by the correlation coefficient calculated above.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of each city's cloud cover against its latitude
plt.scatter(x=df['Latitude'], y=df['Cloudiness'])

# Axis labels and title
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Latitude vs Cloudiness (%)")

#Save the graph to 'Images' folder
plt.savefig("Images/Latitude_vs_Cloudiness.png")

The graph above shows the relationship between latitude and cloud cover. There appears to be no correlation: cloud cover varies greatly across similar latitudes.

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of each city's wind speed against its latitude
latitudes = df['Latitude']
wind_speeds = df['Wind Speed']
plt.scatter(latitudes, wind_speeds)

# Axis labels and title
plt.xlabel("Latitude")
plt.ylabel("Windspeed (meters/second)")
plt.title("Latitude vs Wind Speed")

#Save the graph to 'Images' folder
plt.savefig("Images/Latitude_vs_WindSpeed.png")

The graph of latitude vs wind speed shows several outliers, but again very limited correlation between latitude and wind speed.

## Linear Regression

In [None]:
#Create separate dataframes for northern and southern hemispheres
# Latitude 0 (the equator) is counted as northern.
in_north = df['Latitude'].ge(0)
in_south = df['Latitude'].lt(0)
northern_hemisphere = df[in_north]
southern_hemisphere = df[in_south]


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Calculate the linear regression model
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Max Temp']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted temperature at each observed latitude, and the equation text for the chart
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Plot the chart
plt.scatter(x_values, y_values)
# Raw string: "\c" is not a valid Python escape, so the mathtext degree
# symbol must be written without backslash processing.
plt.title(r"Northern Latitudes vs Temperature ($^\circ$ C)")
plt.ylabel("Max Temp")
plt.xlabel("Latitude")
plt.plot(x_values,regress_values,"r-")
# Equation label placed at a fixed position in data coordinates
plt.annotate(line_eq,(40,18),fontsize=15,color="red")

#Save the graph to 'Images' folder
plt.savefig("Images/NorthernLatitudes_vs_Temperature.png")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Calculate the linear regression model for the southern hemisphere
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Max Temp']

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted temperature at each observed latitude, and the equation text for the chart
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

#Plot the chart
plt.scatter(x_values, y_values)
# Raw string: "\c" is not a valid Python escape, so the mathtext degree
# symbol must be written without backslash processing.
plt.title(r"Southern Latitudes vs Temperature ($^\circ$ C)")
plt.ylabel("Max Temp")
plt.xlabel("Latitude")
plt.plot(x_values,regress_values,"r-")
# Equation label placed at a fixed position in data coordinates
plt.annotate(line_eq,(-60,11),fontsize=15,color="red")

#Save the graph to 'Images' folder
plt.savefig("Images/SouthernLatitudes_vs_Temperature.png")

The above graphs show that for this set of 502 random cities, there is a stronger relationship between latitude and temperature above the equator than below. 

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Fit humidity against latitude for the northern-hemisphere cities
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Humidity']
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted humidity at each observed latitude, and the equation text for the chart
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter the observations, then overlay the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(45, 10), fontsize=15, color="red")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Northern Latitudes vs Humidity (%)")

#Save the graph to 'Images' folder
plt.savefig("Images/NorthernLatitudes_vs_Humidity.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Fit humidity against latitude for the southern-hemisphere cities
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Humidity']
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted humidity at each observed latitude, and the equation text for the chart
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter the observations, then overlay the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(-80, 45), fontsize=15, color="red")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Southern Latitudes vs Humidity (%)")

#Save the graph to 'Images' folder
plt.savefig("Images/SouthernLatitudes_vs_Humidity.png")

The above plots of latitude vs humidity in the Northern and Southern Hemispheres show that humidity generally increases with latitude.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Fit cloudiness against latitude for the northern-hemisphere cities
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Cloudiness']
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted cloudiness at each observed latitude; the equation is also echoed to the output
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"
print(line_eq)

# Scatter the observations, then overlay the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(45, 10), fontsize=15, color="red")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Northern Latitudes vs Cloudiness (%)")

#Save the graph to 'Images' folder
plt.savefig("Images/NorthernLatitudes_vs_Cloudiness.png")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Fit cloudiness against latitude for the southern-hemisphere cities
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Cloudiness']
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted cloudiness at each observed latitude, and the equation text for the chart
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter the observations, then overlay the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(-80, 45), fontsize=15, color="red")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("Southern Latitudes vs Cloudiness (%)")

#Save the graph to 'Images' folder
plt.savefig("Images/SouthernLatitudes_vs_Cloudiness.png")

The above plots of latitude vs cloudiness in the Northern and Southern Hemispheres show that cloudiness is not related to proximity to the equator.

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Fit wind speed against latitude for the northern-hemisphere cities
x_values = northern_hemisphere['Latitude']
y_values = northern_hemisphere['Wind Speed']
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted wind speed at each observed latitude, and the equation text for the chart
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter the observations, then overlay the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(45, 10), fontsize=15, color="red")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (meters/second)")
plt.title("Northern Latitudes vs Wind Speed (meters/second)")

#Save the graph to 'Images' folder
plt.savefig("Images/NorthernLatitudes_vs_WindSpeed.png")

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Fit wind speed against latitude for the southern-hemisphere cities
x_values = southern_hemisphere['Latitude']
y_values = southern_hemisphere['Wind Speed']
slope, intercept, rvalue, pvalue, stderr = linregress(x_values, y_values)

# Fitted wind speed at each observed latitude, and the equation text for the chart
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope, 2)!s}x + {round(intercept, 2)!s}"

# Scatter the observations, then overlay the fitted line and its equation in red
plt.scatter(x_values, y_values)
plt.plot(x_values, regress_values, "r-")
plt.annotate(line_eq, xy=(-90, 9), fontsize=15, color="red")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (meters/second)")
plt.title("Southern Latitudes vs Wind Speed (meters/second)")

#Save the graph to 'Images' folder
plt.savefig("Images/SouthernLatitudes_vs_WindSpeed.png")