# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import json
import time
from scipy.stats import linregress

# Import API key. The OpenWeatherMap query URL built later in this notebook
# reads the name `weather_api_key`, so that is the name that must be imported
# (assumes api_keys.py defines it -- confirm against your local api_keys.py).
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy


# Output File (CSV). "../" is the parent directory, consistent with the
# "../Images/..." paths used when the figures are saved.
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Unique city names, kept in the order they are first encountered
cities = []

# Draw 1500 random latitudes and 1500 random longitudes, then pair them up
lats = np.random.uniform(*lat_range, size=1500)
lngs = np.random.uniform(*lng_range, size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair
for point_lat, point_lng in lat_lngs:
    nearest = citipy.nearest_city(point_lat, point_lng)
    city = nearest.city_name

    # Skip names that have already been collected
    if city in cities:
        continue
    cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Base endpoint and query prefix; the city name is appended for each request.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

#Create empty lists for the data we need to store.
# These lists are parallel: position i in every list describes the same city.
city_two = []
cloudiness = []
country = []
date = []
humidity = []
lat = []
lng = []
max_temp = []
wind_speed = []

#Sets and Counts for API calls
count_one = 0
set_one = 1

#For Loop to gather Weather Information
for city in cities:
    city_url = query_url + city
    response = requests.get(city_url).json()

    try:
        # Read every field BEFORE storing any of them. If a key is missing
        # part-way through, nothing has been appended yet, so the parallel
        # lists cannot end up with different lengths (which would make the
        # later pd.DataFrame(...) call fail).
        city_cloudiness = response['clouds']['all']
        city_country = response['sys']['country']
        city_date = response['dt']
        city_humidity = response['main']['humidity']
        city_lat = response['coord']['lat']
        city_lng = response['coord']['lon']
        city_max_temp = response['main']['temp_max']
        city_wind_speed = response['wind']['speed']
    except KeyError:
        # The response did not contain the expected weather fields.
        print("City Not Found. Skipping...")
        continue

    # All fields are present: store the complete record in one go.
    city_two.append(city)
    cloudiness.append(city_cloudiness)
    country.append(city_country)
    date.append(city_date)
    humidity.append(city_humidity)
    lat.append(city_lat)
    lng.append(city_lng)
    max_temp.append(city_max_temp)
    wind_speed.append(city_wind_speed)

    # Advance the record counter; once it has passed 48, start a new set
    # and restart the record numbering at 1.
    if count_one > 48:
        count_one = 1
        set_one += 1
    else:
        count_one += 1
    print(f"Retrieving Record {count_one} of Set {set_one} for City: {city}")

print("Data Retrieval Completed.")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Pair each output column name with the list gathered during the API calls
column_names = ["City", "Cloudiness", "Country", "Date", "Humidity",
                "Lat", "Lng", "Max Temp", "Wind Speed"]
column_values = [city_two, cloudiness, country, date, humidity,
                 lat, lng, max_temp, wind_speed]
weather_dict = dict(zip(column_names, column_values))

# Build the DataFrame from the column mapping
weather_data = pd.DataFrame(data=weather_dict)

# Preview the first rows
weather_data.head()

In [None]:
# Count the non-null entries in each column of the weather DataFrame
weather_data.count()

In [None]:
# Export the city data to the CSV file named by output_data_file
weather_data.to_csv(output_data_file)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.
humidity_stats = weather_data['Humidity'].describe()
high_humidity_index = weather_data.index[weather_data['Humidity'] > 100]

# Report what the data actually shows instead of a hard-coded conclusion.
if high_humidity_index.empty:
    print(f"The statistics for the Humidity column show the maximum as {humidity_stats['max']:.0f}%. There are no cities with humidity over 100%.")
else:
    print(f"Removing {len(high_humidity_index)} cities with humidity over 100% (row indices: {list(high_humidity_index)}).")
    # Drop the offending rows so the plots and regressions that follow
    # only use cities with a valid humidity reading.
    weather_data = weather_data.drop(index=high_humidity_index)

# Summary statistics computed before any rows were removed
print(humidity_stats)


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter plot of Max Temperature against Latitude for every city
temp_ax = weather_data.plot(
    x='Lat',
    y='Max Temp',
    kind='scatter',
    marker='o',
    color='crimson',
    edgecolor='darkred',
    alpha=0.5,
)

# Axis labels and title, set directly on the axes returned by the plot call
temp_ax.set_xlabel("Latitude")
temp_ax.set_ylabel("Temperature (F)")
temp_ax.set_title("City Latitude vs. Max Temperature (F)")

plt.tight_layout()

# Write the figure to the Images folder as a .png
plt.savefig("../Images/Latitude_MaxTemp.png")

## Latitude vs. Humidity Plot

In [None]:
# Scatter plot of Humidity against Latitude for every city
humidity_ax = weather_data.plot(
    x='Lat',
    y='Humidity',
    kind='scatter',
    marker='o',
    color='skyblue',
    edgecolor='steelblue',
    alpha=0.5,
)

# Axis labels and title, set directly on the axes returned by the plot call
humidity_ax.set_xlabel("Latitude")
humidity_ax.set_ylabel("Humidity (%)")
humidity_ax.set_title("City Latitude vs. Humidity (%)")

plt.tight_layout()

# Write the figure to the Images folder as a .png
plt.savefig("../Images/Latitude_Humidity.png")

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of Cloudiness against Latitude for every city
cloudiness_ax = weather_data.plot(
    x='Lat',
    y='Cloudiness',
    kind='scatter',
    marker='o',
    color='silver',
    edgecolor='dimgrey',
    alpha=0.5,
)

# Axis labels and title, set directly on the axes returned by the plot call
cloudiness_ax.set_xlabel("Latitude")
cloudiness_ax.set_ylabel("Cloudiness (%)")
cloudiness_ax.set_title("City Latitude vs. Cloudiness (%)")

plt.tight_layout()

# Write the figure to the Images folder as a .png
plt.savefig("../Images/Latitude_Cloudiness.png")

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of Wind Speed against Latitude for every city
wind_ax = weather_data.plot(
    x='Lat',
    y='Wind Speed',
    kind='scatter',
    marker='o',
    color='turquoise',
    edgecolor='teal',
    alpha=0.5,
)

# Axis labels and title, set directly on the axes returned by the plot call
wind_ax.set_xlabel("Latitude")
wind_ax.set_ylabel("Wind Speed (mph)")
wind_ax.set_title("City Latitude vs. Wind Speed (mph)")

plt.tight_layout()

# Write the figure to the Images folder as a .png
plt.savefig("../Images/Latitude_WindSpeed.png")

## Linear Regression

In [None]:
# Split the cities by hemisphere: latitude 0 and above is treated as Northern,
# anything below 0 as Southern.
is_northern = weather_data['Lat'] >= 0
is_southern = weather_data['Lat'] < 0

northern_hemisphere_df = weather_data.loc[is_northern]
southern_hemisphere_df = weather_data.loc[is_southern]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:

#Creating a Scatter Plot for the Northern Hemisphere - Max Temp vs. Latitude
northern_hemisphere_df.plot(kind='scatter', x='Lat', y='Max Temp', marker='o', color='crimson',
                            edgecolor='darkred', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")
plt.title("City Latitude vs. Temperature (F) in the Northern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = northern_hemisphere_df['Lat']
y_values = northern_hemisphere_df['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (50,80), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (50, 75), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (50,70), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/NorthernHemisphere_Temp.png")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
#Creating a Scatter Plot for the Southern Hemisphere - Max Temp vs. Latitude
southern_hemisphere_df.plot(kind='scatter', x='Lat', y='Max Temp', marker='o', color='crimson',
                            edgecolor='darkred', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Temperature (F)")
plt.title("City Latitude vs. Temperature (F) in the Southern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = southern_hemisphere_df['Lat']
y_values = southern_hemisphere_df['Max Temp']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (-50,80), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (-50, 75), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (-50,70), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/SouthernHemisphere_Temp.png")

ANALYSIS:
The scatter plot for the Northern Hemisphere shows a very strong negative correlation between Latitude and Temperature (F), with a Pearson's R of -0.83. This means that as a city moves further North away from the equator (Latitude increases), the Temperature decreases. The R-squared value for this relationship is 0.68, meaning that for our dataset of cities, 68% of the variability in Temperature (F) is explained by Latitude in the Northern Hemisphere.

The scatter plot for the Southern Hemisphere shows a strong positive correlation between Latitude and Temperature (F), with a Pearson's R of 0.75. This means that as a city moves further North towards the equator (Latitude increases), the Temperature increases. The R-squared value for this relationship is 0.56, meaning that for our dataset of cities, 56% of the variability in Temperature (F) is explained by Latitude in the Southern Hemisphere.

It is worth noting that the list of cities generated above did include a higher number of locations in the Northern Hemisphere. The Northern Hemisphere dataframe had 375 cities. The Southern Hemisphere dataframe had 183 cities. It stands to reason that this discrepancy in sample size is the reason the Northern Hemisphere data exhibited a stronger relationship than the Southern Hemisphere data. If more cities were added to the Southern Hemisphere data set, it would be likely that the relationship between Latitude and Temperature (F) would grow stronger.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Creating a Scatter Plot for the Northern Hemisphere - Humidity vs. Latitude
northern_hemisphere_df.plot(kind='scatter', x='Lat', y='Humidity', marker='o', color='skyblue',
                            edgecolor='steelblue', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("City Latitude vs. Humidity (%) in the Northern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = northern_hemisphere_df['Lat']
y_values = northern_hemisphere_df['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (10,45), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (10, 40), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (10,35), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/NorthernHemisphere_Humidity.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
#Creating a Scatter Plot for the Southern Hemisphere - Humidity vs. Latitude
southern_hemisphere_df.plot(kind='scatter', x='Lat', y='Humidity', marker='o', color='skyblue',
                            edgecolor='steelblue', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("City Latitude vs. Humidity (%) in the Southern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = southern_hemisphere_df['Lat']
y_values = southern_hemisphere_df['Humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (-50,65), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (-50, 60), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (-50,55), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/SouthernHemisphere_Humidity.png")

ANALYSIS:
Neither of the above scatter plots, depicting the relationship between Latitude and Humidity (%) in the Northern and Southern Hemispheres respectively, shows any indication of a statistically significant relationship existing. There is a very weak positive correlation in the Northern Hemisphere; however, the R-squared value is negligible. The positive correlation is even weaker in the Southern Hemisphere.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Creating a Scatter Plot for the Northern Hemisphere - Cloudiness vs. Latitude
northern_hemisphere_df.plot(kind='scatter', x='Lat', y='Cloudiness', marker='o', color='silver',
                            edgecolor='dimgrey', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("City Latitude vs. Cloudiness (%) in the Northern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = northern_hemisphere_df['Lat']
y_values = northern_hemisphere_df['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (63,55), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (63, 50), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (63,45), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/NorthernHemisphere_Cloudiness.png")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
#Creating a Scatter Plot for the Southern Hemisphere - Cloudiness vs. Latitude
southern_hemisphere_df.plot(kind='scatter', x='Lat', y='Cloudiness', marker='o', color='silver',
                            edgecolor='dimgrey', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("City Latitude vs. Cloudiness (%) in the Southern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = southern_hemisphere_df['Lat']
y_values = southern_hemisphere_df['Cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (-13,50), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (-13, 45), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (-13,40), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/SouthernHemisphere_Cloudiness.png")

ANALYSIS:
As with the analysis between Latitude and Humidity above, the relationships shown by the above scatter plots for Latitude vs. Cloudiness (%) in the Northern and Southern Hemispheres show little to no correlation. There is a very weak positive correlation present between Latitude and Cloudiness (%) in the Northern Hemisphere, with a Pearson's R of 0.16 and an R-squared value of 0.03. This means that while Cloudiness (%) does increase as Latitude increases, it is not a statistically significant relationship.

In the Southern Hemisphere, the positive correlation between Latitude and Cloudiness (%) does become stronger than in the Northern Hemisphere, but remains weak. The Pearson's R for this relationship is 0.24 and the R-squared value is 0.06. It is interesting to note that though the positive correlation itself became stronger, the percentage of variability in Cloudiness (%) explained by Latitude is still very small (6%, compared with 3% in the Northern Hemisphere). This could mean that other external factors are at play, causing this relationship to appear stronger than it is. This is a great example of correlation not equating to causation.

Additionally, this is the first time we see the Southern Hemisphere having a stronger correlation than the Northern Hemisphere in our analysis.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Creating a Scatter Plot for the Northern Hemisphere - Wind Speed vs. Latitude
northern_hemisphere_df.plot(kind='scatter', x='Lat', y='Wind Speed', marker='o', color='turquoise',
                            edgecolor='teal', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("City Latitude vs. Wind Speed (mph) in the Northern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = northern_hemisphere_df['Lat']
y_values = northern_hemisphere_df['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (60,14), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (60, 12), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (60,10), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/NorthernHemisphere_WindSpeed.png")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
#Creating a Scatter Plot for the Southern Hemisphere - Wind Speed vs. Latitude
southern_hemisphere_df.plot(kind='scatter', x='Lat', y='Wind Speed', marker='o', color='turquoise',
                            edgecolor='teal', alpha=0.5)
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title("City Latitude vs. Wind Speed (mph) in the Southern Hemisphere")
plt.tight_layout()

#Perform the Linear Regression
x_values = southern_hemisphere_df['Lat']
y_values = southern_hemisphere_df['Wind Speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# Fitted y value for every observed latitude
regress_values = x_values*slope + intercept
# Build the equation label with an explicit sign so a negative intercept
# reads "y = 0.5x - 3.2" rather than "y =0.5x +-3.2".
intercept_sign = "-" if intercept < 0 else "+"
line_eq = f"y = {round(slope, 2)}x {intercept_sign} {abs(round(intercept, 2))}"
r_squared_value = rvalue**2

#Plot the Linear Regression Line, Equation, Pearson's R & R-Squared Value
# Annotation positions are given in data coordinates (latitude, y value).
plt.plot(x_values, regress_values, "r--")
plt.annotate(line_eq, (-50,14), fontsize=10, color = 'red')
plt.annotate(f'r: {round(rvalue,2)}', (-50, 12), fontsize=10, color='red')
plt.annotate(f'r-squared: {round(r_squared_value,2)}', (-50,10), fontsize=10, color='red')

#Export Image to Images Folder
plt.savefig("../Images/SouthernHemisphere_WindSpeed.png")

ANALYSIS:
Neither of the above scatter plots, depicting the relationship between Latitude and Wind Speed (mph) in the Northern and Southern Hemispheres respectively, shows any indication of a statistically significant relationship existing. There is a very weak positive correlation in the Northern Hemisphere and a very weak negative correlation in the Southern Hemisphere, but neither of these relationships is worth mentioning.

Final Analysis
Looking at the above data there are a few important pieces of information that can be taken away:

The strongest relationship observed between City Latitude and the various weather information collected was in relation to Max Temperature (F). Looking at the data set as a whole, the relationship between City Latitude and Temperature could not be assessed with a linear regression, as it is closer to a quadratic relationship. However, breaking the dataset into the Northern and Southern Hemispheres allowed for the exploration of linear relationships. In the Northern Hemisphere there was a very strong negative correlation between Latitude and Temperature (F). In the Southern Hemisphere there was a strong positive correlation between Latitude and Temperature (F). In summary, this means that as a city becomes closer to the Equator, the temperature increases.

The second strongest relationship observed was between City Latitude and Cloudiness, however it was by no means a strong or statistically significant relationship. There were very weak positive correlations between Latitude and Cloudiness for both the Northern and Southern Hemisphere. The R-squared values for these correlations were also very low, showing that the percent of variability in Cloudiness was not determined by the change in Latitude. There are likely external factors and other variables at play causing these weak positive correlations.

The analysis done for City Latitude vs. Humidity and City Latitude vs. Wind Speed shows that there was practically no relationship between Latitude and either of these variables. In the Northern Hemisphere there was a weak correlation between Humidity and Latitude, but the R-squared value was very small. Similarly to the above observations in regards to Cloudiness, this could mean that there are other variables at play causing this correlation to appear stronger than it is.