# WeatherPy
----

In [None]:
# Dependencies and Setup
import json
import requests
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import datetime
from scipy.stats import linregress

# Import API key
from api_key import weather_api_key

# Incorporate citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination path for the exported city data (CSV)
output_data_file = "../output_data/city_data.csv"

# (low, high) bounds for latitude and longitude
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Draw 1500 random (lat, lng) pairs inside the configured bounds.
# lat_range / lng_range are the (low, high) tuples declared in the setup cell,
# so the bounds are defined in exactly one place.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Resolve each pair to its nearest city and keep every city name once.
# dict.fromkeys preserves first-seen order and de-duplicates with O(1)
# lookups, instead of scanning a growing list for every candidate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat_val, lng_val).city_name
    for lat_val, lng_val in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

In [None]:
# Per-city accumulators, one list per field collected by the API retrieval loop
city_names, lat, long = [], [], []
maxtemp, humidity, cloudiness, wind_speed = [], [], [], []
country, date = [], []

# Counters used by the retrieval log (record number within a set, set number)
city_counter = set_counter = 1

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Retrieve current weather for every city in `cities` and accumulate the
# readings in the per-city lists (city_names, lat, long, maxtemp, humidity,
# cloudiness, wind_speed, country, date) created in the previous cell.
print("Beginning Data Retrieval ")
print("-----------------------------")

base_url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
query_url = f"{base_url}appid={weather_api_key}&units={units}&q="

for city in cities:
    try:
        response = requests.get(query_url + city).json()

        # Read every field BEFORE appending anything: if the response lacks
        # a key, the whole city is skipped and the lists stay the same length
        # (unequal lengths would make the DataFrame construction fail).
        name = response["name"]
        clouds = response["clouds"]["all"]
        country_code = response["sys"]["country"]
        observed_on = datetime.datetime.fromtimestamp(int(response["dt"])).strftime("%m-%d-%Y")
        humid = response["main"]["humidity"]
        latitude = response["coord"]["lat"]
        longitude = response["coord"]["lon"]
        temp_max = response["main"]["temp_max"]
        wind = response["wind"]["speed"]
    except (KeyError, IndexError):
        print("City not found. Skipping...")
        continue

    city_names.append(name)
    cloudiness.append(clouds)
    country.append(country_code)
    # Append (rather than rebind) so `date` stays a list with one entry per city
    date.append(observed_on)
    humidity.append(humid)
    lat.append(latitude)
    long.append(longitude)
    maxtemp.append(temp_max)
    wind_speed.append(wind)

    # Log with the current counter first, then advance it, so records are
    # numbered 1..50 within each set.
    print(f"Processing Record {city_counter} of Set {set_counter} : {city}")
    if city_counter >= 50:
        city_counter = 1
        set_counter += 1
    else:
        city_counter += 1

print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the collected readings into a single table, one column per field
column_data = dict([
    ("City", city_names),
    ("Lat", lat),
    ("Lng", long),
    ("Max Temp", maxtemp),
    ("Humidity", humidity),
    ("Cloudiness", cloudiness),
    ("Wind Speed", wind_speed),
    ("Country", country),
    ("Date", date),
])
city_data_df = pd.DataFrame(column_data)

# Preview the first rows
city_data_df.head()

In [None]:
# How many entries were returned?
# count() reports the number of non-null values in each column.
city_data_df.count()

In [None]:
# Inspect the data by using the describe function
# (summary statistics for the DataFrame's numeric columns).
city_data_df.describe()

In [None]:
# Export the city data into a csv file.
# Uses the output_data_file path declared in the setup cell instead of
# repeating the literal, so the destination is configured in one place.
city_data_df.to_csv(output_data_file, index=False)

In [None]:
# Number of rows (cities) in the DataFrame
countforme = city_data_df.shape[0]
countforme

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

In [None]:
# Largest value in the Date column, kept for use in plot titles
tstamp = city_data_df["Date"].max()
tstamp

#### Latitude vs. Temperature Plot

In [None]:
# Scatter of max temperature against latitude for every retrieved city
plt.scatter(city_data_df["Lat"], city_data_df["Max Temp"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Temperature (°F)")
# The title carries the analysis date (tstamp, taken from the Date column)
plt.title(f"Temperature (°F) vs Latitude ({tstamp})")

plt.savefig("../images/tempvlat.png")

# As latitude approaches 0, i.e. the equator, temperature increases

#### Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for every retrieved city
plt.scatter(city_data_df["Lat"], city_data_df["Humidity"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
# The title carries the analysis date (tstamp, taken from the Date column)
plt.title(f"Humidity (%) vs Latitude ({tstamp})")

plt.savefig("../images/humvlat.png")

# There seems to be no relationship between humidity and latitude

#### Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude for every retrieved city
plt.scatter(city_data_df["Lat"], city_data_df["Cloudiness"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
# The title carries the analysis date (tstamp, taken from the Date column)
plt.title(f"Cloudiness (%) vs Latitude ({tstamp})")

plt.savefig("../images/cloudvlat.png")

# There seems to be no relationship between cloudiness and latitude

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every retrieved city
plt.scatter(city_data_df["Lat"], city_data_df["Wind Speed"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
# The title carries the analysis date (tstamp, taken from the Date column)
plt.title(f"Wind Speed (mph) vs Latitude ({tstamp})")

plt.savefig("../images/windvlat.png")

# There seems to be no relationship between wind speed and latitude

## Linear Regression

In [None]:
# Split the cities by hemisphere: latitude >= 0 is northern, latitude < 0 is southern
latitudes = city_data_df["Lat"]
northern_cities = city_data_df[latitudes >= 0]
southern_cities = city_data_df[latitudes < 0]

In [None]:
# Northern (Lat >= 0) and southern (Lat < 0) hemisphere DataFrames
northlat_df = city_data_df[city_data_df["Lat"].ge(0)]
southlat_df = city_data_df[city_data_df["Lat"].lt(0)]

In [None]:
#Your next objective is to run linear regression on each relationship, only this time separating 
#them into Northern Hemisphere (greater than or equal to 0 degrees latitude) and 
#Southern Hemisphere (less than 0 degrees latitude):

#Northern Hemisphere - Max Temp vs. Latitude
#Southern Hemisphere - Max Temp vs. Latitude
#Northern Hemisphere - Humidity (%) vs. Latitude
#Southern Hemisphere - Humidity (%) vs. Latitude
#Northern Hemisphere - Cloudiness (%) vs. Latitude
#Southern Hemisphere - Cloudiness (%) vs. Latitude
#Northern Hemisphere - Wind Speed (mph) vs. Latitude
#Southern Hemisphere - Wind Speed (mph) vs. Latitude

#After each pair of plots explain what the linear regression is analyzing, any relationships you notice and any other analysis you may have.

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: max temperature vs. latitude with a fitted regression line
nh_lat = northern_cities["Lat"]
nh_temp = northern_cities["Max Temp"]

# Least-squares fit of temperature on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(nh_lat, nh_temp)
regress_values = nh_lat * slope + intercept
line_equation = "y = {}x + {}".format(str(round(slope, 2)), str(round(intercept, 2)))

# Raw observations, then the fitted line on top
plt.scatter(nh_lat, nh_temp, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(nh_lat, regress_values, "r-")

# Labels, grid and the fitted equation
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Temperature (°F)")
plt.title("Temperature (°F) vs Latitude (NH)")
plt.annotate(line_equation, (10, 10), fontsize=12, color="black")

plt.savefig("../images/tempvlatnh.png")

In [None]:
# Summarize the northern-hemisphere temperature regression.
# The direction is derived from the sign of rvalue (set by the regression in
# the previous cell) instead of being hard-coded, so the message cannot
# contradict the fitted data.
direction = "positive" if rvalue > 0 else "negative"
print(f"The r value ({rvalue:.2f}) indicates a {direction} correlation between latitude and max temperature.")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: max temperature vs. latitude with a fitted regression line
sh_lat = southern_cities["Lat"]
sh_temp = southern_cities["Max Temp"]

# Least-squares fit of temperature on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(sh_lat, sh_temp)
regress_values = sh_lat * slope + intercept
line_equation = "y = {}x + {}".format(str(round(slope, 2)), str(round(intercept, 2)))

# Raw observations, then the fitted line on top
plt.scatter(sh_lat, sh_temp, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(sh_lat, regress_values, "r-")

# Labels, grid and the fitted equation
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Temperature (°F)")
plt.title("Temperature (°F) vs Latitude (SH)")
plt.annotate(line_equation, (-53, 81), fontsize=12, color="black")

plt.savefig("../images/tempvlatsh.png")

In [None]:
# Conclusion for the southern-hemisphere max temperature vs. latitude regression
print("The high r value indicates a strong positive correlation between latitude and max temperature.")

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: humidity vs. latitude with a fitted regression line
nh_lat = northern_cities["Lat"]
nh_humidity = northern_cities["Humidity"]

# Least-squares fit of humidity on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(nh_lat, nh_humidity)
regress_values = nh_lat * slope + intercept
line_equation = "y = {}x + {}".format(str(round(slope, 2)), str(round(intercept, 2)))

# Raw observations, then the fitted line on top
plt.scatter(nh_lat, nh_humidity, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(nh_lat, regress_values, "r-")

# Labels, grid and the fitted equation
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Humidity")
plt.title("Humidity vs Latitude (NH)")
plt.annotate(line_equation, (10, 10), fontsize=12, color="black")

plt.savefig("../images/northernhumvlat.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: humidity vs. latitude with a fitted regression line.
# The regression is done inline, the same way as the northern-hemisphere
# humidity cell, because no plot_linear_regression helper is defined in the
# visible notebook.
x_values = southlat_df["Lat"]
y_values = southlat_df["Humidity"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

plt.scatter(x_values, y_values, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(x_values, regress_values, "r-")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Humidity")
plt.title("Humidity vs Latitude (SH)")
# Place the equation in axes-fraction coordinates so it is always visible;
# a data-coordinate anchor with positive x lies outside southern latitudes.
plt.annotate(line_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=12, color="black")

# Save next to the other figures, with an explicit extension
plt.savefig("../images/southernhumvlat.png")

print(f"The r-value is: {rvalue:.2f}")
print("The low r values indicate a weak to no relationship between humidity and latitude.")

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: cloudiness vs. latitude with a fitted regression line.
# The regression is done inline, the same way as the northern-hemisphere
# humidity cell, because no plot_linear_regression helper is defined in the
# visible notebook.
x_values = northlat_df["Lat"]
y_values = northlat_df["Cloudiness"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

plt.scatter(x_values, y_values, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(x_values, regress_values, "r-")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.title("Cloudiness vs Latitude (NH)")
# Place the equation in axes-fraction coordinates so it is always visible
# regardless of the data range.
plt.annotate(line_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=12, color="black")

# Save next to the other figures, with an explicit extension
plt.savefig("../images/northerncloudvlat.png")

print(f"The r-value is: {rvalue:.2f}")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: cloudiness vs. latitude with a fitted regression line.
# The regression is done inline, the same way as the northern-hemisphere
# humidity cell, because no plot_linear_regression helper is defined in the
# visible notebook.
x_values = southlat_df["Lat"]
y_values = southlat_df["Cloudiness"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

plt.scatter(x_values, y_values, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(x_values, regress_values, "r-")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.title("Cloudiness vs Latitude (SH)")
# Place the equation in axes-fraction coordinates so it is always visible;
# a data-coordinate anchor with positive x lies outside southern latitudes.
plt.annotate(line_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=12, color="black")

# Save next to the other figures, with an explicit extension
plt.savefig("../images/southerncloudvlat.png")

print(f"The r-value is: {rvalue:.2f}")
print("The low r values indicate a weak positive relationship between latitude and cloudiness.")

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: wind speed vs. latitude with a fitted regression line.
# The regression is done inline, the same way as the northern-hemisphere
# humidity cell, because no plot_linear_regression helper is defined in the
# visible notebook.
x_values = northlat_df["Lat"]
y_values = northlat_df["Wind Speed"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

plt.scatter(x_values, y_values, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(x_values, regress_values, "r-")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.title("Wind Speed vs Latitude (NH)")
# Place the equation in axes-fraction coordinates so it is always visible
# regardless of the data range.
plt.annotate(line_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=12, color="black")

# Save next to the other figures, with an explicit extension
plt.savefig("../images/northernwindvlat.png")

print(f"The r-value is: {rvalue:.2f}")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: wind speed vs. latitude with a fitted regression line.
# The regression is done inline, the same way as the northern-hemisphere
# humidity cell, because no plot_linear_regression helper is defined in the
# visible notebook.
x_values = southlat_df["Lat"]
y_values = southlat_df["Wind Speed"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_equation = "y = " + str(round(slope, 2)) + "x + " + str(round(intercept, 2))

plt.scatter(x_values, y_values, marker="o", s=25, facecolor="teal", edgecolor="black")
plt.plot(x_values, regress_values, "r-")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.title("Wind Speed vs Latitude (SH)")
# Place the equation in axes-fraction coordinates so it is always visible;
# a data-coordinate anchor with positive x lies outside southern latitudes.
plt.annotate(line_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=12, color="black")

# Save next to the other figures, with an explicit extension
plt.savefig("../images/southernwindvlat.png")

print(f"The r-value is: {rvalue:.2f}")
print("The low r value indicates that there is no significant relationship between wind speed and latitude. The difference between the hemispheres doesn't seem to be significant enough to comment upon.")