# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
#output_data_file = "output_data/cities.csv"


# Coordinate bounds as (minimum, maximum) pairs: latitude first, then longitude
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [None]:
# Unique city names in first-seen order, plus a set used only for fast
# "have we already got this city?" checks.
cities = []
seen_cities = set()

# Create a set of random lat and lng combinations, drawn from the bounds
# declared in the setup cell instead of repeating the literals here.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # Several random points can resolve to the same city; keep the first only
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information.
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "Imperial"

# Short city list to bypass the API request limitation while developing and
# testing the code. Keep disabled for the final run.
#cities = ["Paris", "London", "Oslo", "Beijing", "Dominica", "New York", "Chicago", "Los Angeles", \
#         "Miami", "San Francisco", "Philadelphia", "NotThere"]

# Build partial query URL; the city name is appended per request
weather_url = f"{url}appid={weather_api_key}&units={units}&q="

# Set up lists to hold response info (one entry per successfully fetched city)
city_name = []
cityid = []
country = []
lat = []
long = []
temp = []
max_temp = []
humidity = []
wind = []
cloud = []
counter = 1

# Debug aid: inspect one raw response
#response = requests.get(weather_url + city).json()
#print(json.dumps(response, indent=4, sort_keys=True))

# Loop through the list of cities and perform a request for data on each
for city in cities:
    try:
        response = requests.get(weather_url + city, timeout=10).json()
        # Read every field BEFORE touching the result lists. If any field is
        # missing, nothing is appended, so all lists stay the same length and
        # each index keeps describing a single city.
        record = (
            response['name'],
            # The city ID is the top-level `id`; OpenWeatherMap documents
            # `sys.id` as an internal parameter.
            response['id'],
            response['sys']['country'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['wind']['speed'],
            response['clouds']['all'],
        )
    except (requests.exceptions.RequestException, ValueError, KeyError):
        # Network failure, non-JSON body, or a payload without the expected
        # fields (e.g. an error response): skip the city.
        print(f"{city} data missing on weathermap will not be included")
    else:
        city_name.append(record[0])
        cityid.append(record[1])
        country.append(record[2])
        lat.append(record[3])
        long.append(record[4])
        temp.append(record[5])
        max_temp.append(record[6])
        humidity.append(record[7])
        wind.append(record[8])
        cloud.append(record[9])
        city_for_log = record[0]
        print(f"city found {counter}| {city_for_log} ")
        counter = counter + 1
    # Pause between queries (successful or not) to stay within API query limits
    time.sleep(1.15)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Column label -> list of values collected by the API loop
weather_dict = {
    "City": city_name,
    "City ID": cityid,
    "Country": country,
    "Lat": lat,
    "Long": long,
    "Temp": temp,
    "Max Temp": max_temp,
    "Humidity": humidity,
    "Wind Speed": wind,
    "Cloud Cover": cloud,
}

# Build one row per label and transpose, so lists of unequal length are padded
# with missing values instead of raising.
weather_df = pd.DataFrame.from_dict(weather_dict, orient='index').transpose()

# Drop incomplete rows, then let pandas infer real dtypes: the transposed frame
# holds every column as generic `object`, which would otherwise force manual
# numeric conversion before any maths or plotting.
weather_df = weather_df.dropna().infer_objects()
weather_df.head(300)

In [None]:
# Discard, in place, every row that still has at least one missing value,
# then preview up to the first 300 rows.
weather_df.dropna(axis=0, how="any", inplace=True)
weather_df.head(n=300)

In [None]:
from pathlib import Path

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Create the target folder if needed; to_csv does not create missing directories
Path(output_data_file).parent.mkdir(parents=True, exist_ok=True)

# Write the city data to a .csv, leaving out the DataFrame index
weather_df.to_csv(output_data_file, index=False)

In [None]:
import datetime

# Date of analysis as an MM/DD/YY string, used in the plot titles
today = datetime.date.today().strftime('%m/%d/%y')


### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

#### Latitude vs. Temperature Plot

In [None]:
# Scatter of temperature (x axis) against latitude (y axis), one point per city
plt.scatter(weather_df["Temp"],weather_df["Lat"],edgecolors="black",facecolors="coral",
            alpha = 0.75,linewidth = 0.5)

# Title and axis labels are set before the single plt.show() call at the end,
# so they appear on both the saved image and the displayed figure.
plt.title(f"City: Latitude vs.Temperature (F) ({today})", fontsize=14)
plt.xlabel('Temperature (F)', fontsize=12)
plt.ylabel('Latitude', fontsize=12)

#defining size for the graph
#fig_size = plt.rcParams["figure.figsize"]
#fig_size[0] = 10
#fig_size[1] = 8.5
#plt.rcParams["figure.figsize"] = fig_size


# Save as an image, then display
plt.savefig("Images/City Latitude vs.Temperature (F).png")
plt.show()

#### Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity (x axis) against latitude (y axis), one point per city
plt.scatter(weather_df["Humidity"],weather_df["Lat"],edgecolors="black",facecolors="coral",
            alpha = 0.75,linewidth = 0.5)

# Title and axis labels are set before the single plt.show() call at the end,
# so they appear on both the saved image and the displayed figure.
plt.title(f"City: Latitude vs. Humidity ({today})", fontsize=14)
plt.xlabel('Humidity', fontsize=12)
plt.ylabel('Latitude', fontsize=12)

# Save as an image, then display
plt.savefig("Images/Humidity.png")
plt.show()

#### Latitude vs. Cloudiness Plot

In [None]:
# Cloud cover goes on the x axis and latitude on the y axis
cloud_cover_values = weather_df["Cloud Cover"]
latitude_values = weather_df["Lat"]
marker_style = dict(edgecolors="black", facecolors="coral", alpha=0.75, linewidth=0.5)
plt.scatter(cloud_cover_values, latitude_values, **marker_style)

# Axis labels and a title carrying the date of analysis
plt.xlabel('Cloudiness', fontsize=14)
plt.ylabel('Latitude', fontsize=14)
plt.title(f"City: Latitude vs. Cloudiness ({today})", fontsize=14)

# Write the figure to disk first, then display it
plt.savefig("Images/Cloudiness.png")
plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed (x axis) against latitude (y axis), one point per city
plt.scatter(weather_df["Wind Speed"],weather_df["Lat"],edgecolors="black",facecolors="coral",
            alpha = 0.75,linewidth = 0.5)

plt.title(f"City: Latitude vs. Windspeed (mph) ({today})", fontsize=14)
# Labels follow the data passed to scatter above: wind speed is the first (x)
# argument and latitude the second (y).
plt.xlabel('Windspeed (mph)', fontsize=14)
plt.ylabel('Latitude', fontsize=14)

# Save as an image, then display
plt.savefig("Images/Windspeed (mph).png")
plt.show()


## Linear Regression

In [None]:
import scipy.stats as st

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots
def my_regline(x_values, y_values):
    """Fit a least-squares line to the data and return what a plot needs.

    Args:
        x_values: Independent values. Must support ``x_values * float``
            (e.g. a NumPy array or pandas Series).
        y_values: Dependent values, same length as ``x_values``.

    Returns:
        A tuple ``(bestfit_values, linear_eq, rvalue)``:
        ``bestfit_values`` is the fitted y for each x, ``linear_eq`` is the
        line equation as text with coefficients rounded to two decimals, and
        ``rvalue`` is the correlation coefficient reported by ``linregress``
        (square it to obtain r-squared).
    """
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    bestfit_values = x_values * slope + intercept
    linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
    # Hand the results back; previously they were computed and then discarded.
    return bestfit_values, linear_eq, rvalue

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
# .copy() gives each hemisphere its own frame, so the column assignments below
# modify that frame rather than a slice of weather_df (which triggers pandas'
# SettingWithCopyWarning).
northern_df = weather_df.loc[weather_df["Lat"]>=0].copy()
southern_df = weather_df.loc[weather_df["Lat"]<0].copy()

# Columns that must be numeric for the regressions and plots further down
numeric_columns = ['Lat', 'Long', 'Temp', 'Max Temp', 'Humidity', 'Wind Speed', 'Cloud Cover']
for hemisphere_df in (northern_df, southern_df):
    for column in numeric_columns:
        hemisphere_df[column] = pd.to_numeric(hemisphere_df[column])

northern_df.head()

northern_df.dtypes

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on max temperature (x), northern hemisphere
x_values = northern_df['Max Temp']
y_values = northern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.title(f"Northern Hemisphere: Latitude vs.Max Temperature (F) ({today})", fontsize=12)
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height) so it
# stays inside the plot whatever the data range is.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Max Temp in Fahrenheit')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Northern Hemisphere - Max Temp vs. Latitude Linear Regression.jpeg')
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on max temperature (x), southern hemisphere
x_values = southern_df['Max Temp']
y_values = southern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.title(f"Southern Hemisphere: Latitude vs.Max Temperature (F) ({today})", fontsize=12)
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height).
# A fixed data point such as (6, 10) has a positive latitude, which the
# southern data (Lat < 0) never reaches.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=12,color="red")
plt.xlabel('Max Temp in Fahrenheit')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Southern Hemisphere - Max Temp vs. Latitude Linear Regression.jpeg')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on humidity (x), northern hemisphere
x_values = northern_df['Humidity']
y_values = northern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height) so it
# stays inside the plot whatever the data range is.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Humidity %')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Northern Hemisphere - Humidity vs. Latitude Linear Regression.jpeg')
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on humidity (x), southern hemisphere
x_values = southern_df['Humidity']
y_values = southern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height).
# A fixed data point such as (6, 10) has a positive latitude, which the
# southern data (Lat < 0) never reaches.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Humidity %')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Southern Hemisphere - Humidity vs. Latitude Linear Regression.jpeg')
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on cloud cover (x), northern hemisphere
x_values = northern_df['Cloud Cover']
y_values = northern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height) so it
# stays inside the plot whatever the data range is.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Cloud Cover')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Northern Hemisphere - Cloud Cover vs. Latitude Linear Regression.jpeg')
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on cloud cover (x), southern hemisphere
x_values = southern_df['Cloud Cover']
y_values = southern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height).
# A fixed data point such as (6, 10) has a positive latitude, which the
# southern data (Lat < 0) never reaches.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Cloud Cover')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Southern Hemisphere - Cloud Cover vs. Latitude Linear Regression.jpeg')
plt.show()

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on wind speed (x), northern hemisphere
x_values = northern_df['Wind Speed']
y_values = northern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height) so it
# stays inside the plot whatever the data range is.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Wind Speed in MPH')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Northern Hemisphere - Wind Speed vs. Latitude Linear Regression.jpeg')
plt.show()

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Linear regression of latitude (y) on wind speed (x), southern hemisphere
x_values = southern_df['Wind Speed']
y_values = southern_df['Lat']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
bestfit_values = x_values * slope + intercept
linear_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,bestfit_values,"r-")
# Position the equation relative to the axes (fractions of width/height).
# A fixed data point such as (6, 10) has a positive latitude, which the
# southern data (Lat < 0) never reaches.
plt.annotate(linear_eq,(0.05,0.9),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Wind Speed in MPH')
plt.ylabel('Latitude')
# Print out the r-squared value along with the plot.
# linregress reports the correlation coefficient r, so square it.
print(f"The r-squared is: {rvalue**2}")
plt.savefig('Southern Hemisphere - Wind Speed vs. Latitude Linear Regression.jpeg')
plt.show()

In [None]:
# Written observations, printed as two lines.
# Adjacent string literals are used instead of backslash continuations inside
# one literal: a backslash-newline is removed from the string entirely, which
# glued the last word of each source line to the first word of the next.
trend_summary = (
    "It is clear from the plots that there is a strong correlation between "
    "temperature and proximity to the equator."
)
detail_summary = (
    "This is true for both hemispheres. However, the relationship between latitude and the other data points "
    "(cloudiness, Humidity, and wind speed) is more random. Despite a lack of strong correlation, some of the data points "
    "revealed some interesting findings. Most of the humidity values fell between 40% - 90%. Wind speed was mostly in the "
    "13 mph or below, irrespective of proximity to the equator. This may indicate the normal range for these observations, "
    "however, too premature to conclude."
)
print(trend_summary)
print(detail_summary)