# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import scipy.stats as st
from pprint import pprint
from datetime import datetime

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this path is not referenced by any other visible cell; the CSV
# export further down writes to "weather_data.csv" instead — confirm which is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# (low, high) bounds in degrees, passed to np.random.uniform when sampling
# random coordinates for the city list.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# Ordered list of the unique city names found, in first-seen order
cities = []

# Draw 1500 random (lat, lng) pairs uniformly over the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# A set tracks the names already collected so the duplicate check is a
# constant-time lookup instead of a scan of the growing list; the list is
# kept alongside it to preserve first-seen order.
seen_cities = set()
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # Only keep the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Show the city count to confirm there are enough cities to analyse
len(cities)

584

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [3]:
# Save config information.
# Use HTTPS: the API key is part of the query string, so plain HTTP would
# send it over the network unencrypted.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL; the city name is appended after "q=" per request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [4]:
# Set up lists to hold response info (one entry per successfully fetched city)
city_name = []
country_name = []
lat = []
lng = []
temp_max = []
humidity = []
clouds = []
wind_speed = []
date = []


# Loop through the list of cities and perform a request for data on each.
# `count` numbers every attempted city, including the ones that fail.
count = 0
print("Start API Call to OpenWeather Database ")
print("---------------------------------------")

for city in cities:
    count += 1
    try:
        # A timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field BEFORE appending anything: if any key is missing,
        # nothing is stored, so all result lists keep the same length.
        found_name = response['name']
        found_country = response['sys']['country']
        found_lat = response['coord']['lat']
        found_lng = response['coord']['lon']
        found_temp_max = response['main']['temp_max']
        found_humidity = response['main']['humidity']
        found_clouds = response['clouds']['all']
        found_wind_speed = response['wind']['speed']
        found_date = response['dt']

    # Catch only the expected failures (network error, non-JSON body, missing
    # or malformed fields) so KeyboardInterrupt and real bugs still surface.
    except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
        print(f"{city} not found in OpenWeather database.")

    else:
        city_name.append(found_name)
        country_name.append(found_country)
        lat.append(found_lat)
        lng.append(found_lng)
        temp_max.append(found_temp_max)
        humidity.append(found_humidity)
        clouds.append(found_clouds)
        wind_speed.append(found_wind_speed)
        date.append(found_date)
        print(f"Processing record {count} for the city: {city}")

    print("---------------------------------------")


Start API Call to OpenWeather Database 
---------------------------------------
Processing record 1 for the city: avarua
---------------------------------------
Processing record 2 for the city: barrow
---------------------------------------
Processing record 3 for the city: kapaa
---------------------------------------
attawapiskat not found in OpenWeather database.
---------------------------------------
tuggurt not found in OpenWeather database.
---------------------------------------
Processing record 6 for the city: rikitea
---------------------------------------
Processing record 7 for the city: tastur
---------------------------------------
Processing record 8 for the city: pevek
---------------------------------------
Processing record 9 for the city: nanortalik
---------------------------------------
Processing record 10 for the city: paamiut
---------------------------------------
Processing record 11 for the city: qaanaaq
---------------------------------------
Processing re

Processing record 98 for the city: nikolskoye
---------------------------------------
Processing record 99 for the city: hithadhoo
---------------------------------------
Processing record 100 for the city: prokuplje
---------------------------------------
Processing record 101 for the city: caconda
---------------------------------------
Processing record 102 for the city: cockburn town
---------------------------------------
tsihombe not found in OpenWeather database.
---------------------------------------
Processing record 104 for the city: manjacaze
---------------------------------------
Processing record 105 for the city: kuching
---------------------------------------
Processing record 106 for the city: georgetown
---------------------------------------
Processing record 107 for the city: santa cruz de tenerife
---------------------------------------
Processing record 108 for the city: atuona
---------------------------------------
Processing record 109 for the city: bluff
----

Processing record 194 for the city: valparaiso
---------------------------------------
Processing record 195 for the city: leningradskiy
---------------------------------------
Processing record 196 for the city: santa cruz de la palma
---------------------------------------
Processing record 197 for the city: ilo
---------------------------------------
Processing record 198 for the city: anadyr
---------------------------------------
Processing record 199 for the city: abu dhabi
---------------------------------------
saleaula not found in OpenWeather database.
---------------------------------------
Processing record 201 for the city: walvis bay
---------------------------------------
Processing record 202 for the city: thiruvarur
---------------------------------------
Processing record 203 for the city: hobyo
---------------------------------------
Processing record 204 for the city: atbasar
---------------------------------------
Processing record 205 for the city: decatur
-------

Processing record 290 for the city: itaituba
---------------------------------------
Processing record 291 for the city: porto novo
---------------------------------------
Processing record 292 for the city: esperance
---------------------------------------
karkaralinsk not found in OpenWeather database.
---------------------------------------
Processing record 294 for the city: pringsewu
---------------------------------------
Processing record 295 for the city: jacqueville
---------------------------------------
Processing record 296 for the city: chuy
---------------------------------------
Processing record 297 for the city: sussex
---------------------------------------
mrirt not found in OpenWeather database.
---------------------------------------
Processing record 299 for the city: kushima
---------------------------------------
Processing record 300 for the city: tari
---------------------------------------
Processing record 301 for the city: lufilufi
-------------------------

Processing record 388 for the city: chuka
---------------------------------------
Processing record 389 for the city: wewak
---------------------------------------
Processing record 390 for the city: muriwai beach
---------------------------------------
Processing record 391 for the city: huarmey
---------------------------------------
Processing record 392 for the city: san marcos
---------------------------------------
Processing record 393 for the city: snezhnogorsk
---------------------------------------
Processing record 394 for the city: gizo
---------------------------------------
Processing record 395 for the city: erzin
---------------------------------------
Processing record 396 for the city: iqaluit
---------------------------------------
Processing record 397 for the city: lovington
---------------------------------------
Processing record 398 for the city: richards bay
---------------------------------------
Processing record 399 for the city: klyuchevskiy
---------------

Processing record 486 for the city: majene
---------------------------------------
Processing record 487 for the city: labuhan
---------------------------------------
Processing record 488 for the city: solnechnyy
---------------------------------------
Processing record 489 for the city: huaraz
---------------------------------------
Processing record 490 for the city: fairbanks
---------------------------------------
Processing record 491 for the city: batticaloa
---------------------------------------
Processing record 492 for the city: isawa
---------------------------------------
Processing record 493 for the city: khandyga
---------------------------------------
Processing record 494 for the city: seddon
---------------------------------------
Processing record 495 for the city: ovalle
---------------------------------------
Processing record 496 for the city: kisanga
---------------------------------------
Processing record 497 for the city: quelimane
---------------------------

Processing record 583 for the city: kasongo-lunda
---------------------------------------
bargal not found in OpenWeather database.
---------------------------------------


In [5]:
# Number of cities for which the API loop stored a complete record
# (failed lookups are skipped, so this can be smaller than len(cities)).
len(city_name)


541

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [6]:
# Assemble the per-city response lists into one table, one column per measurement.
column_labels = [
    "City",
    "Country",
    "Latitude",
    "Longitude",
    "Temperature (F)",
    "Humidity (%)",
    "Cloudiness (%)",
    "Wind Speed (mph)",
    "Date",
]
column_values = [
    city_name,
    country_name,
    lat,
    lng,
    temp_max,
    humidity,
    clouds,
    wind_speed,
    date,
]
weather_dict = dict(zip(column_labels, column_values))
weather_data = pd.DataFrame(weather_dict)

# Timestamp of this run; the plot titles below embed it as the acquisition date.
# Approach from https://www.programiz.com/python-programming/datetime/timestamp-datetime
now = datetime.now()

# Persist the table as CSV without the row index.
weather_data.to_csv("weather_data.csv", encoding="utf-8", index=False)

# Preview the first five rows.
weather_data.head(5)

Unnamed: 0,City,Country,Latitude,Longitude,Temperature (F),Humidity (%),Cloudiness (%),Wind Speed (mph),Date
0,Avarua,CK,-21.2078,-159.775,84.2,74,40,6.91,1611082727
1,Barrow,US,71.2906,-156.7887,1.4,85,90,24.16,1611082829
2,Kapaa,US,22.0752,-159.319,69.8,88,90,26.46,1611082831
3,Rikitea,PF,-23.1203,-134.9692,77.97,76,63,18.43,1611082832
4,Testour,TN,36.5513,9.4431,48.2,76,0,3.36,1611082832


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [7]:
# Select any rows whose reported humidity exceeds 100% and show them
# (an empty frame means there is nothing to remove).
hi_hum = weather_data[weather_data['Humidity (%)'].gt(100)]
print(hi_hum)

Empty DataFrame
Columns: [City, Country, Latitude, Longitude, Temperature (F), Humidity (%), Cloudiness (%), Wind Speed (mph), Date]
Index: []


In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of maximum temperature against latitude for every city
x_values = weather_data['Latitude']
y_values = weather_data['Temperature (F)']

plt.figure(1, figsize=(10, 6))
plt.scatter(x_values, y_values)
# xlim takes only (left, right). The former third positional argument (10) was
# bound to `emit`, not a tick step, and raises TypeError on Matplotlib
# releases where `emit` is keyword-only.
plt.xlim(-90, 90)
plt.title('Maximum Temperature by Latitude (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
# Save before show(): show() may clear the current figure
plt.savefig('world_temp_lat.png')
plt.show()



This plot demonstrates that maximum daily temperature decreases as one moves further away from the equator (both north and south). At latitudes far from the equator, maximum temperatures are currently higher in the southern hemisphere than in the northern hemisphere. This is likely because the southern hemisphere is currently in summer while the northern hemisphere is in winter: due to the tilt of the Earth's axis, the southern hemisphere is tilted toward the sun and the northern hemisphere is tilted away from it.

## Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude for every city
x_values = weather_data['Latitude']
y_values = weather_data['Humidity (%)']

plt.figure(1, figsize=(10, 6))
plt.scatter(x_values, y_values)
# xlim takes only (left, right). The former third positional argument (10) was
# bound to `emit`, not a tick step, and raises TypeError on Matplotlib
# releases where `emit` is keyword-only.
plt.xlim(-90, 90)
plt.title('Humidity by Latitude (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
# Save before show(): show() may clear the current figure
plt.savefig('world_hum_lat.png')
plt.show()



This plot demonstrates that there is no particular relationship between percent humidity and latitude. There is some clumping of points in the 80% to 100% humidity at latitudes above 40 degrees north, but no clear correlation to my eye.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude for every city
x_values = weather_data['Latitude']
y_values = weather_data['Cloudiness (%)']

plt.figure(1, figsize=(10, 6))
plt.scatter(x_values, y_values)
# xlim takes only (left, right). The former third positional argument (10) was
# bound to `emit`, not a tick step, and raises TypeError on Matplotlib
# releases where `emit` is keyword-only.
plt.xlim(-90, 90)
plt.title('Cloudiness by Latitude (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
# Save before show(): show() may clear the current figure
plt.savefig('world_cloud_lat.png')
plt.show()

This plot demonstrates that there is no clear relationship between cloudiness and latitude. There is some clumping of data at 0%, 20%, 40%, ~75%, and 100%. This makes me think of human error in assigning the values.

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude for every city
x_values = weather_data['Latitude']
y_values = weather_data['Wind Speed (mph)']

plt.figure(1, figsize=(10, 6))
plt.scatter(x_values, y_values)
# xlim takes only (left, right). The former third positional argument (10) was
# bound to `emit`, not a tick step, and raises TypeError on Matplotlib
# releases where `emit` is keyword-only.
plt.xlim(-90, 90)
plt.title('Wind Speed by Latitude (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
# Save before show(): show() may clear the current figure
plt.savefig('world_wspeed_lat.png')
plt.show()


I see no clear relationship between wind speed and latitude on this plot.

## Linear Regression

In [None]:
# Split the cities by hemisphere; the equator (latitude 0) counts as northern.
latitude = weather_data['Latitude']
nh_data = weather_data[latitude.ge(0)]
sh_data = weather_data[latitude.lt(0)]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Least-squares fit of max temperature on latitude, Northern Hemisphere cities
x_values = nh_data['Latitude']
y_values = nh_data['Temperature (F)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Max. Temp. by Latitude-Northern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.savefig('nh_temp_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Max. Temp (F) and Latitude in the Northern Hemisphere is {round(rvalue,2)}")



####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Least-squares fit of max temperature on latitude, Southern Hemisphere cities
x_values = sh_data['Latitude']
y_values = sh_data['Temperature (F)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Max. Temp. by Latitude-Southern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.savefig('sh_temp_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Max. Temp (F) and Latitude in the Southern Hemisphere is {round(rvalue,2)}")

Analysis of Max. Temp. by latitude in both the southern and northern hemispheres shows a strong correlation, with the Max. Temp. getting higher as we get closer to the equator. In the southern hemisphere this manifests as a positive correlation (the equator is on the right of the x-axis); in the northern hemisphere a negative correlation describes the same trend (the equator is on the left of the x-axis).

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Least-squares fit of humidity on latitude, Northern Hemisphere cities
x_values = nh_data['Latitude']
y_values = nh_data['Humidity (%)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Humidity by Latitude-Northern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.savefig('nh_hum_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Humidity (%) and Latitude in the Northern Hemisphere is {round(rvalue,2)}")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Least-squares fit of humidity on latitude, Southern Hemisphere cities
x_values = sh_data['Latitude']
y_values = sh_data['Humidity (%)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Humidity by Latitude-Southern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.savefig('sh_hum_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Humidity (%) and Latitude in the Southern Hemisphere is {round(rvalue,2)}")

In this analysis, humidity is increasing as we move further north from the equator and decreasing as we move further south from the equator. The Pearson correlation coefficient demonstrates a stronger correlation for the northern hemisphere.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Least-squares fit of cloudiness on latitude, Northern Hemisphere cities
x_values = nh_data['Latitude']
y_values = nh_data['Cloudiness (%)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Cloudiness by Latitude-Northern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.savefig('nh_cloud_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Cloudiness (%) and Latitude in the Northern Hemisphere is {round(rvalue,2)}")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Least-squares fit of cloudiness on latitude, Southern Hemisphere cities
x_values = sh_data['Latitude']
y_values = sh_data['Cloudiness (%)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Cloudiness by Latitude-Southern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.savefig('sh_cloud_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Cloudiness (%) and Latitude in the Southern Hemisphere is {round(rvalue,2)}")

Analysis of cloudiness versus latitude shows a slight trend for cloudiness to increase in the southern hemisphere as we move towards the equator. In the northern hemisphere, cloudiness is increasing as we move away from the equator. In both the northern and the southern hemispheres there are clumps around 0% and near 100%, but no clear correlation to latitude is evident to my eye.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Least-squares fit of wind speed on latitude, Northern Hemisphere cities
x_values = nh_data['Latitude']
y_values = nh_data['Wind Speed (mph)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Wind Speed by Latitude-Northern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.savefig('nh_ws_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Wind Speed and Latitude in the Northern Hemisphere is {round(rvalue,2)}")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Least-squares fit of wind speed on latitude, Southern Hemisphere cities
x_values = sh_data['Latitude']
y_values = sh_data['Wind Speed (mph)']
(slope, intercept, rvalue, pvalue, stderr) = st.linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(1,figsize=(10,6))
plt.scatter(x_values,y_values)
# Show the fitted equation in the legend; previously line_eq was built but never displayed
plt.plot(x_values,regress_values,"r-",label=line_eq)
plt.legend()
plt.title('Wind Speed by Latitude-Southern Hemisphere (data acquired ' + str(now) + ')')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.savefig('sh_ws_lat.png')
plt.show()

# linregress already returns the Pearson r as `rvalue`, so it is not recomputed
print(f"The Pearson correlation coefficient between Wind Speed and Latitude in the Southern Hemisphere is {round(rvalue,2)}")

These two plots show no correlation between wind speed and latitude in the northern hemisphere and a slight decrease in wind speed as we move towards the equator in the southern hemisphere. The correlation in the southern hemisphere appears weak.