# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

%matplotlib notebook
# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)
print(weather_api_key)

97a5f317ff8cdca54a2422c16a476313


In [2]:
pip install citipy

Note: you may need to restart the kernel to use updated packages.


## Generate Cities List

In [3]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

607

In [4]:
for city in cities:
    print(city)

pacific grove
hermanus
hobyo
pitimbu
hobart
upernavik
bluff
nizhneyansk
hilo
ituni
sentyabrskiy
bubaque
ushuaia
kapaa
albany
strezhevoy
dolbeau
hasaki
saint george
vao
bac lieu
toliary
port blair
vaini
rikitea
palabuhanratu
mataura
busselton
los llanos de aridane
hofn
faanui
tsihombe
farmington
petropavlovsk-kamchatskiy
chuy
new norfolk
airai
port hedland
barentsburg
raesfeld
hope
the pas
seoul
hami
campbell river
arraial do cabo
illoqqortoormiut
parnamirim
kashiwazaki
ouadda
fukuma
lasa
jamestown
wewak
krupina
tocopilla
barrow
atuona
hare bay
puerto suarez
port elizabeth
yellowknife
punta arenas
klaksvik
praia
tyler
aswan
porangatu
san patricio
tiksi
tura
severodvinsk
olafsvik
princeton
puerto ayora
daru
beira
leningradskiy
dikson
imbituba
banda aceh
acapulco
imeni poliny osipenko
mys shmidta
cabo san lucas
yenagoa
praia da vitoria
lagoa
meybod
bredasdorp
jamame
carnarvon
satitoa
hithadhoo
okhotsk
turayf
paragominas
ketchikan
longyearbyen
kaitangata
saldanha
sitka
georgetown
nanortali

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [8]:
units = "Imperial"
base_url = f"http://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&units={units}"

index = 0
set_count = 0

weather_city = []
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

print('Beginning Data Retrieval')
print('------------------------')

for city in cities:
    try:
        city_url = f"{base_url}&q={city}"
        response = requests.get(city_url).json()
        print(f"Processing Record {index} of Set {set_count} | {city}")
        
        lat.append(response['coord']['lat'])
        lng.append(response['coord']['lon'])
        max_temp.append(response['main']['temp_max'])
        humidity.append(response['main']['humidity'])
        cloudiness.append(response['clouds']['all'])
        wind_speed.append(response['wind']['speed'])
        country.append(response['sys']['country'])
        date.append(response['dt'])
        weather_city.append(city)
        
    except:
        print("City not found. Skipping.. ")  
    
    index = index + 1
    
    if index == 50:
        index = 0
        set_count = set_count + 1
        
print("-----------------------------")
print(f"Data Retrieval Complete")     
print("-----------------------------")

Beginning Data Retrieval
------------------------
Processing Record 0 of Set 0 | pacific grove
Processing Record 1 of Set 0 | hermanus
Processing Record 2 of Set 0 | hobyo
Processing Record 3 of Set 0 | pitimbu
Processing Record 4 of Set 0 | hobart
Processing Record 5 of Set 0 | upernavik
Processing Record 6 of Set 0 | bluff
Processing Record 7 of Set 0 | nizhneyansk
City not found. Skipping.. 
Processing Record 8 of Set 0 | hilo
Processing Record 9 of Set 0 | ituni
City not found. Skipping.. 
Processing Record 10 of Set 0 | sentyabrskiy
City not found. Skipping.. 
Processing Record 11 of Set 0 | bubaque
Processing Record 12 of Set 0 | ushuaia
Processing Record 13 of Set 0 | kapaa
Processing Record 14 of Set 0 | albany
Processing Record 15 of Set 0 | strezhevoy
Processing Record 16 of Set 0 | dolbeau
City not found. Skipping.. 
Processing Record 17 of Set 0 | hasaki
Processing Record 18 of Set 0 | saint george
Processing Record 19 of Set 0 | vao
Processing Record 20 of Set 0 | bac lieu

Processing Record 37 of Set 3 | kijini
Processing Record 38 of Set 3 | tasiilaq
Processing Record 39 of Set 3 | pevek
Processing Record 40 of Set 3 | houma
Processing Record 41 of Set 3 | lima
Processing Record 42 of Set 3 | ilulissat
Processing Record 43 of Set 3 | shetpe
Processing Record 44 of Set 3 | salalah
Processing Record 45 of Set 3 | high level
Processing Record 46 of Set 3 | constitucion
Processing Record 47 of Set 3 | khatanga
Processing Record 48 of Set 3 | tuktoyaktuk
Processing Record 49 of Set 3 | bam
Processing Record 0 of Set 4 | lebu
Processing Record 1 of Set 4 | grand river south east
City not found. Skipping.. 
Processing Record 2 of Set 4 | mahebourg
Processing Record 3 of Set 4 | nouadhibou
Processing Record 4 of Set 4 | avarua
Processing Record 5 of Set 4 | saint anthony
Processing Record 6 of Set 4 | hirara
Processing Record 7 of Set 4 | torbay
Processing Record 8 of Set 4 | port moresby
Processing Record 9 of Set 4 | amderma
City not found. Skipping.. 
Proces

Processing Record 17 of Set 11 | moiyabana
City not found. Skipping.. 
Processing Record 18 of Set 11 | alice springs
Processing Record 19 of Set 11 | coxim
Processing Record 20 of Set 11 | mrirt
City not found. Skipping.. 
Processing Record 21 of Set 11 | tazovskiy
Processing Record 22 of Set 11 | springfield
Processing Record 23 of Set 11 | yerbogachen
Processing Record 24 of Set 11 | santa maria
Processing Record 25 of Set 11 | tabory
Processing Record 26 of Set 11 | uwayl
City not found. Skipping.. 
Processing Record 27 of Set 11 | saleaula
City not found. Skipping.. 
Processing Record 28 of Set 11 | linhares
Processing Record 29 of Set 11 | tilichiki
Processing Record 30 of Set 11 | laguna
Processing Record 31 of Set 11 | umm bab
Processing Record 32 of Set 11 | parainen
Processing Record 33 of Set 11 | estremoz
Processing Record 34 of Set 11 | talnakh
Processing Record 35 of Set 11 | fernley
Processing Record 36 of Set 11 | vestmannaeyjar
Processing Record 37 of Set 11 | margate


In [23]:
units = "Imperial"
base_url = f"http://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&units={units}"

city = "paita"
city_url = f"{base_url}&q={city}"
response = requests.get(city_url).json()
response

{'coord': {'lon': -81.11, 'lat': -5.09},
 'weather': [{'id': 800,
   'main': 'Clear',
   'description': 'clear sky',
   'icon': '01n'}],
 'base': 'stations',
 'main': {'temp': 62.28,
  'feels_like': 57.29,
  'temp_min': 62.28,
  'temp_max': 62.28,
  'pressure': 1011,
  'humidity': 86,
  'sea_level': 1011,
  'grnd_level': 1003},
 'visibility': 10000,
 'wind': {'speed': 13.4, 'deg': 214},
 'clouds': {'all': 5},
 'dt': 1596238222,
 'sys': {'country': 'PE', 'sunrise': 1596195233, 'sunset': 1596238053},
 'timezone': -18000,
 'id': 3694112,
 'name': 'Paita',
 'cod': 200}

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [24]:
sum = {"City": weather_city, "Lat": lat, "Lng": lng, 
       "Max Temp": max_temp, "Humidity": humidity, 
       "Cloudiness": cloudiness, "Wind Speed": wind_speed, 
       "Country": country, "Date": date}

weather_df = pd.DataFrame(sum)
weather_df.count()

City          557
Lat           557
Lng           557
Max Temp      557
Humidity      557
Cloudiness    557
Wind Speed    557
Country       557
Date          557
dtype: int64

In [13]:
weather_df 

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,pacific grove,36.62,-121.92,71.01,60,1,9.17,US,1596238050
1,hermanus,-34.42,19.23,44.01,95,1,5.75,ZA,1596238148
2,hobyo,5.35,48.53,77.16,75,1,28.16,SO,1596238149
3,pitimbu,-7.47,-34.81,73.40,94,20,5.82,BR,1596238149
4,hobart,-42.88,147.33,50.00,71,75,10.29,AU,1596237898
...,...,...,...,...,...,...,...,...,...
552,los algarrobos,8.50,-82.43,86.00,74,40,6.93,PA,1596238270
553,great yarmouth,52.61,1.73,73.00,82,58,1.01,GB,1596238270
554,kloulklubed,7.04,134.26,78.80,94,90,5.82,PW,1596238270
555,tabou,4.42,-7.35,75.15,86,92,11.92,CI,1596238271


In [25]:
#Export DataFrame to CSV File
weather_df.to_csv("../output_data/weather_df.csv")

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [26]:
weather_df = weather_df[weather_df["Humidity"] <= 100]
weather_df.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,pacific grove,36.62,-121.92,71.01,60,1,9.17,US,1596238050
1,hermanus,-34.42,19.23,44.01,95,1,5.75,ZA,1596238148
2,hobyo,5.35,48.53,77.16,75,1,28.16,SO,1596238149
3,pitimbu,-7.47,-34.81,73.4,94,20,5.82,BR,1596238149
4,hobart,-42.88,147.33,50.0,71,75,10.29,AU,1596237898


In [27]:
weather_df.describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,557.0,557.0,557.0,557.0,557.0,557.0,557.0
mean,19.978061,14.8986,69.957253,70.129264,48.131059,7.854381,1596238000.0
std,33.110105,91.894027,13.374569,21.371866,37.528585,5.507134,67.90144
min,-54.8,-179.17,33.8,1.0,0.0,0.45,1596238000.0
25%,-7.74,-66.85,59.79,58.0,9.0,3.94,1596238000.0
50%,21.46,18.42,72.0,75.0,43.0,6.6,1596238000.0
75%,48.42,99.83,79.07,87.0,87.0,10.29,1596238000.0
max,78.22,179.32,111.2,100.0,100.0,30.65,1596238000.0


In [28]:
#  Get the indices of cities that have humidity over 100%.
indexes = weather_df[weather_df["Humidity"]>100]["City"].index
indexes

Int64Index([], dtype='int64')

In [29]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
clean_data = weather_df.drop(indexes, inplace=False)
clean_data.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,pacific grove,36.62,-121.92,71.01,60,1,9.17,US,1596238050
1,hermanus,-34.42,19.23,44.01,95,1,5.75,ZA,1596238148
2,hobyo,5.35,48.53,77.16,75,1,28.16,SO,1596238149
3,pitimbu,-7.47,-34.81,73.4,94,20,5.82,BR,1596238149
4,hobart,-42.88,147.33,50.0,71,75,10.29,AU,1596237898


In [30]:
# Extract relevant fields from the data frame


# Export the City_Data into a csv
clean_data.to_csv('../output_data/clean_city.csv')

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [51]:
today = time.strftime("%m/%d/%Y")
temp= clean_data["Max Temp"]
lat= clean_data["Lat"]

plt.figure(1)
fig = plt.figure(figsize=(5,5))

plt.scatter(lat, temp, marker="o", facecolors="blue", edgecolors="black", alpha = 0.5 )
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.title(f"City Latitude vs. Max Temperature (F) ({today})")
plt.grid(True)

fig.savefig('../output_data/Fig1.png', bbox_inches='tight', dpi=150)
plt.show()
print("The plot proves that as we generally move away from the equator, temperature drops")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

The plot proves that as we generally move away from the equator, temperature drops


## Latitude vs. Humidity Plot

In [52]:
today = time.strftime("%m/%d/%Y")
humidity = clean_data["Humidity"]
lat= clean_data["Lat"]

plt.figure(2)

plt.scatter(lat, humidity , facecolors="blue", edgecolors="black", alpha = 0.5)
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f"City Latitude vs Humidity (%)({today})")
fig.savefig('../output_data/Fig2.png', bbox_inches='tight', dpi=150)
print("The plot shows that there is no real pattern that can describe the relationship between latitude and humidity")
plt.show()

<IPython.core.display.Javascript object>

The plot shows that there is no real pattern that can describe the relationship between latitude and humidity


## Latitude vs. Cloudiness Plot

In [56]:
today = time.strftime("%m/%d/%Y")
cloudiness = clean_data["Cloudiness"]
lat= clean_data["Lat"]

plt.figure(3)
plt.scatter(lat, cloudiness, facecolors="blue", edgecolors="black", alpha = 0.5)
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title(f"City Latitude vs Cloudiness ({today})")
fig.savefig('../output_data/fig3.png', bbox_inches='tight', dpi=150)

print("The plot shows that there is no real pattern that can describe the relationship between latitude and cloudiness")
plt.show()

The plot shows that there is no real pattern that can describe the relationship between latitude and cloudiness


## Latitude vs. Wind Speed Plot

In [57]:
today = time.strftime("%m/%d/%Y")
wind_speed = clean_data["Wind Speed"]
lat= clean_data["Lat"]

plt.figure(4)
plt.scatter(lat, wind_speed, facecolors="blue", edgecolors="black", alpha = 0.5)
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.title(f"City Latitude vs Wind Speed ({today})")
fig.savefig('../output_data/fig4.png', bbox_inches='tight', dpi=150)

print("The plot shows that there is no real pattern that can describe the relationship between latitude and windspeed")

<IPython.core.display.Javascript object>

The plot shows that there is no real pattern that can describe the relationship between latitude and windspeed


## Linear Regression

In [14]:
# OPTIONAL: Create a function to create Linear Regression plots

In [44]:
# Create Northern and Southern Hemisphere DataFrames
northern_df = clean_data.loc[clean_data["Lat"] > 0,:]
southern_df = clean_data.loc[clean_data["Lat"] < 0,:]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [58]:
x_values = northern_df["Lat"]
y_values = northern_df["Max Temp"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(5)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.title(f"Northern Hemisphere - Max Temp vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression displays a negative correlation.")
print("In the nothern hemisphere, as you move away from the equator the temperature decreases.")

plt.show()

<IPython.core.display.Javascript object>

The r-squared is: -0.6577847644532192
The regression displays a negative correlation.
In the nothern hemisphere, as you move away from the equator the temperature decreases.


####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [59]:
x_values = southern_df["Lat"]
y_values = southern_df["Max Temp"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(6)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-50,25),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.title(f"Southern Hemisphere - Max Temp vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression displays a positive correlation.")
print("In the southern hemisphere, as you get closer to the equator the temperature increases.")

plt.show()

<IPython.core.display.Javascript object>

The r-squared is: 0.806587991210854
The regression displays a positive correlation.
In the southern hemisphere, as you get closer to the equator the temperature increases.


####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [60]:
x_values = northern_df["Lat"]
y_values = northern_df["Humidity"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(7)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(45,15),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title(f"Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression does not display much of a correlation between the data sets.") 
plt.show()

<IPython.core.display.Javascript object>

The r-squared is: -0.02002711423020236
The regression does not display much of a correlation between the data sets.


####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [61]:
x_values = southern_df["Lat"]
y_values = southern_df["Humidity"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(8)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-55,20),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title(f"Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression does not display much of a correlation between the data sets.")
plt.show()

<IPython.core.display.Javascript object>

The r-squared is: -0.10036399416462048
The regression does not display much of a correlation between the data sets.


####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [62]:
x_values = northern_df["Lat"]
y_values = northern_df["Cloudiness"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(9)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(45,15),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title(f"Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression does not display much of a correlation between the data sets.")
plt.show()

<IPython.core.display.Javascript object>

The r-squared is: -0.00802987083718976
The regression does not display much of a correlation between the data sets.


####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [63]:
x_values = southern_df["Lat"]
y_values = southern_df["Cloudiness"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(10)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-55,20),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title(f"Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression does not display much of a correlation between the data sets.")


<IPython.core.display.Javascript object>

The r-squared is: 0.005889388835745391
The regression does not display much of a correlation between the data sets.


####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [64]:

x_values = northern_df["Lat"]
y_values = northern_df["Wind Speed"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(11)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(45,13),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f"Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression does not display much of a correlation between the data sets.")
plt.show()

<IPython.core.display.Javascript object>

The r-squared is: -0.03815165568465475
The regression does not display much of a correlation between the data sets.


####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [65]:
x_values = southern_df["Lat"]
y_values = southern_df["Wind Speed"]

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

plt.figure(12)
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-50,10),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f"Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression ({today})")
print(f"The r-squared is: {rvalue}")
print("The regression does not display much of a correlation between the data sets.")
plt.show()

<IPython.core.display.Javascript object>

The r-squared is: -0.06796253473737017
The regression does not display much of a correlation between the data sets.
