# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json
import scipy.stats as st

# Import API key
from api_key import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

611

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [3]:
# Building weather_url
url ="http://api.openweathermap.org/data/2.5/weather?"
units="imperial"

# base_url = "http://api.openweathermap.org/data/2.5/weather"
# units = "imperial"
# Concatinating weather_url_query

query_url=(f"{url}appid={weather_api_key}&units={units}&q=")
query_url

# weather_url_query = (f"{base_url}appid={weather_api_key}&units={units}&q=")
# weather_url_query

# Print statements to begin API retrieval 
#print("Beginning of Data Retrieval")
#print("---------------------------")
#print is working

# Creating empty lists to populate when calling weather_api
city_list = []
cloudiness = []
country = []
date = []
humidity = []
latitude =[]
longitude =[]
max_temp = []
wind_speed = []
index_list = 0

record = 1

# For loop to call the API
for city in cities: 
    try: 
# requesting from API to search each city in the list cities
        response = requests.get(query_url + city).json()
#         response
#         print(json.dumps(response, indent=4,sort_keys= True))
#       Appending information to empty lists as calls are concluded
        city_list.append(response["name"])
        cloudiness.append(response["clouds"]["all"])
        country.append(response["sys"]["country"])
        date.append(response["dt"])
        humidity.append(response["main"]["humidity"])
        latitude.append(response["coord"]["lat"])
        longitude.append(response["coord"]["lon"])
        max_temp.append(response["main"]["temp_max"])
        wind_speed.append(response["wind"] ["speed"])
        if index_list == 50:
            index_list = 0
            record = record+1
        else: 
            index_list = index_list+1
        time.sleep (1.01)
        print(city)
    except KeyError: 
        print("City not found ...")
    continue
        
    

ushuaia
fairbanks
xingtai
odweyne
vanimo
alice springs
punta arenas
mar del plata
algiers
luderitz
cidreira
rioja
goderich
lorengau
sfantu gheorghe
thompson
barrow
khatanga
City not found ...
cape town
verkhnyaya inta
mataura
rikitea
varhaug
nikolskoye
port lincoln
hermanus
maragogi
sur
port alfred
hambantota
moron
rocha
guerrero negro
busselton
aljustrel
hangu
City not found ...
mpongwe
puerto ayora
hobart
esperance
llanes
grindavik
salalah
souillac
moerai
vaini
kyzyl-suu
arraial do cabo
batagay
nanortalik
City not found ...
narsaq
longyearbyen
araxa
saint george
chokurdakh
askino
gornyy
berwick
tarauaca
torbay
new norfolk
ilulissat
puro
waingapu
trimbach
majene
chapais
alta
shache
tepelene
norman wells
albany
mehamn
City not found ...
ngunguru
sao felix do xingu
City not found ...
bengkulu
cairns
leningradskiy
rundu
port hedland
City not found ...
kapaa
ostrovnoy
ivanava
le port
baherden
City not found ...
tiksi
medicine hat
providencia
severo-kurilsk
camacha
bluff
chuy
robe
turbat
b

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [9]:
weather_dict = {"City": city_list,
               "Cloudiness": cloudiness,
               "Country": country,
               "Date": date,
               "Humidity": humidity,
               "Latitude": latitude, 
               "Longitude": longitude, 
               "Max Temp": max_temp, 
               "Wind Speed": wind_speed}

weather_df = pd.DataFrame(weather_dict)

weather_df.count()

City          559
Cloudiness    559
Country       559
Date          559
Humidity      559
Latitude      559
Longitude     559
Max Temp      559
Wind Speed    559
dtype: int64

In [14]:
weather_df.to_csv("../output_data/cities.csv", index=False, header=True)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [11]:
#  Get the indices of cities that have humidity under 100%.
humidity_under_100_df=weather_df.loc[weather_df["Humidity"]<=100]
humidity_under_100_df

Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Latitude,Longitude,Max Temp,Wind Speed
0,Ushuaia,40,AR,1597095386,80,-54.80,-68.30,33.80,18.34
1,Fairbanks,1,US,1597095327,58,64.84,-147.72,59.00,10.29
2,Xingtai,54,CN,1597095386,93,37.06,114.49,77.67,0.94
3,Oodweyne,61,SO,1597095386,66,9.41,45.06,73.81,13.33
4,Vanimo,71,PG,1597095386,83,-2.67,141.30,78.67,4.21
...,...,...,...,...,...,...,...,...,...
554,Faya,51,SA,1597095451,77,18.39,42.45,69.80,4.70
555,Emerald,0,AU,1597095451,81,-23.53,148.17,46.40,7.43
556,Canterbury,87,GB,1597095451,85,51.28,1.08,72.00,3.33
557,Ilebo,34,CD,1597095451,59,-4.32,20.58,77.86,0.45


In [12]:
#  Get the indices of cities that have humidity over 100%.
city_humidity_over_100=weather_df.loc[weather_df["Humidity"]>100].index
city_humidity_over_100

Int64Index([], dtype='int64')

In [13]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
humidity_outliers=weather_df["Humidity"]
humidity_outliers
quartiles= humidity_outliers.quantile([.25,.5,.75])
lowerquartile=quartiles[0.25]
upperquartile=quartiles[0.75]
iqr=upperquartile-lowerquartile
humidity_lower_bound=lowerquartile - (1.5*iqr)
humidity_upper_bound=upperquartile + (1.5*iqr)
print(f"{humidity_lower_bound} and {humidity_upper_bound} are the outliers")

outliers_index=weather_df.loc[(weather_df["Humidity"]<humidity_lower_bound) & (weather_df["Humidity"]>humidity_upper_bound)].index
outliers_index



city_humidity_data = weather_df.drop(outliers_index, inplace=False)
clean_city_data=city_humidity_data.sort_values("Humidity", ascending=False)
clean_city_data

clean_city_data.sort_values("Humidity", ascending = False)

21.0 and 125.0 are the outliers


Unnamed: 0,City,Cloudiness,Country,Date,Humidity,Latitude,Longitude,Max Temp,Wind Speed
159,Airai,52,TL,1597095404,100,-8.93,125.41,57.72,1.45
16,Barrow,90,US,1597095388,100,71.29,-156.79,39.20,10.29
152,Carnarvon,90,AU,1597095403,100,-24.87,113.63,62.60,16.11
530,Mataram,75,ID,1597095447,100,-8.58,116.12,73.40,5.59
539,Khani,90,GE,1597095448,100,41.96,42.96,75.20,6.93
...,...,...,...,...,...,...,...,...,...
469,Chino Valley,1,US,1597095439,7,34.76,-112.45,96.01,18.34
160,Calama,0,CL,1597095404,3,-22.47,-68.93,69.80,19.46
73,Ngunguru,100,NZ,1597095394,3,-35.62,174.50,61.00,20.00
186,Xixiang,98,CN,1597095407,1,35.16,112.86,82.99,3.78


In [None]:
# Extract relevant fields from the data frame
city_weather_df=clean_city_data[["Latitude","Max Temp","Humidity","Cloudiness","Wind Speed"]]
city_weather_df

# Export the City_Data into a csv
city_weather_df.to_csv("../output_data/City_Weather_Data.csv", index=False, header=True)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
plt.scatter(city_weather_df["Latitude"],city_weather_df["Max Temp"])
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (f)")
plt.title("Latitude vs Max Temp")

#Set grid line
plt.grid(linestyle='-', linewidth=1, alpha = 0.5)

#Save the plotted figure as .pngs
plt.savefig("../Images/Latitude vs Max Temp.png")

plt.show()

## Latitude vs. Humidity Plot

In [None]:
plt.scatter(city_weather_df["Latitude"],city_weather_df["Humidity"])
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("Latitude vs Humidity")
#Set grid line
plt.grid(linestyle='-', linewidth=1, alpha = 0.5)

#Save the plotted figure as .pngs
plt.savefig("../Images/Latitude vs Humidity.png")

plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
plt.scatter(city_weather_df["Latitude"],city_weather_df["Humidity"])
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.title("Latitude vs. Cloudiness")

plt.grid(linestyle="-", linewidth = 1, alpha = 0.5)
plt.savefig("../Images/Latitude vs Cloudiness.png")

plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
plt.scatter(city_weather_df["Latitude"],city_weather_df["Wind Speed"])
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.title("Latitude vs. Wind Speed")

plt.grid(linestyle="-", linewidth = 1, alpha = 0.5)
plt.savefig("../Images/Latitude vs Windspeed.png")

plt.show()

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots
northern_hemisphere=city_weather_df.loc[city_weather_df["Latitude"]>=0]
northern_hemisphere

In [None]:
southern_hemisphere=city_weather_df.loc[city_weather_df["Latitude"]<=0]
southern_hemisphere

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
st.linregress(northern_hemisphere["Latitude"],northern_hemisphere["Max Temp"])

x_values = northern_hemisphere["Latitude"]
y_values = northern_hemisphere["Max Temp"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
#plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.title("Northern Hemisphere- Max Temp vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Northern Hemisphere (Max Temp vs. Latitude Linear Regression).png")
%matplotlib inline

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
st.linregress(southern_hemisphere["Latitude"],southern_hemisphere["Max Temp"])

x_values = southern_hemisphere["Latitude"]
y_values = southern_hemisphere["Max Temp"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
#plt.annotate(line_eq,(6,10),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.title("Southern Hemisphere- Max Temp vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Southern Hemisphere (Max Temp vs. Latitude Linear Regression).png")
%matplotlib inline

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
st.linregress(northern_hemisphere["Latitude"],northern_hemisphere["Humidity"])

x_values = northern_hemisphere["Latitude"]
y_values = northern_hemisphere["Humidity"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")

plt.xlabel("Latitude")
plt.ylabel("Humidity")
plt.title("Northern Hemisphere- Humidity vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Northern Hemisphere (Humidity vs. Latitude Linear Regression).png")
%matplotlib inline

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
st.linregress(southern_hemisphere["Latitude"],southern_hemisphere["Humidity"])

x_values = southern_hemisphere["Latitude"]
y_values = southern_hemisphere["Humidity"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")

plt.xlabel("Latitude")
plt.ylabel("Humidity")
plt.title("Southern Hemisphere- Humidity vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Southern Hemisphere (Humidity vs. Latitude Linear Regression).png")
%matplotlib inline

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
st.linregress(northern_hemisphere["Latitude"],northern_hemisphere["Cloudiness"])

x_values = northern_hemisphere["Latitude"]
y_values = northern_hemisphere["Cloudiness"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")

plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.title("Northern Hemisphere- Cloudiness vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Northern Hemisphere (Cloudiness vs. Latitude Linear Regression).png")
%matplotlib inline

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
st.linregress(southern_hemisphere["Latitude"],southern_hemisphere["Cloudiness"])

x_values = southern_hemisphere["Latitude"]
y_values = southern_hemisphere["Cloudiness"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")

plt.xlabel("Latitude")
plt.ylabel("Cloudiness")
plt.title("Southern Hemisphere- Cloudiness vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Southern Hemisphere (Cloudiness vs. Latitude Linear Regression).png")
%matplotlib inline

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
st.linregress(northern_hemisphere["Latitude"],northern_hemisphere["Wind Speed"])

x_values = northern_hemisphere["Latitude"]
y_values = northern_hemisphere["Wind Speed"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")

plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.title("Northern Hemisphere- Wind Speed (mph) vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Northern Hemisphere (Wind Speed (mph) vs. Latitude Linear Regression).png")
%matplotlib inline

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
st.linregress(southern_hemisphere["Latitude"],southern_hemisphere["Wind Speed"])

x_values = southern_hemisphere["Latitude"]
y_values = southern_hemisphere["Wind Speed"]
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")

plt.xlabel("Latitude")
plt.ylabel("Wind Speed")
plt.title("Southern Hemisphere- Wind Speed (mph) vs. Latitude Linear Regression")
plt.show()
plt.savefig("../Images/Southern Hemisphere (Wind Speed (mph) vs. Latitude Linear Regression).png")
%matplotlib inline