# WeatherPy
----



In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Destination path of the exported city table (CSV)
output_data_file = "../output_data/cities.csv"

# Bounds used when sampling random coordinates:
# latitude runs from -90 to 90, longitude from -180 to 180
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [2]:
# Draw 1500 random (lat, lng) points uniformly within the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city once.
# dict.fromkeys de-duplicates with constant-time lookups while preserving
# first-seen order, so `cities` holds the same names in the same order as a
# manual "if city not in cities: append" loop would produce.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)


600

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [3]:
# Save url information
url = "http://api.openweathermap.org/data/2.5/weather?"

# Build query URL (the city name is appended for each request)
query_url = url + "appid=" + weather_api_key + "&q="

# NOTE(review): no `units` parameter is sent, so values are returned in the
# API's default units. Later section headings mention mph -- confirm whether
# `&units=imperial` was intended before plotting.

# Request / throttling settings
REQUEST_TIMEOUT = 10        # seconds to wait for a response before giving up on a city
REQUESTS_PER_WINDOW = 50    # pause after this many API calls to stay under 60 requests/minute
PAUSE_SECONDS = 60          # length of the pause
RECORDS_PER_SET = 50        # number of successful records per "Set" in the print log

# Get weather data for all the random cities, generated in the list cities.

# Creating empty lists to store the data (one entry per successfully retrieved city)
city_name = []
lat = []
long = []
max_temp = []
humidity = []
cloudines = []
wind_speed = []
country = []
date = []


print("--------------------------")
print("Beginning Data Retrieval  ")
print("--------------------------")

# Creating counts
Record_count = 0      # successful records in the current set (used for the log)
myset_count = 1       # current set number (used for the log)
requests_sent = 0     # API calls made since the last pause (used for throttling)

for city in cities:
    try:
        weather_json = requests.get(query_url + city, timeout=REQUEST_TIMEOUT).json()

        # Read every field BEFORE touching the result lists. If any key is
        # missing, nothing has been appended yet, so all lists keep the same
        # length and can still be combined into a DataFrame.
        found_name = weather_json["name"]
        found_lat = weather_json["coord"]["lat"]
        found_lon = weather_json["coord"]["lon"]
        found_max_temp = weather_json["main"]["temp_max"]
        found_humidity = weather_json["main"]["humidity"]
        found_clouds = weather_json["clouds"]["all"]
        found_wind = weather_json["wind"]["speed"]
        found_country = weather_json["sys"]["country"]
        found_date = weather_json["dt"]
    except KeyError:
        # The response did not contain the expected weather fields
        print("City not found. Skipping...")
    except (requests.exceptions.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON:
        # skip this city instead of aborting the whole retrieval.
        print(f"Request failed for {city}. Skipping...")
    else:
        city_name.append(found_name)
        lat.append(found_lat)
        long.append(found_lon)
        max_temp.append(found_max_temp)
        humidity.append(found_humidity)
        cloudines.append(found_clouds)
        wind_speed.append(found_wind)
        country.append(found_country)
        date.append(found_date)

        Record_count += 1
        print(f"Processing Record {Record_count} of Set {myset_count} | {city}")

        # Start a new set in the log after every RECORDS_PER_SET successes
        if Record_count == RECORDS_PER_SET:
            Record_count = 0
            myset_count += 1

    # Failed lookups consume API quota too, so throttle on the number of
    # requests actually sent rather than on the number of successful records.
    requests_sent += 1
    if requests_sent == REQUESTS_PER_WINDOW:
        time.sleep(PAUSE_SECONDS)
        requests_sent = 0

print("--------------------------")
print("Data Retrieval Complete")
print("--------------------------")
              


--------------------------
Beginning Data Retrieval  
--------------------------
Processing Record 1 of Set 1 | ribeira grande
Processing Record 2 of Set 1 | punta arenas
Processing Record 3 of Set 1 | hambantota
Processing Record 4 of Set 1 | mount gambier
Processing Record 5 of Set 1 | san patricio
Processing Record 6 of Set 1 | georgetown
Processing Record 7 of Set 1 | provideniya
Processing Record 8 of Set 1 | katsuura
Processing Record 9 of Set 1 | pierre
Processing Record 10 of Set 1 | busselton
Processing Record 11 of Set 1 | mahebourg
Processing Record 12 of Set 1 | kasama
Processing Record 13 of Set 1 | albany
City not found. Skipping...
Processing Record 14 of Set 1 | carnarvon
Processing Record 15 of Set 1 | jamestown
Processing Record 16 of Set 1 | ambilobe
Processing Record 17 of Set 1 | chalmette
Processing Record 18 of Set 1 | chapais
Processing Record 19 of Set 1 | yellowknife
Processing Record 20 of Set 1 | ushuaia
City not found. Skipping...
Processing Record 21 of Se

Processing Record 35 of Set 4 | marzuq
Processing Record 36 of Set 4 | erenhot
Processing Record 37 of Set 4 | faya
Processing Record 38 of Set 4 | vilyuysk
Processing Record 39 of Set 4 | bathsheba
Processing Record 40 of Set 4 | codrington
Processing Record 41 of Set 4 | usak
Processing Record 42 of Set 4 | antofagasta
Processing Record 43 of Set 4 | yelizovo
Processing Record 44 of Set 4 | grindavik
Processing Record 45 of Set 4 | cileunyi
Processing Record 46 of Set 4 | faanui
Processing Record 47 of Set 4 | warmbad
Processing Record 48 of Set 4 | turan
Processing Record 49 of Set 4 | bahir dar
Processing Record 50 of Set 4 | touros
Processing Record 1 of Set 5 | tongzi
Processing Record 2 of Set 5 | isla mujeres
Processing Record 3 of Set 5 | maarianhamina
Processing Record 4 of Set 5 | cabo san lucas
City not found. Skipping...
Processing Record 5 of Set 5 | emerald
Processing Record 6 of Set 5 | businga
Processing Record 7 of Set 5 | tabas
Processing Record 8 of Set 5 | mayo
Pro

Processing Record 27 of Set 8 | izumi
Processing Record 28 of Set 8 | flagstaff
Processing Record 29 of Set 8 | sao filipe
Processing Record 30 of Set 8 | dmitriyevskoye
Processing Record 31 of Set 8 | hofn
Processing Record 32 of Set 8 | orikum
Processing Record 33 of Set 8 | lokoja
Processing Record 34 of Set 8 | tarakan
Processing Record 35 of Set 8 | karpathos
Processing Record 36 of Set 8 | nanakuli
Processing Record 37 of Set 8 | jining
Processing Record 38 of Set 8 | vanimo
Processing Record 39 of Set 8 | okmulgee
Processing Record 40 of Set 8 | manokwari
Processing Record 41 of Set 8 | chudovo
Processing Record 42 of Set 8 | srinagar
Processing Record 43 of Set 8 | valleyview
Processing Record 44 of Set 8 | praya
Processing Record 45 of Set 8 | taoudenni
Processing Record 46 of Set 8 | pontianak
Processing Record 47 of Set 8 | glencoe
Processing Record 48 of Set 8 | naryan-mar
Processing Record 49 of Set 8 | pastavy
Processing Record 50 of Set 8 | parksville
Processing Record 1

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [4]:
# Assemble the per-city result lists into one table (one row per retrieved city)
# and display it as the cell output.
column_data = {
    "City": city_name,
    "Lat": lat,
    "Lng": long,
    "Max Temp": max_temp,
    "Humidity": humidity,
    "Cloudiness": cloudines,
    "Wind Speed": wind_speed,
    "Country": country,
    "Date": date,
}
data_df = pd.DataFrame(column_data)
data_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Ribeira Grande,38.52,-28.70,296.15,69,75,1.50,PT,1596920945
1,Punta Arenas,-53.15,-70.92,277.15,69,4,9.30,CL,1596920715
2,Hambantota,6.12,81.12,300.03,75,100,6.71,LK,1596920945
3,Mount Gambier,-37.83,140.77,283.15,93,90,4.10,AU,1596920945
4,San Patricio,19.22,-104.70,301.15,83,75,4.10,MX,1596920945
...,...,...,...,...,...,...,...,...,...
552,City of San Pedro,14.35,121.02,299.26,94,100,1.50,PH,1596920993
553,Laguna,38.42,-121.42,307.59,33,1,2.60,US,1596920994
554,Lorengau,-2.02,147.27,299.68,79,100,2.70,PG,1596920994
555,Ituango,7.17,-75.76,295.88,83,34,1.45,CO,1596920994


In [5]:
# Write the full city table to the CSV path stored in output_data_file.
data_df.to_csv(path_or_buf=output_data_file)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [9]:
# Order the cities from most to least humid so that any humidity > 100% would
# appear at the top, then keep only rows with humidity <= 100.
# The current data doesn't have cities with humidity > 100%, so this step could
# be skipped; however, the cities are random and change on every run, so the
# filter is applied regardless to keep the result consistent.
data_df = (
    data_df
    .sort_values(by="Humidity", ascending=False)
    .loc[lambda frame: frame["Humidity"] <= 100]
)
data_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
304,Batemans Bay,-35.72,150.18,285.93,100,100,2.24,AU,1596920972
276,Gmina Końskie,51.19,20.41,294.82,100,1,1.37,PL,1596920969
192,Yelizovo,53.19,158.38,284.15,100,75,3.00,RU,1596920961
372,Wagga Wagga,-35.12,147.37,281.15,100,90,3.60,AU,1596920874
93,Tuktoyaktuk,69.45,-133.04,283.15,100,90,10.30,CA,1596920952
...,...,...,...,...,...,...,...,...,...
461,Farmington,36.73,-108.22,306.15,11,1,7.70,US,1596920931
394,Taoudenni,22.68,-3.98,314.96,10,0,7.59,ML,1596920978
265,North Las Vegas,36.20,-115.12,310.93,7,1,4.60,US,1596920908
303,Saint George,37.10,-113.58,310.93,6,1,2.60,US,1596920967


In [10]:
# Humidity quartiles -> interquartile range -> outlier bounds
# (values more than 1.5 * IQR below the lower quartile or above the upper one).
quartilles = data_df["Humidity"].quantile([.25, .5, .75]).round(3)
first_q = quartilles[0.25]
third_q = quartilles[0.75]

lowerq = first_q.round(2)
upperq = third_q.round(2)
iqr = round(third_q - first_q, 2).round(2)
lower_bound = round(first_q - 1.5 * iqr, 2)
upper_bound = round(third_q + 1.5 * iqr, 2)

divider = "----------------------"
print(divider)
print(f"The lower quartile is: {lowerq} and the upper quartile is: {upperq}")
print(f"The potential outliers are the values below {lower_bound} and the values above {upper_bound}")
print(divider)

----------------------
The lower quartile is: 57.0 and the upper quartile is: 87.0
The potential outliers are the values below 12.0 and the values above 132.0
----------------------


In [20]:
# Build a new DataFrame from the city data with all humidity outliers removed:
# flag rows outside [lower_bound, upper_bound], collect their index labels,
# and drop those labels (data_df itself is left unchanged).
is_outlier = (data_df["Humidity"] < lower_bound) | (data_df["Humidity"] > upper_bound)
otlierindexes = data_df[is_outlier].index
clean_city_data = data_df.drop(index=otlierindexes)
clean_city_data

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
304,Batemans Bay,-35.72,150.18,285.93,100,100,2.24,AU,1596920972
276,Gmina Końskie,51.19,20.41,294.82,100,1,1.37,PL,1596920969
192,Yelizovo,53.19,158.38,284.15,100,75,3.00,RU,1596920961
372,Wagga Wagga,-35.12,147.37,281.15,100,90,3.60,AU,1596920874
93,Tuktoyaktuk,69.45,-133.04,283.15,100,90,10.30,CA,1596920952
...,...,...,...,...,...,...,...,...,...
273,Kumul,42.80,93.45,301.02,18,0,3.80,CN,1596920969
377,Flagstaff,35.20,-111.65,303.71,17,40,7.70,US,1596920903
62,Mayor Pablo Lagerenza,-19.93,-60.77,307.29,17,0,2.11,PY,1596920950
271,Atar,20.52,-13.05,313.22,17,29,4.13,MR,1596920969


In [25]:
# Keep only the columns of interest from the cleaned city data
relevant_columns = ["City", "Lat", "Max Temp", "Humidity", "Cloudiness", "Wind Speed"]
clean_city_data_rel = clean_city_data.loc[:, relevant_columns]

# Export the reduced table to its own CSV file
output_data_file2 = "../output_data/relevant_cities_data.csv"
clean_city_data_rel.to_csv(output_data_file2)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

In [None]:
# Create Northern and Southern Hemisphere DataFrames

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression