# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [22]:
# Dependencies and Setup
import os
import time
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Import API key
from api_keys import weather_api_key

# CSV file that the weather table is written to (and read back from when the API is skipped)
output_data_file = "output_data/cities.csv"

# Switch between a live API run and reusing the CSV from a previous run.
# Set run_api to False to load the saved CSV instead of pinging the API again.
run_api = True

# (min, max) bounds used when drawing random latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [23]:
#If running api, run below code. Otherwise, skip whole cell.
if run_api:

    # Keep drawing fresh batches of random coordinates until one batch yields at least
    # 500 unique city names. Every retry starts over with a new batch.
    while True:
        # Create a set of random lat and lng combinations
        lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
        lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
        lat_lngs = zip(lats, lngs)

        # Identify the nearest city for each lat, lng combination and keep each name once.
        # dict.fromkeys drops repeats with a hash lookup (instead of rescanning a list for
        # every coordinate) while keeping the order in which names were first seen.
        cities = list(dict.fromkeys(
            citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
        ))

        # Stop once the city count is sufficient; otherwise report and rerun the loop.
        if len(cities) >= 500:
            print(f"There were {len(cities)} unique cities in the list.")
            break
        print(f"There were only {len(cities)} unique cities in the list. Rerunning list.")
        

There were 607 unique cities in the list.


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [24]:
#If running api, run below code. Otherwise, skip whole cell.
if run_api:

    # Ask the OpenWeatherMap current-weather endpoint about every city in `cities`,
    # printing a log line per city. The request follows this pattern from the API doc:
    #      api.openweathermap.org/data/2.5/weather?q={city name}&appid={your api key}
    # HTTPS is used so the API key is not sent in clear text.
    # NOTE(review): no `units` parameter is sent, so values arrive in the API's default
    #    units (the recorded temperatures near 295 look like Kelvin) - confirm before
    #    labeling plots.
    base_url = "https://api.openweathermap.org/data/2.5/weather"
    request_timeout = 10  # seconds to wait per request so a stalled connection cannot hang the loop
    counted_cities = 0

    # Initialize empty lists for relevant values.
    city_list = []
    lat_list = []
    lng_list = []
    max_temp_list = []
    humidity_list = []
    cloudiness_list = []
    wind_speed_list = []
    country_list = []
    date_list = []

    # Same order as the values collected in `record` below.
    result_lists = (
        city_list,
        lat_list,
        lng_list,
        max_temp_list,
        humidity_list,
        cloudiness_list,
        wind_speed_list,
        country_list,
        date_list,
    )


    # Print for human eyes
    print("Beginning Data Retrieval")
    print("--------------------------------------")


    # for loop runs through all cities in cities array.

    for city in cities:

        try:
            # Passing the query through `params` lets requests URL-encode city names
            # that contain spaces or other special characters.
            response = requests.get(
                base_url,
                params={"q": city, "appid": weather_api_key},
                timeout=request_timeout,
            ).json()

            # Read every field BEFORE touching the result lists. If any key is missing,
            # nothing has been appended yet, so all lists stay the same length and can
            # still be combined into one DataFrame.
            record = (
                response["name"],
                response["coord"]["lat"],
                response["coord"]["lon"],
                response["main"]["temp_max"],
                response["main"]["humidity"],
                response["clouds"]["all"],
                response["wind"]["speed"],
                response["sys"]["country"],
                response["dt"],
            )

        # The response did not contain the expected weather fields: report the city as
        # not found and move on to the next one.
        except (KeyError, TypeError):
            print(f"City not found.  Skipping the city {city}")

        # Network failure, timeout, or a body that is not valid JSON. Only the error type
        # is printed because the full error text can contain the request URL with the API key.
        except (requests.RequestException, ValueError) as error:
            print(f"Request failed ({type(error).__name__}).  Skipping the city {city}")

        else:
            # Append each value to its matching list.
            for result_list, value in zip(result_lists, record):
                result_list.append(value)

            # counted_cities exists only so the person watching the log can follow progress.
            # Print the number of cities counted and the name of the city counted.
            counted_cities += 1
            print(f"Processing Record {counted_cities} | {city}")

Beginning Data Retrieval
--------------------------------------
Processing Record 1 | stara vyzhivka
Processing Record 2 | galatas
Processing Record 3 | srednekolymsk
Processing Record 4 | kodiak
Processing Record 5 | busselton
Processing Record 6 | cape town
Processing Record 7 | coquimbo
Processing Record 8 | la ronge
Processing Record 9 | padang
Processing Record 10 | sibolga
Processing Record 11 | thompson
Processing Record 12 | dzilam gonzalez
Processing Record 13 | avarua
Processing Record 14 | cosamaloapan
Processing Record 15 | mataura
Processing Record 16 | johnstown
Processing Record 17 | ribeira grande
Processing Record 18 | lagoa
Processing Record 19 | atuona
Processing Record 20 | beringovskiy
Processing Record 21 | bredasdorp
City not found.  Skipping the city taolanaro
Processing Record 22 | ushuaia
Processing Record 23 | vardo
Processing Record 24 | bonavista
City not found.  Skipping the city kuche
Processing Record 25 | liverpool
Processing Record 26 | carnarvon
Proce

Processing Record 218 | pisco
Processing Record 219 | kashi
Processing Record 220 | xining
City not found.  Skipping the city buqayq
Processing Record 221 | buraydah
Processing Record 222 | nizwa
Processing Record 223 | victoria
City not found.  Skipping the city mys shmidta
Processing Record 224 | voi
Processing Record 225 | bulungu
City not found.  Skipping the city toliary
Processing Record 226 | beauceville
Processing Record 227 | acapulco
Processing Record 228 | kiunga
Processing Record 229 | mar del plata
Processing Record 230 | bayan
Processing Record 231 | flin flon
Processing Record 232 | charters towers
Processing Record 233 | hami
Processing Record 234 | havre-saint-pierre
Processing Record 235 | howell
Processing Record 236 | ornskoldsvik
Processing Record 237 | dikson
City not found.  Skipping the city lushunkou
Processing Record 238 | gonzalez
Processing Record 239 | coihueco
Processing Record 240 | evensk
Processing Record 241 | banda aceh
Processing Record 242 | clifton

Processing Record 437 | assiniboia
Processing Record 438 | mount isa
Processing Record 439 | zelenoborskiy
Processing Record 440 | svetlaya
Processing Record 441 | honiara
Processing Record 442 | karaton
Processing Record 443 | bitkine
Processing Record 444 | anloga
Processing Record 445 | abilene
Processing Record 446 | qujing
Processing Record 447 | muzhi
Processing Record 448 | nanganga
City not found.  Skipping the city saleaula
Processing Record 449 | muros
Processing Record 450 | san jose
Processing Record 451 | arecibo
Processing Record 452 | tunduma
Processing Record 453 | omsukchan
Processing Record 454 | west bay
Processing Record 455 | yumen
Processing Record 456 | viedma
Processing Record 457 | yerbogachen
Processing Record 458 | puerto gaitan
Processing Record 459 | komsomolskiy
Processing Record 460 | kalmar
City not found.  Skipping the city sentyabrskiy
Processing Record 461 | talara
Processing Record 462 | bachatskiy
Processing Record 463 | antsohihy
Processing Record 

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [25]:
#If running api, run below code.
if run_api:

    # Combine the per-city lists gathered during the API calls into one table,
    # one column per list.
    weather_df = pd.DataFrame({"City" : city_list,
                               "Latitude" : lat_list,
                               "Longitude" : lng_list,
                               "Maximum Temperature" : max_temp_list,
                               "Humidity" : humidity_list,
                               "Cloudiness" : cloudiness_list,
                               "Wind Speed" : wind_speed_list,
                               "Country" : country_list,
                               "Date" : date_list
                              }
                             )

    # to_csv does not create missing folders, so make sure the output folder exists
    # first; otherwise the export fails and the freshly fetched data is not saved.
    output_dir = os.path.dirname(output_data_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Exports dataframe to CSV file.
    weather_df.to_csv(output_data_file, index=False)

# If not running api, loads the previous CSV output into the weather dataframe.
else:
    weather_df = pd.read_csv(output_data_file)

In [26]:
# Preview the first five rows of the weather table
weather_df.head(5)

Unnamed: 0,City,Latitude,Longitude,Maximum Temperature,Humidity,Cloudiness,Wind Speed,Country,Date
0,Stara Vyzhivka,51.44,24.44,294.98,75,91,3.98,UA,1595146269
1,Galatas,37.5,23.45,302.59,58,0,1.79,GR,1595146269
2,Srednekolymsk,67.45,153.68,296.58,42,0,4.49,RU,1595146270
3,Saint Paul Harbor,57.79,-152.41,287.15,82,90,2.6,US,1595146270
4,Busselton,-33.65,115.33,289.82,45,83,1.18,AU,1595146271


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [27]:
# Flag whether the humidity data needs cleaning. The flag is True unless the highest
#    humidity is confirmed to be at most 100; only in that case is the data taken as
#    clean here, and the removal cell below is skipped.
humidity_over_100 = not (weather_df["Humidity"].max() <= 100)
if not humidity_over_100:
    clean_city_data = weather_df

In [28]:
# Runs only when the humidity check flagged the data; otherwise this cell does nothing
if humidity_over_100:

    # Row labels of every city reporting humidity above 100%.
    too_humid_city_indices = weather_df[weather_df["Humidity"] > 100].index.tolist()

    # Build the cleaned table by leaving those rows out.
    clean_city_data = weather_df.drop(index=too_humid_city_indices)

    # Overwrite the CSV export with the cleaned table.
    clean_city_data.to_csv(output_data_file, index=False)

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

In [30]:
# OPTIONAL: Create a function to create Linear Regression plots

In [31]:
# Create Northern and Southern Hemisphere DataFrames

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression