# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV file used for output
output_data_file = "output_data/cities.csv"

# (low, high) bounds for latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [35]:
# Build a de-duplicated list of cities by sampling random coordinates and
# asking citipy for the city nearest to each one.

# A fixed seed makes "Restart & Run All" draw the same coordinates (and hence
# the same cities) every time.  Change the seed to draw a different sample.
RANDOM_SEED = 42
# Number of random (lat, lng) points to draw before de-duplication.
NUM_COORDINATE_SAMPLES = 1500

# Accumulators.  They are index-aligned: entry i of every list below
# describes the same city.
cities = []         # city objects returned by citipy, in first-seen order
city_names = []
countries = []
success_lats = []   # latitude of the random point that first produced the city
success_lngs = []   # longitude of the random point that first produced the city
city_country = []   # not populated in this cell; kept so the name stays defined

# Create a set of random lat and lng combinations.
# A local Generator is used so the notebook-wide NumPy random state is untouched.
rng = np.random.default_rng(RANDOM_SEED)
lats = rng.uniform(lat_range[0], lat_range[1], size=NUM_COORDINATE_SAMPLES)
lngs = rng.uniform(lng_range[0], lng_range[1], size=NUM_COORDINATE_SAMPLES)
# Materialised as a list so the pairs are still available after the loop
# (a bare zip object would be exhausted by the first iteration over it).
lat_lngs = list(zip(lats, lngs))

# Identify nearest city for each lat, lng combination.
# Uniqueness is tracked with a set keyed on (name, country code): membership is
# an O(1) hash lookup instead of a scan of the whole `cities` list, and it does
# not depend on how citipy's city object implements equality.
seen_city_keys = set()
for lat, lng in lat_lngs:
    city = citipy.nearest_city(lat, lng)
    city_key = (city.city_name, city.country_code)

    # Skip cities that an earlier random point already selected
    if city_key in seen_city_keys:
        continue
    seen_city_keys.add(city_key)

    cities.append(city)
    city_names.append(city.city_name)
    countries.append(city.country_code)
    success_lats.append(lat)
    success_lngs.append(lng)

# Print the city count to confirm sufficient count
print(f"The number randomly generated list of cities = {len(cities)}")
print(f"The number randomly generated list of city names = {len(city_names)}")
print(f"The number randomly generated list of countries = {len(countries)}")
print(f"The number randomly generated list of latitude coordinates = {len(success_lats)}")
print(f"The number randomly generated list of longitude coordinates = {len(success_lngs)}")
print()
print(f"The number randomly generated list of unique countries = {len(set(countries))}")

The number randomly generated list of cities = 577
The number randomly generated list of city names = 577
The number randomly generated list of countries = 577
The number randomly generated list of latitude coordinates = 577
The number randomly generated list of longitude coordinates = 577

The number randomly generated list of unique countries = 113


In [38]:
# Gather the index-aligned per-city lists into one table, one row per city.
cities_data = {
    "City": city_names,
    "Country": countries,
    "Search Lats": success_lats,
    "Search Lngs": success_lngs,
}


def _join_present(row):
    """Join the non-null entries of a row with ', ' (e.g. "yeppoon, au")."""
    return ', '.join(row[row.notnull()])


# Final column layout: the combined label first, then the raw fields.
column_order = ["City-Country", "City", "Country", "Search Lats", "Search Lngs"]

cities_df = (
    pd.DataFrame(cities_data)
    .assign(**{"City-Country": lambda frame: frame[["City", "Country"]].apply(_join_present, axis=1)})
    .loc[:, column_order]
)

cities_df.head()

Unnamed: 0,City-Country,City,Country,Search Lats,Search Lngs
0,"yeppoon, au",yeppoon,au,-21.893134,150.440057
1,"rikitea, pf",rikitea,pf,-84.88527,-137.580966
2,"fergus, ca",fergus,ca,43.7003,-80.468179
3,"albany, au",albany,au,-69.396079,109.355707
4,"morehead, pg",morehead,pg,-9.372689,141.144073


In [None]:
# In the starter code example, 1,500 sets of randomly chosen latitude and longitude 
# yielded 635 unique city names.  That means that 58% of the randomly chosen lat-lng 
# coordinates mapped to a city that had already been selected and were rejected as
# duplicates, if I read the documentation correctly.

In [None]:
# Since 2/3 of the surface area of the globe is water, there is a likelihood that 2/3 
# of the choices made by randomly selecting geocoordinates will be somewhere other than 
# on land, which means that at least 2/3 of the cities selected by using random 
# coordinates will be clustered on shorelines.  Bottom line, the real task is to randomly
# select geocoordinates only for the 1/3 of the planet surface that is land.

# My solution is to reject any cities that are greater than 15 miles from the randomly
# chosen geocoordinates.  This requires me to measure the distance from the randomly 
# chosen geocoordinates to the nearest city selected.  I may need to play with the method
# further to eyeball the best maximum distance.  With a highly iterative method, I could
# perhaps get maximum approximate equal distribution between cities to minimize any 
# clustering tendencies. But I'm going to settle for an eyeball check on top of a
# pretty decent methodology.

# This methodology is likely also to skew away from cities in more remote locations.
# That said, our formula is still likely to be fairly representative.

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression