# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Import dependencies and setup for the script
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
from scipy.stats import linregress

# import citipy for based on latitude and longitude
from citipy import citipy
import datetime
today = datetime.date.today()

# Import the API Keys Google and OpenWeather (blocked w/ gitignore)
from api_keys import weather_api_key

# Output File (CSV)
output_data_file = "../output_data/cities.csv"

# Use these ranges of latitudes and longitudes
lat_range = (-90, 90)
long_range = (-180, 180)

## Generate Cities List

In [2]:
# Make the lists for holding the latitude and longitudes per cities
lat_longs = []
cities = []

# To create a list of random cities with lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
longs = np.random.uniform(long_range[0], long_range[1], size=1500)
lat_longs = zip(lats, longs)

# Find and identify nearest city for each latitude, longitude combination
for lat_long in lat_longs:
    city = citipy.nearest_city(lat_long[0], lat_long[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# CHECKPOINT: Print the city count to confirm sufficient count
print(f'There is {len(cities)} cities in the list.')
print(f'It will take {round(len(cities)/60, 2)} minutes to connect to the servers in each city.')

There is 609 cities in the list.
It will take 10.15 minutes to connect to the servers in each city.


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [3]:
# Get the number of sets needed in the for loop
total_set_number = (len(cities) // 50) + 1

# Set up the lists which will be used to create dataframe
date = []
city = []
country = []
max_temp = []
humidity = []
lat = []
lon = []
cloudiness = []
wind_speed = []

#Add a failed counter
failed_counter = 0
url = "http://api.openweathermap.org/data/2.5/weather?appid=" + weather_api_key + "&units=imperial&q="
url = "http://api.openweathermap.org/data/2.5/weather?appid=" + weather_api_key + "&units=Imperial&APPID="
url = "http://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

# Gather weather information for each city
print('Getting data from site...')
print('------------------------------------------------')

# 
for set_counter in range(total_set_number):
    
    # Use try and except block: prevents the code from stopping when the set does not have 50 records
    try:    
        
        # For each record, OpenWeather API will pull the information and append to corresponding lists
        for record_counter in range(50):
            
            # Apply a counter to show the actual index the city is in the cities list 
            counter = set_counter * 50 + record_counter
            
            # CHECKPOINT
            # Print the current set and record index
            print(f'Processing Record {record_counter + 1} of Set {set_counter + 1} | {cities[counter]}')

            # Use try and except block: prevents the code from stopping when a city cannot be found
            try:
                
                # Get the query url and make a call
                query_url = url + cities[counter]
                weather_json = requests.get(query_url, time.sleep(1)).json()
                
             
                # Append to lists
                date.append(weather_json["dt"])
                city.append(cities[counter])
                country.append(weather_json["sys"]["country"])
                max_temp.append(weather_json["main"]["temp_max"])
                humidity.append(weather_json["main"]["humidity"])
                lat.append(weather_json["coord"]["lat"])
                lon.append(weather_json["coord"]["lon"])             
                cloudiness.append(weather_json["clouds"]["all"])
                wind_speed.append(weather_json["wind"]["speed"])
                              
                         
            # If the city is not found, print it out, and increment to the failed counter
            except KeyError:

                print('Oops!! The city not found. Skipping it...')
                failed_counter += 1
            
            # (For testing)If any other error, print it out
            except:
                
                print('Oh no! Unexpected Error')
    
    # Break out from the loop once the IndexError was found
    except IndexError:
        
        break
        
# Print out the number of cities found and not found
print('-----------------------------')
print(f'There are {failed_counter} cities not found. {len(cities) - failed_counter} cities in dataframe.')
print('The data has processed, complete.')      
print('-----------------------------')

Getting data from site...
------------------------------------------------
Processing Record 1 of Set 1 | vao
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 1 | carnarvon
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 1 | saint-philippe
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 1 | kerema
Oops!! The city not found. Skipping it...
Processing Record 5 of Set 1 | lavrentiya
Oops!! The city not found. Skipping it...
Processing Record 6 of Set 1 | kapaa
Oops!! The city not found. Skipping it...
Processing Record 7 of Set 1 | new norfolk
Oops!! The city not found. Skipping it...
Processing Record 8 of Set 1 | muroto
Oops!! The city not found. Skipping it...
Processing Record 9 of Set 1 | fairbanks
Oops!! The city not found. Skipping it...
Processing Record 10 of Set 1 | taolanaro
Oops!! The city not found. Skipping it...
Processing Record 11 of Set 1 | castro
Oops!! The city not found. Skipping it...
Processing Record 12 

Oops!! The city not found. Skipping it...
Processing Record 50 of Set 2 | naze
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 3 | kosh-agach
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 3 | katsuura
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 3 | buin
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 3 | vestmannaeyjar
Oops!! The city not found. Skipping it...
Processing Record 5 of Set 3 | powell river
Oops!! The city not found. Skipping it...
Processing Record 6 of Set 3 | phan rang
Oops!! The city not found. Skipping it...
Processing Record 7 of Set 3 | hofn
Oops!! The city not found. Skipping it...
Processing Record 8 of Set 3 | bredasdorp
Oops!! The city not found. Skipping it...
Processing Record 9 of Set 3 | pacific grove
Oops!! The city not found. Skipping it...
Processing Record 10 of Set 3 | salym
Oops!! The city not found. Skipping it...
Processing Record 11 of Set 3 | chardara
Oops!! T

Oops!! The city not found. Skipping it...
Processing Record 49 of Set 4 | tabiauea
Oops!! The city not found. Skipping it...
Processing Record 50 of Set 4 | asau
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 5 | balabac
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 5 | byron bay
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 5 | matagami
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 5 | miri
Oops!! The city not found. Skipping it...
Processing Record 5 of Set 5 | vaitupu
Oops!! The city not found. Skipping it...
Processing Record 6 of Set 5 | bonga
Oops!! The city not found. Skipping it...
Processing Record 7 of Set 5 | ilulissat
Oops!! The city not found. Skipping it...
Processing Record 8 of Set 5 | marzuq
Oops!! The city not found. Skipping it...
Processing Record 9 of Set 5 | zhezkazgan
Oops!! The city not found. Skipping it...
Processing Record 10 of Set 5 | washington
Oops!! The city not fo

Oops!! The city not found. Skipping it...
Processing Record 48 of Set 6 | okhotsk
Oops!! The city not found. Skipping it...
Processing Record 49 of Set 6 | kano
Oops!! The city not found. Skipping it...
Processing Record 50 of Set 6 | adrar
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 7 | iskateley
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 7 | provideniya
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 7 | san rafael
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 7 | yakima
Oops!! The city not found. Skipping it...
Processing Record 5 of Set 7 | whitehorse
Oops!! The city not found. Skipping it...
Processing Record 6 of Set 7 | ronne
Oops!! The city not found. Skipping it...
Processing Record 7 of Set 7 | labytnangi
Oops!! The city not found. Skipping it...
Processing Record 8 of Set 7 | iaciara
Oops!! The city not found. Skipping it...
Processing Record 9 of Set 7 | methoni
Oops!! The city no

Oops!! The city not found. Skipping it...
Processing Record 47 of Set 8 | trelew
Oops!! The city not found. Skipping it...
Processing Record 48 of Set 8 | umzimvubu
Oops!! The city not found. Skipping it...
Processing Record 49 of Set 8 | ijaki
Oops!! The city not found. Skipping it...
Processing Record 50 of Set 8 | bilibino
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 9 | vetlanda
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 9 | kumluca
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 9 | san quintin
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 9 | los llanos de aridane
Oops!! The city not found. Skipping it...
Processing Record 5 of Set 9 | auki
Oops!! The city not found. Skipping it...
Processing Record 6 of Set 9 | villa bruzual
Oops!! The city not found. Skipping it...
Processing Record 7 of Set 9 | revadanda
Oops!! The city not found. Skipping it...
Processing Record 8 of Set 9 | barry
Oo

Oops!! The city not found. Skipping it...
Processing Record 45 of Set 10 | yecla
Oops!! The city not found. Skipping it...
Processing Record 46 of Set 10 | meyungs
Oops!! The city not found. Skipping it...
Processing Record 47 of Set 10 | walvis bay
Oops!! The city not found. Skipping it...
Processing Record 48 of Set 10 | phetchaburi
Oops!! The city not found. Skipping it...
Processing Record 49 of Set 10 | raga
Oops!! The city not found. Skipping it...
Processing Record 50 of Set 10 | sao gabriel da cachoeira
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 11 | creel
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 11 | keflavik
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 11 | noumea
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 11 | shaowu
Oops!! The city not found. Skipping it...
Processing Record 5 of Set 11 | santa cruz
Oops!! The city not found. Skipping it...
Processing Record 6 of Set 11 |

Oops!! The city not found. Skipping it...
Processing Record 43 of Set 12 | kalikino
Oops!! The city not found. Skipping it...
Processing Record 44 of Set 12 | leshukonskoye
Oops!! The city not found. Skipping it...
Processing Record 45 of Set 12 | burnie
Oops!! The city not found. Skipping it...
Processing Record 46 of Set 12 | garden city
Oops!! The city not found. Skipping it...
Processing Record 47 of Set 12 | baykit
Oops!! The city not found. Skipping it...
Processing Record 48 of Set 12 | aromashevo
Oops!! The city not found. Skipping it...
Processing Record 49 of Set 12 | high level
Oops!! The city not found. Skipping it...
Processing Record 50 of Set 12 | mazamari
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 13 | xihe
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 13 | vrangel
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 13 | solsvik
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 13 | kia

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Create a DataFrame with the city information for below paramaters
city_dict = {'City': city,
             'Latitude': lat,
             'Longitude': lon,
             'Max Temperature': max_temp,
             'Humidity (%)': humidity,
             'Cloudiness (%)': cloudiness,
             'Wind Speed (MPH)': wind_speed,
             'Country': country,
             'Date': date
            }
city_df = pd.DataFrame(city_dict)

# Import the DataFrame into the CSV file
csv_path = '../output_data/cities.csv'
city_df.to_csv(csv_path)

# Display DataFrame
city_df

In [None]:
# Provide statistical analysis on the DataFrame
city_df.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Drop the rows of data that have a humidity higher than 100%
cleaned_cities_df = city_df.loc[city_df['Humidity (%)'] <= 100]
cleaned_cities_df

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

The graph below provides a visual view of the the relationship between global latitude and temperature of each city within the data provided. The plot graphic shows how the data of the city temperatures rise as latitude approaches 0 from -60(ish) degrees latitude, but appear to decrease more steeply as they increase from 0. 

In [None]:
# Scatter Plot: Latitude and Max Temperature 
# Include Title and Date
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature', 
                       title=f' Max Temperature (F) vs. Latitude ({today})')
plt.grid()
plt.savefig('../output_data/Latitude_MaxTemperature_Plot.png')

## Latitude vs. Humidity Plot
This graph provides a view of the cities humidity between 60 and 80 degrees latitude.  Humidity is above 60% with little striking increase or decrease as latitude changes. 

In [None]:
# Scatter PLot: Latitude and Humidity
# Include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', 
                       title=f'Humidity (%) vs. Latitude ({today})')
plt.grid()
plt.savefig('../output_data/Latitude_Humidity_Plot.png')

## Latitude vs. Cloudiness Plot
The graph shows cities that are close togehter with and without cloud coverage.  Latitue appears evenly distributed. No strong coorliation between the two measurements.  

In [None]:
# Scatter Plot Latitude and Cloudiness
# Include the date in title 
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', 
                       title=f'Cloudiness (%) vs. Latitude ({today})')
plt.grid()
plt.savefig('../output_data/Latitude_Cloudiness_Plot.png')

## Latitude vs. Wind Speed Plot
The plot shows almost equal parts wind speed across the graphy with a slight increase around 37 to 70 degreese.  

In [None]:
# Scatter Plot Latitude and Wind Speed
# Include the date in title 
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', 
                       title=f'Wind Speed (mph) vs. Latitude ({today})')
plt.grid()
plt.savefig('../output_data/Latitude_WindSpeed_Plot.png')

## Linear Regression

In [None]:
# Create two DataFrame's determined by the latitude
north_cities_df = cleaned_cities_df.loc[cleaned_cities_df['Latitude'] >= 0]
south_cities_df = cleaned_cities_df.loc[cleaned_cities_df['Latitude'] < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Linear Regression Plot:  Latitude and Max temperature in the Northern Hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature', title='Northern Hemisphere - Temperature (F) vs. Latitude')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Max Temperature'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Max Temperature'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq, (45, 30),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression
Let's face it, it dang hot in the south!  The graph shows that as the latitude decreaases the tempatures rise.  

In [None]:
# Linear Regression Plot:  Latitude and Max Temperature in the Southern Hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature', title='Latitude vs. Max Temperature')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Max Temperature'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Max Temperature'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-25,10),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

The city temperature does increase as latitude approaches 0 and decreases as it increases from 0 degrees. The south has a positive correlation.  If you head towards the equator will rise city tempatures.  

In [None]:
# Linear Regression Plot: Latitude and Humidity in the Northern Hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', title='Northern Hemisphere - Humidity (%) vs. Latitude')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Humidity (%)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Humidity (%)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(46,15),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression
Much like the overall data, we can see that there are clusters of cities at 0%, 40%, 75%, and 100% cloudiness. The distribution of cities across latitudes appears fairly even at each of those points, but once again we see that a higher quantity of cities closer to a latitude of 0 may be impacting the correlation. At 0.06, there is essentially no correlation, which means that southern latitude is not a good predictor of cloudiness of a given city.

In [None]:
# Linear Regression Plot: Latitude and Humidity in the Southern Hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', title='Southern Hemisphere - Humidity (%) vs. Latitude')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Humidity (%)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Humidity (%)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-22,18),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear Regression Plot: Latitude and Cloudiness in the Northern Hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', title='Northern Hemisphere - Cloudiness (%) vs. Latitude')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Cloudiness (%)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Cloudiness (%)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(45,30),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Linear Regression Plot: Latitude and Cloudiness in the Southern Hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', title='Southern Hemisphere - Cloudiness (%) vs. Latitude')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Cloudiness (%)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Cloudiness (%)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-56,50),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression
This scatter plot is not a good predictor of cloudiness.

In [None]:
# Linear Regression Plot: Latitude and Wind Speed in the Northern Hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', title='Northern Hemisphere - Wind Speed (mph) vs. Latitude')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Wind Speed (MPH)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Wind Speed (MPH)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(0,13),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression
There does not appear to be a strong relationship between the two data points provided, city latitude and wind speed. 

In [None]:
# Linear Regression Plot: Latitude and Wind Speed in the Southern Hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', title='Southern Hemisphere - Wind Speed (mph) vs. Latitude')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Wind Speed (MPH)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Wind Speed (MPH)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-22,13),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')