# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [27]:
# Python APIs Homework
# Georgia Tech Analytics Bootcamp Fall 2021
#
# Import dependencies and setup for the script
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json

# Import the API Keys Google and OpenWeather (not included in upload)
from api_keys import weather_api_key

# Load & use citipy to determine city based on latitude and longitude
try:
    from citipy import citipy
except:
    !pip install citipy
from citipy import citipy

# Use these ranges of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)


## Generate Cities List

In [28]:
# Make the lists for holding lat_lngs and cities
lat_lngs = []
cities = []

# To create a list of random cities with lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Find and identify nearest city for each latitude, longitude combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# CHECKPOINT: Print the city count to confirm sufficient count
print(f'There is {len(cities)} in the list.')
print(f'It will take {round(len(cities)/60, 2)} minutes to ping the server for all cities.')

There is 617 in the list.
It will take 10.28 minutes to ping the server for all cities.


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [29]:
# Get the number of sets needed in the for loop
total_set_number = (len(cities) // 50) + 1

# Set up the lists which will be used to create dataframe
city = []
lat = []
lon = []
city = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []
failed_counter = 0
url = "http://api.openweathermap.org/data/2.5/weather?appid=" + weather_api_key + "&units=metric&q="

# Start to extracting cities' weather info
print('Beginning Data Retrieval')
print('-----------------------------')

for set_counter in range(total_set_num):
    
    # Use try and except block: prevents the code from stopping when the set does not have 50 records
    try:    
        
        # For each record, OpenWeather API will pull the information and append to corresponding lists
        for record_counter in range(50):
            
            # Apply a counter to show the actual index the city is in the cities list 
            counter = set_counter * 50 + record_counter
            
            # CHECKPOINT
            # Print the current set and record index
            print(f'Processing Record {record_counter + 1} of Set {set_counter + 1} | {cities[counter]}')

            # Use try and except block: prevents the code from stopping when a city cannot be found
            try:
                
                # Get the query url and make a call
                query_url = url + cities[counter]
                weather_json = requests.get(query_url, time.sleep(1)).json()
                
             
                # Append to lists
                lat.append(weather_json["coord"]["lat"])
                lon.append(weather_json["coord"]["lon"])
                max_temp.append(weather_json["main"]["temp_max"])
                humidity.append(weather_json["main"]["humidity"])
                cloudiness.append(weather_json["clouds"]["all"])
                wind_speed.append(weather_json["wind"]["speed"])
                country.append(weather_json["sys"]["country"])
                date.append(weather_json["dt"])
                city.append(cities[counter])
            
            # If the city is not found, print it out, and increment to the failed counter
            except KeyError:

                print('Oops!! The city not found. Skipping it...')
                failed_counter += 1
            
            # (For testing)If any other error, print it out
            except:
                
                print('Unexpected Error')
    
    # Break out from the loop once the IndexError was found
    except IndexError:
        
        break
        
# Print out the number of cities found and not found
print('-----------------------------')
print(f'There are {failed_counter} cities not found. {len(cities) - failed_counter} cities in dataframe.')
print('Data Retrieval Complete')      
print('-----------------------------')

Beginning Data Retrieval
-----------------------------
Processing Record 1 of Set 1 | faya
Processing Record 2 of Set 1 | cherskiy
Processing Record 3 of Set 1 | takefu
Processing Record 4 of Set 1 | rikitea
Processing Record 5 of Set 1 | uttarkashi
Processing Record 6 of Set 1 | cap malheureux
Processing Record 7 of Set 1 | ayan
Processing Record 8 of Set 1 | lorengau
Processing Record 9 of Set 1 | namatanai
Processing Record 10 of Set 1 | zafra
Processing Record 11 of Set 1 | vaini
Processing Record 12 of Set 1 | george
Processing Record 13 of Set 1 | thompson
Processing Record 14 of Set 1 | saint anthony
Processing Record 15 of Set 1 | cape town
Processing Record 16 of Set 1 | souillac
Processing Record 17 of Set 1 | kavaratti
Processing Record 18 of Set 1 | ponta do sol
Processing Record 19 of Set 1 | port blair
Processing Record 20 of Set 1 | san vicente
Processing Record 21 of Set 1 | quatre cocos
Processing Record 22 of Set 1 | jalu
Processing Record 23 of Set 1 | ushuaia
Proces

Processing Record 36 of Set 4 | plaridel
Processing Record 37 of Set 4 | sinjar
Processing Record 38 of Set 4 | karratha
Processing Record 39 of Set 4 | mahebourg
Processing Record 40 of Set 4 | bousso
Processing Record 41 of Set 4 | khani
Processing Record 42 of Set 4 | bonavista
Processing Record 43 of Set 4 | kruisfontein
Processing Record 44 of Set 4 | kutum
Processing Record 45 of Set 4 | ankang
Processing Record 46 of Set 4 | aloleng
Processing Record 47 of Set 4 | negotin
Processing Record 48 of Set 4 | constitucion
Processing Record 49 of Set 4 | puerto del rosario
Processing Record 50 of Set 4 | tres arroyos
Processing Record 1 of Set 5 | andevoranto
Oops!! The city not found. Skipping it...
Processing Record 2 of Set 5 | sao felix do xingu
Processing Record 3 of Set 5 | santa eulalia del rio
Oops!! The city not found. Skipping it...
Processing Record 4 of Set 5 | fare
Processing Record 5 of Set 5 | yulara
Processing Record 6 of Set 5 | mar del plata
Processing Record 7 of Set

Processing Record 4 of Set 8 | camana
Processing Record 5 of Set 8 | hambantota
Processing Record 6 of Set 8 | kindu
Processing Record 7 of Set 8 | marawi
Processing Record 8 of Set 8 | baghdad
Processing Record 9 of Set 8 | mananara
Processing Record 10 of Set 8 | victoria
Processing Record 11 of Set 8 | rosario do sul
Processing Record 12 of Set 8 | saint-pierre
Processing Record 13 of Set 8 | kisangani
Processing Record 14 of Set 8 | maceio
Processing Record 15 of Set 8 | vila velha
Processing Record 16 of Set 8 | sambava
Processing Record 17 of Set 8 | rolim de moura
Oops!! The city not found. Skipping it...
Processing Record 18 of Set 8 | lemon tree passage
Processing Record 19 of Set 8 | vila franca do campo
Processing Record 20 of Set 8 | nanortalik
Processing Record 21 of Set 8 | portland
Processing Record 22 of Set 8 | boa vista
Processing Record 23 of Set 8 | burkhala
Oops!! The city not found. Skipping it...
Processing Record 24 of Set 8 | isabela
Processing Record 25 of Set

Processing Record 37 of Set 11 | mamanguape
Processing Record 38 of Set 11 | selikhino
Processing Record 39 of Set 11 | nkwerre
Processing Record 40 of Set 11 | ostersund
Processing Record 41 of Set 11 | bokspits
Oops!! The city not found. Skipping it...
Processing Record 42 of Set 11 | skelleftea
Processing Record 43 of Set 11 | herat
Processing Record 44 of Set 11 | nemuro
Processing Record 45 of Set 11 | shimoda
Processing Record 46 of Set 11 | kibala
Processing Record 47 of Set 11 | bustamante
Processing Record 48 of Set 11 | aswan
Processing Record 49 of Set 11 | san francisco
Processing Record 50 of Set 11 | ardistan
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 12 | calabozo
Processing Record 2 of Set 12 | price
Processing Record 3 of Set 12 | kibakwe
Processing Record 4 of Set 12 | cururupu
Processing Record 5 of Set 12 | kimbe
Processing Record 6 of Set 12 | westpunt
Oops!! The city not found. Skipping it...
Processing Record 7 of Set 12 | along
Processi

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [30]:
# Dataframe creation using the lists we generated from above cell
cities_dict = {'City': city,
               'Latitude' : lat,
               'Longitude' : lon,
               'Max Temperature (°C)': max_temp,
               'Humidity (%)': humidity,
               'Cloudiness (%)': cloudiness,
               'Wind Speed (MPH)': wind_speed,
               'Country': country,
               'Date': date
              }
cities_df = pd.DataFrame(cities_dict)

# Dataframe saved to the csv file
csv_path = '../output_data/cities.csv'
cities_df.to_csv(csv_path)

# CHECKPOINT:  Display the dataframe
cities_df

Unnamed: 0,City,Latitude,Longitude,Max Temperature (°C),Humidity (%),Cloudiness (%),Wind Speed (MPH),Country,Date
0,faya,18.3851,42.4509,17.29,74,99,2.86,SA,1626991079
1,cherskiy,68.7500,161.3000,14.03,77,99,0.82,RU,1626991080
2,takefu,35.9039,136.1669,26.83,81,78,1.33,JP,1626991081
3,rikitea,-23.1203,-134.9692,23.11,72,13,8.12,PF,1626990791
4,uttarkashi,30.7333,78.4500,22.59,83,73,1.25,IN,1626991083
...,...,...,...,...,...,...,...,...,...
535,elko,41.0002,-115.5012,34.01,27,46,7.38,US,1626991837
536,iralaya,15.0000,-83.2333,30.05,78,51,5.86,HN,1626991838
537,shadrinsk,56.0852,63.6335,21.41,75,9,4.89,RU,1626991839
538,taoudenni,22.6783,-3.9836,39.60,15,5,5.99,ML,1626991841


In [31]:
# CHECKPOINT, look at dataframe information
# Show a statistics on the dataframe
cities_df.describe()

Unnamed: 0,Latitude,Longitude,Max Temperature (°C),Humidity (%),Cloudiness (%),Wind Speed (MPH),Date
count,540.0,540.0,540.0,540.0,540.0,540.0,540.0
mean,18.051232,16.468204,20.652981,71.544444,55.151852,3.679852,1626991000.0
std,33.597261,91.492129,7.975469,20.18164,38.310313,2.692886,229.1507
min,-54.8,-175.2,-1.01,11.0,0.0,0.0,1626991000.0
25%,-8.8368,-63.085025,14.175,63.75,14.0,1.79,1626991000.0
50%,21.26305,18.9749,21.885,76.0,60.5,3.005,1626991000.0
75%,46.000275,96.175,26.6325,86.0,95.0,5.0,1626992000.0
max,78.2186,179.3167,39.73,100.0,100.0,19.03,1626992000.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [32]:
# Drop the rows of data that have a humidity higher than 100%
cleaned_cities_df = cities_df.loc[cities_df['Humidity (%)'] <= 100]
cleaned_cities_df

Unnamed: 0,City,Latitude,Longitude,Max Temperature (°C),Humidity (%),Cloudiness (%),Wind Speed (MPH),Country,Date
0,faya,18.3851,42.4509,17.29,74,99,2.86,SA,1626991079
1,cherskiy,68.7500,161.3000,14.03,77,99,0.82,RU,1626991080
2,takefu,35.9039,136.1669,26.83,81,78,1.33,JP,1626991081
3,rikitea,-23.1203,-134.9692,23.11,72,13,8.12,PF,1626990791
4,uttarkashi,30.7333,78.4500,22.59,83,73,1.25,IN,1626991083
...,...,...,...,...,...,...,...,...,...
535,elko,41.0002,-115.5012,34.01,27,46,7.38,US,1626991837
536,iralaya,15.0000,-83.2333,30.05,78,51,5.86,HN,1626991838
537,shadrinsk,56.0852,63.6335,21.41,75,9,4.89,RU,1626991839
538,taoudenni,22.6783,-3.9836,39.60,15,5,5.99,ML,1626991841


In [None]:
#  Get the indices of cities that have humidity over 100%.
lat_lngs = []
cities = []

# Random Latitude and Longitude combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
longs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Find/Identify the nearest city for each latitude, longitude combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unizue, then add it to the cities list
    if city not in cities:
        cities.append(city)
# Check        
# Print the city count to confirm sufficient count

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and max temperature, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature (°C)', 
                       title=f'Latitude vs. Max Temperature ({today})')
plt.grid()
plt.savefig('output_data/Latitude_MaxTemperature_Plot.png')


## Latitude vs. Humidity Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and humidity, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', 
                       title=f'Latitude vs. Humidity ({today})')
plt.grid()
plt.savefig('output_data/Latitude_Humidity_Plot.png')

## Latitude vs. Cloudiness Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and cloudiness, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', 
                       title=f'Latitude vs. Cloudiness ({today})')
plt.grid()
plt.savefig('output_data/Latitude_Cloudiness_Plot.png')

## Latitude vs. Wind Speed Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and wind speed, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', 
                       title=f'Latitude vs. Wind Speed ({today})')
plt.grid()
plt.savefig('output_data/Latitude_WindSpeed_Plot.png')

## Linear Regression

In [None]:
# Split the dataframe into two based on the latitude
north_cities_df = cleaned_cities_df.loc[cleaned_cities_df['Latitude'] >= 0]
south_cities_df = cleaned_cities_df.loc[cleaned_cities_df['Latitude'] < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and max temperature in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature (°C)', title='Latitude vs. Max Temperature')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Max Temperature (°C)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Max Temperature (°C)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq, (45, 30),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and max temperature in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature (°C)', title='Latitude vs. Max Temperature')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Max Temperature (°C)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Max Temperature (°C)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-25,10),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and humidity in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', title='Latitude vs. Humidity')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Humidity (%)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Humidity (%)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(46,15),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and humidity in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', title='Latitude vs. Humidity')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Humidity (%)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Humidity (%)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-22,18),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and cloudiness in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', title='Latitude vs. Cloudiness')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Cloudiness (%)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Cloudiness (%)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(45,30),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and cloudiness in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', title='Latitude vs. Cloudiness')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Cloudiness (%)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Cloudiness (%)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-56,50),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and wind speed in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', title='Latitude vs. Wind Speed')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Wind Speed (MPH)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Wind Speed (MPH)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(0,13),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and wind speed in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', title='Latitude vs. Wind Speed')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Wind Speed (MPH)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Wind Speed (MPH)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-22,13),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')