# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [41]:
# Python APIs Homework
# Georgia Tech Analytics Bootcamp Fall 2021
#
# Import dependencies and setup for the script
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json

# Import the API Keys Google and OpenWeather (not included in upload)
from api_keys import weather_api_key

# Load & use citipy to determine city based on latitude and longitude
try:
    from citipy import citipy
except:
    !pip install citipy
from citipy import citipy

# Use these ranges of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)


## Generate Cities List

In [42]:
# Make the lists for holding lat_lngs and cities
lat_lngs = []
cities = []

# To create a list of random cities with lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Find and identify nearest city for each latitude, longitude combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# CHECKPOINT: Print the city count to confirm sufficient count
print(f'There is {len(cities)} in the list.')
print(f'It will take {round(len(cities)/60, 2)} minutes to ping the server for all cities.')

There is 663 in the list.
It will take 11.05 minutes to ping the server for all cities.


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [43]:
# Get the number of sets needed in the for loop
total_set_number = (len(cities) // 50) + 1

# Set up the lists which will be used to create dataframe
city = []
lat = []
lon = []
city = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []
failed_counter = 0
url = "http://api.openweathermap.org/data/2.5/weather?appid=" + weather_api_key + "&units=metric&q="

# Start to extracting cities' weather info
print('Beginning Data Retrieval')
print('-----------------------------')

for set_counter in range(total_set_num):
    
    # Use try and except block: prevents the code from stopping when the set does not have 50 records
    try:    
        
        # For each record, OpenWeather API will pull the information and append to corresponding lists
        for record_counter in range(50):
            
            # Apply a counter to show the actual index the city is in the cities list 
            counter = set_counter * 50 + record_counter
            
            # CHECKPOINT
            # Print the current set and record index
            print(f'Processing Record {record_counter + 1} of Set {set_counter + 1} | {cities[counter]}')

            # Use try and except block: prevents the code from stopping when a city cannot be found
            try:
                
                # Get the query url and make a call
                query_url = url + cities[counter]
                weather_json = requests.get(query_url, time.sleep(1)).json()
                
             
                # Append to lists
                lat.append(weather_json["coord"]["lat"])
                lon.append(weather_json["coord"]["lon"])
                max_temp.append(weather_json["main"]["temp_max"])
                humidity.append(weather_json["main"]["humidity"])
                cloudiness.append(weather_json["clouds"]["all"])
                wind_speed.append(weather_json["wind"]["speed"])
                country.append(weather_json["sys"]["country"])
                date.append(weather_json["dt"])
                city.append(cities[counter])
            
            # If the city is not found, print it out, and increment to the failed counter
            except KeyError:

                print('Oops!! The city not found. Skipping it...')
                failed_counter += 1
            
            # (For testing)If any other error, print it out
            except:
                
                print('Unexpected Error')
    
    # Break out from the loop once the IndexError was found
    except IndexError:
        
        break
        
# Print out the number of cities found and not found
print('-----------------------------')
print(f'There are {failed_counter} cities not found. {len(cities) - failed_counter} cities in dataframe.')
print('Data Retrieval Complete')      
print('-----------------------------')

Beginning Data Retrieval
-----------------------------
Processing Record 1 of Set 1 | jamestown
Processing Record 2 of Set 1 | mataura
Processing Record 3 of Set 1 | the valley
Processing Record 4 of Set 1 | bredasdorp
Processing Record 5 of Set 1 | new norfolk
Processing Record 6 of Set 1 | masty
Processing Record 7 of Set 1 | kaitangata
Processing Record 8 of Set 1 | punta arenas
Processing Record 9 of Set 1 | upernavik
Processing Record 10 of Set 1 | rikitea
Processing Record 11 of Set 1 | tasiilaq
Processing Record 12 of Set 1 | nikolskoye
Processing Record 13 of Set 1 | mar del plata
Processing Record 14 of Set 1 | piacabucu
Processing Record 15 of Set 1 | cockburn town
Processing Record 16 of Set 1 | ushuaia
Processing Record 17 of Set 1 | puerto leguizamo
Processing Record 18 of Set 1 | kazalinsk
Oops!! The city not found. Skipping it...
Processing Record 19 of Set 1 | bonavista
Processing Record 20 of Set 1 | cayenne
Processing Record 21 of Set 1 | atuona
Processing Record 22 o

Processing Record 32 of Set 4 | kapaa
Processing Record 33 of Set 4 | kijang
Processing Record 34 of Set 4 | samarai
Processing Record 35 of Set 4 | general roca
Processing Record 36 of Set 4 | la asuncion
Processing Record 37 of Set 4 | ostersund
Processing Record 38 of Set 4 | emerald
Processing Record 39 of Set 4 | kodinsk
Processing Record 40 of Set 4 | yarmouth
Processing Record 41 of Set 4 | jiwani
Processing Record 42 of Set 4 | butaritari
Processing Record 43 of Set 4 | ulladulla
Processing Record 44 of Set 4 | challans
Processing Record 45 of Set 4 | faanui
Processing Record 46 of Set 4 | swan hill
Processing Record 47 of Set 4 | jiuquan
Processing Record 48 of Set 4 | son la
Processing Record 49 of Set 4 | northam
Processing Record 50 of Set 4 | celestun
Processing Record 1 of Set 5 | sinnamary
Processing Record 2 of Set 5 | wageningen
Processing Record 3 of Set 5 | urucara
Processing Record 4 of Set 5 | sobolevo
Processing Record 5 of Set 5 | georgetown
Processing Record 6 o

Processing Record 13 of Set 8 | alice springs
Processing Record 14 of Set 8 | nadym
Processing Record 15 of Set 8 | eureka
Processing Record 16 of Set 8 | zhigansk
Processing Record 17 of Set 8 | horadiz
Processing Record 18 of Set 8 | douglas
Processing Record 19 of Set 8 | gambela
Processing Record 20 of Set 8 | north bend
Processing Record 21 of Set 8 | marcona
Oops!! The city not found. Skipping it...
Processing Record 22 of Set 8 | lashio
Processing Record 23 of Set 8 | sitka
Processing Record 24 of Set 8 | neiafu
Processing Record 25 of Set 8 | chiesanuova
Processing Record 26 of Set 8 | butembo
Processing Record 27 of Set 8 | adeje
Processing Record 28 of Set 8 | golden rock
Oops!! The city not found. Skipping it...
Processing Record 29 of Set 8 | acari
Processing Record 30 of Set 8 | hofn
Processing Record 31 of Set 8 | port hardy
Processing Record 32 of Set 8 | shabqadar
Processing Record 33 of Set 8 | moura
Processing Record 34 of Set 8 | soe
Processing Record 35 of Set 8 | i

Processing Record 46 of Set 11 | rio grande
Processing Record 47 of Set 11 | fasa
Processing Record 48 of Set 11 | batemans bay
Processing Record 49 of Set 11 | utiroa
Oops!! The city not found. Skipping it...
Processing Record 50 of Set 11 | doctor pedro p. pena
Oops!! The city not found. Skipping it...
Processing Record 1 of Set 12 | yeppoon
Processing Record 2 of Set 12 | paradwip
Oops!! The city not found. Skipping it...
Processing Record 3 of Set 12 | guskhara
Processing Record 4 of Set 12 | kamenskoye
Oops!! The city not found. Skipping it...
Processing Record 5 of Set 12 | siilinjarvi
Processing Record 6 of Set 12 | port macquarie
Processing Record 7 of Set 12 | deder
Processing Record 8 of Set 12 | hovd
Processing Record 9 of Set 12 | chiredzi
Processing Record 10 of Set 12 | port lincoln
Processing Record 11 of Set 12 | mbanza-ngungu
Processing Record 12 of Set 12 | halalo
Oops!! The city not found. Skipping it...
Processing Record 13 of Set 12 | byron bay
Processing Record 14

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [44]:
# Dataframe creation using the lists we generated from above cell
cities_dict = {'City': city,
               'Latitude' : lat,
               'Longitude' : lon,
               'Max Temperature (°C)': max_temp,
               'Humidity (%)': humidity,
               'Cloudiness (%)': cloudiness,
               'Wind Speed (MPH)': wind_speed,
               'Country': country,
               'Date': date
              }
cities_df = pd.DataFrame(cities_dict)

# Dataframe saved to the csv file
csv_path = '../output_data/cities.csv'
cities_df.to_csv(csv_path)

# CHECKPOINT:  Display the dataframe
cities_df

Unnamed: 0,City,Latitude,Longitude,Max Temperature (°C),Humidity (%),Cloudiness (%),Wind Speed (MPH),Country,Date
0,jamestown,42.0970,-79.2353,22.62,51,1,2.24,US,1626994182
1,mataura,-46.1927,168.8643,6.38,87,12,1.32,NZ,1626994183
2,the valley,18.2170,-63.0578,28.92,83,20,6.69,AI,1626993643
3,bredasdorp,-34.5322,20.0403,6.62,93,29,1.03,ZA,1626994185
4,new norfolk,-42.7826,147.0587,9.95,72,76,0.89,AU,1626994187
...,...,...,...,...,...,...,...,...,...
538,octeville,49.6261,-1.6435,19.92,50,0,5.14,FR,1626994934
539,sao jose da coroa grande,-8.8978,-35.1478,23.47,80,2,4.96,BR,1626994935
540,ipameri,-17.7219,-48.1597,20.56,41,7,2.03,BR,1626994938
541,la macarena,2.1837,-73.7849,21.90,97,89,1.13,CO,1626994939


In [45]:
# CHECKPOINT, look at dataframe information
# Show a statistics on the dataframe
cities_df.describe()

Unnamed: 0,Latitude,Longitude,Max Temperature (°C),Humidity (%),Cloudiness (%),Wind Speed (MPH),Date
count,543.0,543.0,543.0,543.0,543.0,543.0,543.0
mean,18.794263,15.076982,20.21558,71.672192,52.721915,3.477551,1626995000.0
std,33.488419,89.658226,7.857579,19.664326,38.528061,2.456745,243.2916
min,-54.8,-175.2,2.27,10.0,0.0,0.0,1626994000.0
25%,-9.545,-64.23975,13.9,64.0,11.0,1.665,1626994000.0
50%,21.4942,17.4695,21.29,76.0,55.0,2.9,1626995000.0
75%,46.83845,97.3614,26.095,86.0,92.0,4.63,1626995000.0
max,78.2186,179.3167,43.87,100.0,100.0,13.66,1626995000.0


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [46]:
# Drop the rows of data that have a humidity higher than 100%
cleaned_cities_df = cities_df.loc[cities_df['Humidity (%)'] <= 100]
cleaned_cities_df

Unnamed: 0,City,Latitude,Longitude,Max Temperature (°C),Humidity (%),Cloudiness (%),Wind Speed (MPH),Country,Date
0,jamestown,42.0970,-79.2353,22.62,51,1,2.24,US,1626994182
1,mataura,-46.1927,168.8643,6.38,87,12,1.32,NZ,1626994183
2,the valley,18.2170,-63.0578,28.92,83,20,6.69,AI,1626993643
3,bredasdorp,-34.5322,20.0403,6.62,93,29,1.03,ZA,1626994185
4,new norfolk,-42.7826,147.0587,9.95,72,76,0.89,AU,1626994187
...,...,...,...,...,...,...,...,...,...
538,octeville,49.6261,-1.6435,19.92,50,0,5.14,FR,1626994934
539,sao jose da coroa grande,-8.8978,-35.1478,23.47,80,2,4.96,BR,1626994935
540,ipameri,-17.7219,-48.1597,20.56,41,7,2.03,BR,1626994938
541,la macarena,2.1837,-73.7849,21.90,97,89,1.13,CO,1626994939


In [47]:
#  Get the indices of cities that have humidity over 100%.
lat_lngs = []
cities = []

# Random Latitude and Longitude combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
longs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Find/Identify the nearest city for each latitude, longitude combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unizue, then add it to the cities list
    if city not in cities:
        cities.append(city)
# Check        
# Print the city count to confirm sufficient count

In [40]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and max temperature, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature (°C)', 
                       title=f'Latitude vs. Max Temperature ({today})')
plt.grid()
plt.savefig('output_data/Latitude_MaxTemperature_Plot.png')


## Latitude vs. Humidity Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and humidity, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', 
                       title=f'Latitude vs. Humidity ({today})')
plt.grid()
plt.savefig('output_data/Latitude_Humidity_Plot.png')

## Latitude vs. Cloudiness Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and cloudiness, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', 
                       title=f'Latitude vs. Cloudiness ({today})')
plt.grid()
plt.savefig('output_data/Latitude_Cloudiness_Plot.png')

## Latitude vs. Wind Speed Plot

In [None]:
# Plot the scatter plot that shows the relation between latitude and wind speed, and include the date in title
cleaned_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', 
                       title=f'Latitude vs. Wind Speed ({today})')
plt.grid()
plt.savefig('output_data/Latitude_WindSpeed_Plot.png')

## Linear Regression

In [None]:
# Split the dataframe into two based on the latitude
north_cities_df = cleaned_cities_df.loc[cleaned_cities_df['Latitude'] >= 0]
south_cities_df = cleaned_cities_df.loc[cleaned_cities_df['Latitude'] < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and max temperature in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature (°C)', title='Latitude vs. Max Temperature')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Max Temperature (°C)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Max Temperature (°C)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq, (45, 30),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and max temperature in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Max Temperature (°C)', title='Latitude vs. Max Temperature')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Max Temperature (°C)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Max Temperature (°C)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-25,10),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and humidity in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', title='Latitude vs. Humidity')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Humidity (%)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Humidity (%)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(46,15),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and humidity in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Humidity (%)', title='Latitude vs. Humidity')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Humidity (%)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Humidity (%)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-22,18),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and cloudiness in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', title='Latitude vs. Cloudiness')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Cloudiness (%)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Cloudiness (%)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(45,30),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and cloudiness in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Cloudiness (%)', title='Latitude vs. Cloudiness')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Cloudiness (%)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Cloudiness (%)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-56,50),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and wind speed in the northern hemisphere
north_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', title='Latitude vs. Wind Speed')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(north_cities_df['Latitude'], north_cities_df['Wind Speed (MPH)'])
regress_values = north_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_cities_df['Latitude'], north_cities_df['Wind Speed (MPH)'])
plt.plot(north_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(0,13),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Plot the linear regression model between latitude and wind speed in the southern hemisphere
south_cities_df.plot(kind='scatter', x='Latitude', y='Wind Speed (MPH)', title='Latitude vs. Wind Speed')

# Find the linear regression model and print it to the scatter plot
(slope, intercept, rvalue, pvalue, stderr) = linregress(south_cities_df['Latitude'], south_cities_df['Wind Speed (MPH)'])
regress_values = south_cities_df['Latitude'] * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_cities_df['Latitude'], south_cities_df['Wind Speed (MPH)'])
plt.plot(south_cities_df['Latitude'],regress_values,"r-")
plt.annotate(line_eq,(-22,13),fontsize=14,color="red")

print(f'The r-squared is {rvalue**2}')
print(f'The linear regression model is {line_eq}')