# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Code to make charts and plots interactive
#%matplotlib notebook

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import os
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy
# For displaying of API data more clearly
from pprint import pprint

#import sys
#from config import api_key
#path = "C:\Users\ithmo\OneDrive\Desktop\API Keys"
#sys.path.insert(0, path)

# Import API key
from api_keys import weather_api_key

import json
#with open('data.json', 'w') as outfile:
    #json.dump(JsonWeatherResponse, outfile)

# Output File (CSV)
# NOTE(review): the export cells visible in this notebook write to hard-coded
# '../output_data/...' paths and never read this variable -- confirm whether it is still needed.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes (min, max) used as the bounds when sampling
# random coordinates for the city list
lat_range = (-90, 90)
lng_range = (-180, 180)


#with open('data.json') as json_file:
    #data = json.load(json_file)
#pprint(data)


## Generate Cities List

In [3]:
# Draw 15 random latitude/longitude pairs inside the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=15)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=15)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair and title-case its name.
# dict.fromkeys keeps the first occurrence of each name in encounter order,
# so repeated cities collapse to a single entry.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name.title()
    for lat, lng in lat_lngs
))

# Show the number of unique cities to confirm the sample is large enough
randomCities = len(cities)
randomCities

14

In [4]:
# Raw one-column frame built from the generated city names (default column label is 0)
City_DF = pd.DataFrame(cities)

# Working frame for the weather data: same rows with column 0 relabelled "City".
# rename() hands back a new frame, so City_DF keeps its original labelling
# in case it needs to be referenced later.
CityWeather_DF = City_DF.rename(columns={0: "City"})

# Columns that the retrieval step adds afterwards, in order:
# City Country Lat Lng Max Temp	Humidity Cloudiness Wind Speed  Date
CityWeather_DF.head(20)


Unnamed: 0,City
0,Cape Town
1,The Valley
2,Ayagoz
3,Tuktoyaktuk
4,Punta Arenas
5,Belmonte
6,Belushya Guba
7,Mataura
8,Pevek
9,Port Hardy


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [5]:
# SUPPLEMENTAL INFO AND API TESTS

# Creating dynamic URL and checking to see if it works
# Expected output according to API documentation:
# api.openweathermap.org/data/2.5/weather?q={City}&units={unit}&appid={api_key}
# EX) api.openweathermap.org/data/2.5/weather?q=London&units=imperial&appid=APIKEY
# 'units' is 'imperial' for Fahrenheit temp, api key is to be inserted in file "api_keys.py"

# Variables 
#baseURL = "http://api.openweathermap.org/data/2.5/find?q="

#cityName = CityWeather_DF["City"][0]
#cityURL = cityName.replace(" ", "+")
#unitSys = "&units=imperial&appid="

#queryURL = (f'{baseURL}{cityURL}{unitSys}{weather_api_key}') 
#print(queryURL)
#print()

#JsonWeatherResponse = requests.get(queryURL).json()
#pprint(JsonWeatherResponse)


In [6]:
# Start of user facing "data scan"
print()
print(" ----------------------------- ")
print("   BEGINNING DATA RETRIEVAL   ")
print(" ----------------------------- ")
print()

# OpenWeatherMap "find" endpoint. The query string is passed through `params`
# so that requests takes care of URL-encoding the city name.
FIND_URL = "http://api.openweathermap.org/data/2.5/find"

# Seconds to wait for the API before giving up on a city
REQUEST_TIMEOUT = 10

# Number of successful captures shown per "Set" in the progress log
RECORDS_PER_SET = 50

# Responses that match more than this many places are treated as ambiguous and skipped
MAX_MATCHES = 3

# Weather columns added next to "City", in display order
WEATHER_COLUMNS = ["Country", "Lat", "Lng", "Max Temp", "Humidity",
                   "Cloudiness", "Wind Speed", "Date", "Humidity > 100%"]


def _weather_record(match):
    """Flatten one entry of the response's 'list' array into a row of weather values.

    Args:
        match: dict describing a single matched city (an element of response['list']).

    Returns:
        dict keyed by WEATHER_COLUMNS. "Humidity > 100%" is True only when the
        reported humidity is strictly above 100, matching the column's name.

    Raises:
        KeyError, TypeError: when an expected field is missing or not subscriptable.
    """
    humidity = match["main"]["humidity"]
    return {
        "Country": match["sys"]["country"],
        "Lat": match["coord"]["lat"],
        "Lng": match["coord"]["lon"],
        "Max Temp": match["main"]["temp_max"],
        "Humidity": humidity,
        "Cloudiness": match["clouds"]["all"],
        "Wind Speed": match["wind"]["speed"],
        # Readable date; no leading space so the exported CSV value is clean
        "Date": time.strftime('%m/%d/%Y', time.localtime(match["dt"])),
        "Humidity > 100%": humidity > 100,
    }


# Counter of successful captures inside the current set (1..RECORDS_PER_SET)
recordCounter = 1

# Counter to keep track of how many sets of successful city data captures
setCounter = 1

# Counter to keep track of all times a data API attempt was made (successful or not)
allCityCounter = 1

# Successful rows keyed by the city's index label in CityWeather_DF
weatherRecords = {}

# Go through every generated city; nextCity is the row label, cityName the display name
for nextCity, cityName in CityWeather_DF["City"].items():

    # URL shown to the user for manual review. The API key is deliberately replaced
    # by a placeholder so the secret never ends up in saved notebook output.
    cityURL = cityName.replace(" ", "+")
    ReviewURL = f'{FIND_URL}?q={cityURL}&units=imperial&appid=<API_KEY>'

    try:
        JsonWeatherResponse = requests.get(
            FIND_URL,
            params={"q": cityName, "units": "imperial", "appid": weather_api_key},
            timeout=REQUEST_TIMEOUT,
        ).json()
    except (requests.RequestException, ValueError) as error:
        # Network failure, timeout, or a body that is not JSON. Only the exception
        # type is printed: the message text can embed the full URL including the key.
        print()
        print(f'Request for {cityName} failed ({type(error).__name__}). Skipping entry...')
        print()
        allCityCounter += 1
        continue

    # .get() keeps payloads without "count"/"list" from raising.
    # NOTE(review): error payloads presumably omit these keys -- confirm against the API docs.
    matchCount = JsonWeatherResponse.get("count") or 0
    matches = JsonWeatherResponse.get("list") or []

    # str() makes the status comparison work whether "cod" arrives as "200" or 200
    if str(JsonWeatherResponse.get("cod")) != "200" or matchCount > MAX_MATCHES:
        print()
        print(f'Data for {cityName} is corrupted. Skipping. Can review URL below...')
        print(ReviewURL)
        print()
    else:
        try:
            # An empty match list (count == 0) raises IndexError here
            weatherRecords[nextCity] = _weather_record(matches[0])
        except (IndexError, KeyError, TypeError):
            print()
            print(f'No Data for {cityName}. Skipping entry...')
            print()
        else:
            # Message progress detailing number of successful sets, all attempts vs full list, and the city
            print(f"Processing Record.. { recordCounter } of Set { setCounter }  ({allCityCounter} / {randomCities})   |   {cityName}")

            # Roll over to record 1 of the next set after RECORDS_PER_SET successes
            recordCounter += 1
            if recordCounter > RECORDS_PER_SET:
                recordCounter = 1
                setCounter += 1

    allCityCounter += 1

# Build all weather columns in one step and align them to the city rows.
# Skipped cities keep NaN in every weather column and are removed during cleaning.
# Selecting only "City" first makes re-running this cell safe.
weatherData = pd.DataFrame(
    list(weatherRecords.values()),
    index=list(weatherRecords.keys()),
    columns=WEATHER_COLUMNS,
)
CityWeather_DF = CityWeather_DF[["City"]].join(weatherData)

# End of "data scan"
print()
print(" ----------------------------- ")
print("   DATA RETRIEVAL COMPLETE   ")
print(" ----------------------------- ")
print()




 ----------------------------- 
   BEGINNING DATA RETRIEVAL   
 ----------------------------- 

Processing Record.. 1 of Set 1  (1 / 14)   |   Cape Town
Processing Record.. 2 of Set 1  (2 / 14)   |   The Valley
Processing Record.. 3 of Set 1  (3 / 14)   |   Ayagoz
Processing Record.. 4 of Set 1  (4 / 14)   |   Tuktoyaktuk
Processing Record.. 5 of Set 1  (5 / 14)   |   Punta Arenas

Data for Belmonte is corrupted. Skipping. Can review URL below...
http://api.openweathermap.org/data/2.5/find?q=Belmonte&units=imperial&appid=<REDACTED_API_KEY>


No Data for Belushya Guba. Skipping entry...

Processing Record.. 6 of Set 1  (8 / 14)   |   Mataura
Processing Record.. 7 of Set 1  (9 / 14)   |   Pevek
Processing Record.. 8 of Set 1  (10 / 14)   |   Port Hardy
Processing Record.. 9 of Set 1  (11 / 14)   |   Butaritari
Processing Record.. 10 of Set 1  (12 / 14)   |   Kapaa
Processing Record.. 11 of Set 1  (13 / 14)   |   Srednekolymsk
Processing Record.. 12 of Set 1  (14 / 14)   | 

In [7]:
# Overview of the retrieved data: non-null count per column, then summary statistics
# (including max values, e.g. "Humidity"), each followed by two blank lines.
print(CityWeather_DF.count(), end="\n\n\n")
print(CityWeather_DF.describe(), end="\n\n\n")

# The final expression renders a preview of the dataframe
CityWeather_DF.head(20)

City               14
Country            12
Lat                12
Lng                12
Max Temp           12
Humidity           12
Cloudiness         12
Wind Speed         12
Date               12
Humidity > 100%    12
dtype: int64


             Lat        Lng  Max Temp   Humidity  Cloudiness  Wind Speed
count  12.000000   12.00000  12.00000  12.000000   12.000000   12.000000
mean   23.833633   29.88995  34.25500  85.000000   61.916667   10.738333
std    46.917810  133.92260  37.54233   8.290191   38.648670    4.854300
min   -53.150000 -159.31900 -18.40000  73.000000    0.000000    2.300000
25%    -6.178425  -85.04250  -5.24000  77.500000   29.000000    7.290000
50%    35.023300   49.43120  44.60000  88.000000   82.500000   10.940000
75%    67.951025  157.47855  63.50000  91.500000   92.000000   15.097500
max    70.633300  172.79020  81.63000  96.000000  100.000000   17.130000




Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date,Humidity > 100%
0,Cape Town,ZA,-33.9258,18.4232,60.8,94.0,90.0,14.97,03/15/2021,False
1,The Valley,AI,18.217,-63.0578,77.0,73.0,20.0,2.3,03/15/2021,False
2,Ayagoz,KZ,47.9714,80.4392,17.78,96.0,99.0,17.13,03/15/2021,False
3,Tuktoyaktuk,CA,69.4541,-133.0374,-18.4,76.0,75.0,9.22,03/15/2021,False
4,Punta Arenas,CL,-53.15,-70.9167,48.2,93.0,90.0,5.75,03/15/2021,False
5,Belmonte,,,,,,,,,
6,Belushya Guba,,,,,,,,,
7,Mataura,NZ,-46.1927,168.8643,57.0,74.0,98.0,15.99,03/15/2021,False
8,Pevek,RU,69.7008,170.3133,-7.4,91.0,100.0,15.48,03/15/2021,False
9,Port Hardy,CA,50.6996,-127.4199,41.0,80.0,40.0,8.05,03/15/2021,False


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [8]:
# _______ CityWeather_DF _______
# Turn empty strings and literal "NaN" text into real NaN values, then drop
# duplicate rows and every row that still has a missing value.
CityWeather_DF = (
    CityWeather_DF
    .replace("", np.nan)
    .replace("NaN", np.nan)
    .drop_duplicates()
    .dropna()
)

# Snapshot taken while the "Humidity > 100%" flag column is still present;
# the following cell uses that flag to filter this copy further.
clean_city_dataDF = CityWeather_DF.copy()

# The original frame no longer needs the flag column: remove it, then show
# the per-column counts and a preview of the cleaned frame.
CityWeather_DF = CityWeather_DF.drop(columns="Humidity > 100%")
print(CityWeather_DF.count(), end="\n\n")
CityWeather_DF.head(20)


City          12
Country       12
Lat           12
Lng           12
Max Temp      12
Humidity      12
Cloudiness    12
Wind Speed    12
Date          12
dtype: int64



Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
0,Cape Town,ZA,-33.9258,18.4232,60.8,94.0,90.0,14.97,03/15/2021
1,The Valley,AI,18.217,-63.0578,77.0,73.0,20.0,2.3,03/15/2021
2,Ayagoz,KZ,47.9714,80.4392,17.78,96.0,99.0,17.13,03/15/2021
3,Tuktoyaktuk,CA,69.4541,-133.0374,-18.4,76.0,75.0,9.22,03/15/2021
4,Punta Arenas,CL,-53.15,-70.9167,48.2,93.0,90.0,5.75,03/15/2021
7,Mataura,NZ,-46.1927,168.8643,57.0,74.0,98.0,15.99,03/15/2021
8,Pevek,RU,69.7008,170.3133,-7.4,91.0,100.0,15.48,03/15/2021
9,Port Hardy,CA,50.6996,-127.4199,41.0,80.0,40.0,8.05,03/15/2021
10,Butaritari,KI,3.0707,172.7902,81.63,78.0,9.0,13.67,03/15/2021
11,Kapaa,US,22.0752,-159.319,71.6,88.0,90.0,12.66,03/15/2021


In [9]:
# _______ clean_city_dataDF _______
# Keep only the rows whose "Humidity > 100%" flag is not True, then discard the
# flag column itself so the frame holds just the city and weather columns.
clean_city_dataDF = (
    clean_city_dataDF
    .loc[lambda frame: frame["Humidity > 100%"] != True]
    .drop(columns="Humidity > 100%")
)

# Per-column counts followed by a preview of the filtered frame
print(clean_city_dataDF.count(), end="\n\n")
clean_city_dataDF.head(20)

City          12
Country       12
Lat           12
Lng           12
Max Temp      12
Humidity      12
Cloudiness    12
Wind Speed    12
Date          12
dtype: int64



Unnamed: 0,City,Country,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
0,Cape Town,ZA,-33.9258,18.4232,60.8,94.0,90.0,14.97,03/15/2021
1,The Valley,AI,18.217,-63.0578,77.0,73.0,20.0,2.3,03/15/2021
2,Ayagoz,KZ,47.9714,80.4392,17.78,96.0,99.0,17.13,03/15/2021
3,Tuktoyaktuk,CA,69.4541,-133.0374,-18.4,76.0,75.0,9.22,03/15/2021
4,Punta Arenas,CL,-53.15,-70.9167,48.2,93.0,90.0,5.75,03/15/2021
7,Mataura,NZ,-46.1927,168.8643,57.0,74.0,98.0,15.99,03/15/2021
8,Pevek,RU,69.7008,170.3133,-7.4,91.0,100.0,15.48,03/15/2021
9,Port Hardy,CA,50.6996,-127.4199,41.0,80.0,40.0,8.05,03/15/2021
10,Butaritari,KI,3.0707,172.7902,81.63,78.0,9.0,13.67,03/15/2021
11,Kapaa,US,22.0752,-159.319,71.6,88.0,90.0,12.66,03/15/2021


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [10]:
# Print summary statistics for CityWeather_DF, write the frame to CSV,
# then print its first rows; each printout is followed by one blank line.
print(CityWeather_DF.describe(), end="\n\n")
CityWeather_DF.to_csv('../output_data/CityWeather.csv')
print(CityWeather_DF.head(20), end="\n\n")


             Lat        Lng  Max Temp   Humidity  Cloudiness  Wind Speed
count  12.000000   12.00000  12.00000  12.000000   12.000000   12.000000
mean   23.833633   29.88995  34.25500  85.000000   61.916667   10.738333
std    46.917810  133.92260  37.54233   8.290191   38.648670    4.854300
min   -53.150000 -159.31900 -18.40000  73.000000    0.000000    2.300000
25%    -6.178425  -85.04250  -5.24000  77.500000   29.000000    7.290000
50%    35.023300   49.43120  44.60000  88.000000   82.500000   10.940000
75%    67.951025  157.47855  63.50000  91.500000   92.000000   15.097500
max    70.633300  172.79020  81.63000  96.000000  100.000000   17.130000

             City Country      Lat       Lng  Max Temp  Humidity  Cloudiness  \
0       Cape Town      ZA -33.9258   18.4232     60.80      94.0        90.0   
1      The Valley      AI  18.2170  -63.0578     77.00      73.0        20.0   
2          Ayagoz      KZ  47.9714   80.4392     17.78      96.0        99.0   
3     Tuktoyaktuk     

In [11]:
# Print summary statistics for clean_city_dataDF, write the frame to CSV,
# then print its first rows; each printout is followed by one blank line.
print(clean_city_dataDF.describe(), end="\n\n")
clean_city_dataDF.to_csv('../output_data/CleanCityWeatherData.csv')
print(clean_city_dataDF.head(20), end="\n\n")


             Lat        Lng  Max Temp   Humidity  Cloudiness  Wind Speed
count  12.000000   12.00000  12.00000  12.000000   12.000000   12.000000
mean   23.833633   29.88995  34.25500  85.000000   61.916667   10.738333
std    46.917810  133.92260  37.54233   8.290191   38.648670    4.854300
min   -53.150000 -159.31900 -18.40000  73.000000    0.000000    2.300000
25%    -6.178425  -85.04250  -5.24000  77.500000   29.000000    7.290000
50%    35.023300   49.43120  44.60000  88.000000   82.500000   10.940000
75%    67.951025  157.47855  63.50000  91.500000   92.000000   15.097500
max    70.633300  172.79020  81.63000  96.000000  100.000000   17.130000

             City Country      Lat       Lng  Max Temp  Humidity  Cloudiness  \
0       Cape Town      ZA -33.9258   18.4232     60.80      94.0        90.0   
1      The Valley      AI  18.2170  -63.0578     77.00      73.0        20.0   
2          Ayagoz      KZ  47.9714   80.4392     17.78      96.0        99.0   
3     Tuktoyaktuk     

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [25]:
# Scatter plot of city latitude against maximum temperature.
# Marker area follows the temperature but is floored at 10 pt^2: temperatures can be
# zero or negative, and such values are not usable as marker sizes.
plt.scatter(CityWeather_DF["Lat"], CityWeather_DF["Max Temp"], facecolor="red", edgecolors="orange",
            alpha=0.75, s=CityWeather_DF["Max Temp"].clip(lower=10))

# The title carries the date the notebook is run instead of a hard-coded day
plt.title(f"City Latitude vs. Max Temperature {time.strftime('%m/%d/%Y')}")
plt.grid()

plt.xlim(-80, 80)
# The y-axis is left to autoscale: a fixed (10, 110) window hid every city
# colder than 10 degrees.

plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
#plt.savefig("../Image/Lat_vs_MaxTemp.png", bbox_inches="tight")
plt.show()

<IPython.core.display.Javascript object>

## Latitude vs. Humidity Plot

In [27]:
# Scatter plot of city latitude against humidity.
# Marker area follows the humidity value, floored at 10 pt^2 so very dry
# cities still get a visible marker.
plt.scatter(CityWeather_DF["Lat"], CityWeather_DF["Humidity"], facecolor="yellow", edgecolors="black",
            alpha=1, s=CityWeather_DF["Humidity"].clip(lower=10))

# The title carries the date the notebook is run instead of a hard-coded day
plt.title(f"City Latitude vs. Humidity {time.strftime('%m/%d/%Y')}")
plt.grid()

plt.xlim(-80, 80)
# Humidity is a percentage: show the full 0-100 range with a small margin
# (the previous lower bound of 10 cut off readings below 10%).
plt.ylim(-5, 105)

plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
#plt.savefig("../Image/Lat_vs_Humidity.png", bbox_inches="tight")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of city latitude against cloudiness.
# Marker area follows the cloudiness value, floored at 10 pt^2: a clear sky (0%)
# would otherwise produce a zero-size, invisible marker.
plt.scatter(CityWeather_DF["Lat"], CityWeather_DF["Cloudiness"], facecolor="blue", edgecolors="white",
            alpha=1, s=CityWeather_DF["Cloudiness"].clip(lower=10))

# The title carries the date the notebook is run instead of a hard-coded day
plt.title(f"City Latitude vs. Cloudiness {time.strftime('%m/%d/%Y')}")
plt.grid()

plt.xlim(-80, 80)
# Cloudiness is a percentage: show the full 0-100 range with a small margin
# (the previous lower bound of 10 hid clear-sky and nearly clear cities).
plt.ylim(-5, 105)

plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
#plt.savefig("../Image/Lat_vs_Cloudiness.png", bbox_inches="tight")
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of city latitude against wind speed.
# Marker area follows the wind speed, floored at 1 pt^2 so a calm reading (0)
# is still drawn.
plt.scatter(CityWeather_DF["Lat"], CityWeather_DF["Wind Speed"], facecolor="green", edgecolors="gray",
            alpha=1, s=CityWeather_DF["Wind Speed"].clip(lower=1))

# The title carries the date the notebook is run instead of a hard-coded day
plt.title(f"City Latitude vs. Wind Speed {time.strftime('%m/%d/%Y')}")
plt.grid()

plt.xlim(-80, 80)
# Anchor the axis at zero and let the top autoscale: the previous fixed
# (10, 110) window hid every city with wind below 10 mph.
plt.ylim(bottom=0)

plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
#plt.savefig("../Image/Lat_vs_WindSpeed.png", bbox_inches="tight")
plt.show()

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression