# Note


* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.


In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

# Generate Cities List


In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

587

# Perform API Calls


* Perform a weather check on each city using a series of successive API calls.

* Include a print log of each city as it'sbeing processed (with the city number and city name).
 

In [3]:
base_url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Build partial query URL
query_url = f"{base_url}appid={weather_api_key}&units={units}&q="

In [4]:
number_of_cities = []
temperature = []
lats = []
long = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []
count = 0


for city in cities:

    try:

        response = requests.get(query_url + city).json()
        temperature.append(response['main']['temp_max'])
        hums.append(response['main']['humidity'])
        lats.append(response['coord']['lat'])
        long.append(response['coord']['lon'])
        cloudiness.append(response['clouds']['all'])
        wind_speed.append(response['wind']['speed'])
        country.append(response['sys']['country'])
        date.append(response['dt'])
        number_of_cities.append(city)

        print(f"Processing record {count} | {city}")
        count = count + 1
        
    except:
        print(f"{city} not found.")
        
              

lakatoro not found.
ushuaia not found.
mitsamiouli not found.
port alfred not found.
te anau not found.
bambous virieux not found.
hithadhoo not found.
ballina not found.
hermanus not found.
meyungs not found.
omutinskoye not found.
faanui not found.
mendi not found.
saint-philippe not found.
mataura not found.
mizpe ramon not found.
longyearbyen not found.
palmer not found.
noumea not found.
atuona not found.
mar del plata not found.
khatanga not found.
bluff not found.
maine-soroa not found.
souillac not found.
anloga not found.
yumen not found.
husavik not found.
tasiilaq not found.
east london not found.
bredasdorp not found.
katsuura not found.
nikolskoye not found.
busselton not found.
asau not found.
roxana not found.
hami not found.
qaanaaq not found.
deputatskiy not found.
emerald not found.
rikitea not found.
sentyabrskiy not found.
bonavista not found.
nam tha not found.
iqaluit not found.
vaini not found.
puerto ayora not found.
angoram not found.
albany not found.
bethel n

columbus not found.
stromness not found.
sisimiut not found.
razole not found.
cotonou not found.
cortes not found.
kapiri mposhi not found.
chambersburg not found.
tsiroanomandidy not found.
tumpat not found.
pemangkat not found.
pochutla not found.
kirakira not found.
atka not found.
morant bay not found.
port blair not found.
chumikan not found.
mandalgovi not found.
groningen not found.
panguna not found.
tashla not found.
zhigansk not found.
leningradskiy not found.
ereymentau not found.
camacha not found.
huntingdon not found.
eydhafushi not found.
pangkalanbuun not found.
tigre not found.
fairbanks not found.
telimele not found.
sinnamary not found.
chornukhy not found.
isangel not found.
lasa not found.
fergus falls not found.
richards bay not found.
adrar not found.
newton not found.
faya not found.
beloha not found.
illapel not found.
kupang not found.
iisalmi not found.
margate not found.
les cayes not found.
mwene-ditu not found.
makakilo city not found.
outlook not found.


# Convert Raw Data to DataFrame


* Export the city data into a .csv.

* Display the DataFrame


In [5]:
# create a data frame
cities_df = {
    "City": number_of_cities,
    "Lat": lats,
    "Lng": long,
    "Max Temp": temperature,
    "Humidity": humidity,
    "Cloudiness": cloudiness,
    "Wind Speed": wind_speed,
    "Country": country,
    "Date": date
}
weather_data = pd.DataFrame(cities_df)
weather_data

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date


In [6]:
# export DataFrame to CSV file
weather_data.to_csv("output_data/cities.csv")

In [7]:
weather_data.describe()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
count,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
mean,,,,,,,,,
std,,,,,,,,,
min,,,,,,,,,
25%,,,,,,,,,
50%,,,,,,,,,
75%,,,,,,,,,
max,,,,,,,,,


# Inspect the data and remove the cities where the humidity > 100%.


* Skip this step if there are no cities that have humidity > 100%. 

In [8]:
#  Get the indices of cities that have humidity over 100%.
weather_data.loc[weather_data['Humidity'] > 100]

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date


In [9]:
Int64Index([], dtype='int64')

NameError: name 'Int64Index' is not defined

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".

# Plotting the Data


* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.

* Save the plotted figures as .pngs.


# Latitude vs. Temperature Plot


In [None]:
plt.scatter(weather_data['Latitude'], weather_data['Temperature'])
plt.xlabel('Latitude')
plt.ylabel('Temperature')
plt.title('Latitude vs. Temperature')

plt.savefig('output_data/Latitude-vs-Temperature.png')

plt.show()



# Latitude vs. Humidity Plot


In [None]:
plt.scatter(weather_data['Latitude'], weather_data['Humidity'])
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('Latitude vs. Humidity')

plt.savefig('output_data/Latitude-vs-Humidity.png')

plt.show()

# Latitude vs. Cloudiness Plot


In [None]:
plt.scatter(weather_data['Latitude'], weather_data['Cloudiness'])
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title('Latitude vs. Cloudiness')

plt.savefig('output_data/Latitude-vs-Cloudiness.png')

plt.show()

# Latitude vs. Wind Speed Plot


In [None]:
plt.scatter(weather_data['Latitude'], weather_data['Wind Speed'])
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title('Latitude vs. Wind Speed')

plt.savefig('output_data/Latitude-vs-Wind_Speed.png')

plt.show()

# Linear Regression


# Northern Hemisphere - Max Temp vs. Latitude Linear Regression


# Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression


# Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression


# Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression


# Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression


# Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression


# Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression
