In [1]:
#  Dependencies 
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
from pprint import pprint

# API Key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the output CSV (nothing is loaded here; the name is only defined,
# and it is not referenced elsewhere in the visible notebook)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes used for random sampling.
# Latitude is only defined up to +90 degrees, so the upper bound is capped there;
# the -75 lower bound is kept as chosen by the author.
lat_range = (-75, 90)
lng_range = (-180, 180)

In [2]:
# Draw 1500 random (latitude, longitude) pairs inside the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Map every coordinate pair to its nearest city name and keep each name once,
# in first-seen order (dict keys preserve insertion order)
cities = list(
    dict.fromkeys(
        citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
    )
)

# Make sure I have at least 500 cities
len(cities)

603

In [3]:
# Pick one sample city to try a single API request with.
# NOTE(review): index 8 selects the ninth entry of `cities`, which does not
# match the "_7" in the variable name — confirm which city was intended.
city_7 = cities[8]
city_7

'rikitea'

In [4]:
# Build query URL for the single sample city
units = "imperial"
url = f"http://api.openweathermap.org/data/2.5/weather?q={city_7}&appid={weather_api_key}&units={units}"

# Display the URL with the API key masked, so the secret is not written
# into the notebook's saved output
url.replace(weather_api_key, "<API_KEY>")

'http://api.openweathermap.org/data/2.5/weather?q=rikitea&appid=<API_KEY>&units=imperial'

In [5]:
# Request the sample city's weather and pretty-print the decoded JSON.
# A timeout is set so an unresponsive server cannot hang the cell forever.
response = requests.get(url, timeout=10).json()
pprint(response)

{'base': 'stations',
 'clouds': {'all': 87},
 'cod': 200,
 'coord': {'lat': -23.12, 'lon': -134.97},
 'dt': 1588654220,
 'id': 4030556,
 'main': {'feels_like': 68.54,
          'grnd_level': 1015,
          'humidity': 71,
          'pressure': 1017,
          'sea_level': 1017,
          'temp': 73.17,
          'temp_max': 73.17,
          'temp_min': 73.17},
 'name': 'Rikitea',
 'sys': {'country': 'PF', 'sunrise': 1588605677, 'sunset': 1588645909},
 'timezone': -32400,
 'weather': [{'description': 'overcast clouds',
              'icon': '04n',
              'id': 804,
              'main': 'Clouds'}],
 'wind': {'deg': 150, 'speed': 16.24}}


In [6]:
# Empty accumulators for the per-city weather values (*might not be needed)
lats, longs, temps, humids = [], [], [], []
pressures, winds, clouds = [], [], []

# Names of the cities found, and a counter starting at zero
citiesFound = []
counter = 0

In [None]:
 
# Fetch the current weather for every name in `cities` and collect the values
# into parallel lists (one entry per city that was retrieved successfully).
units = 'imperial'
base_url = "http://api.openweathermap.org/data/2.5/weather"

# Empty the lists
city_list = []
lats = []
long = []
temp = []
wind = []
humid = []
cloud = []
country = []
pressure = []

# Count to zero for displaying message
count = 0

for count, city in enumerate(cities, start=1):

    try:
        # Let requests build and URL-encode the query string
        response = requests.get(
            base_url,
            params={"appid": weather_api_key, "units": units, "q": city},
            timeout=10,
        ).json()

        # Read every field BEFORE appending anything: a response missing any
        # expected key raises here, so the lists can never end up with
        # different lengths after a partial append.
        city_lat = response['coord']['lat']
        city_lon = response['coord']['lon']
        city_wind = response['wind']['speed']
        city_temp = response['main']['temp']
        city_humid = response['main']['humidity']
        city_cloud = response['clouds']['all']
        city_country = response['sys']['country']
        city_pressure = response['main']['pressure']

    except Exception as e:
        # Best effort: report the problem and move on to the next city
        print(f"City #{count} out of {len(cities)} missing {e}... skipping {city}.")

    else:
        # Append to the lists only once the whole record is known to be complete
        lats.append(city_lat)
        long.append(city_lon)
        wind.append(city_wind)
        temp.append(city_temp)
        humid.append(city_humid)
        cloud.append(city_cloud)
        country.append(city_country)
        pressure.append(city_pressure)
        city_list.append(city)

        print(f"Printing city #{count} out of {len(cities)} -- {city}")

    # put in time limit to avoid hitting the rate limit
    time.sleep(2)

Printing city #1 out of 603 -- portland
Printing city #2 out of 603 -- punta arenas
Printing city #3 out of 603 -- avarua
Printing city #4 out of 603 -- cape town
Printing city #5 out of 603 -- hilo
Printing city #6 out of 603 -- upernavik
Printing city #7 out of 603 -- aracati
Printing city #8 out of 603 -- saint-philippe
Printing city #9 out of 603 -- rikitea
Printing city #10 out of 603 -- broome
Printing city #11 out of 603 -- madras
Printing city #12 out of 603 -- atuona
Printing city #13 out of 603 -- ugoofaaru
Printing city #14 out of 603 -- sitka
Printing city #15 out of 603 -- fare
Printing city #16 out of 603 -- coquimbo
Printing city #17 out of 603 -- cherskiy
Printing city #18 out of 603 -- saldanha
Printing city #19 out of 603 -- dikson
Printing city #20 out of 603 -- lebu
Printing city #21 out of 603 -- codrington
Printing city #22 out of 603 -- san jose
Printing city #23 out of 603 -- busselton
Printing city #24 out of 603 -- lorengau
Printing city #25 out of 603 -- grin

In [None]:
# Number of cities the request loop has processed. The loop increments `count`;
# `counter` is only ever initialised to 0 and would always display 0.
count

In [None]:
# Assemble the per-city values collected by the request loop into one DataFrame.
# The loop fills city_list / lats / long / temp / humid / pressure / wind / cloud,
# so those are the lists used here. Building from a dict makes pandas raise
# immediately if the lists ever differ in length.
weather_df = pd.DataFrame(
    {
        "cities": city_list,
        "latitude": lats,
        "longitude": long,
        "temperature": temp,
        "humidity": humid,
        "pressure": pressure,
        "wind_speed": wind,
        "cloudiness": cloud,
    }
)

weather_df.head()

In [None]:
# Number of rows in weather_df
len(weather_df)

In [None]:
# Summary statistics for weather_df
weather_df.describe()

In [None]:
# Keep only the rows whose humidity is at most 100 (anything above 100 is dropped)
mask = weather_df["humidity"].le(100)
weather_sub = weather_df[mask].reset_index(drop=True)

weather_sub.head()

In [None]:
# Extract relevant fields from the data frame
# (no extraction is performed here: every column of weather_sub is written)


# Export the filtered city data to "weatherPyData.csv", without the index column
weather_sub.to_csv("weatherPyData.csv", index=False)

In [None]:
# Read the exported CSV back in and check the data.
# The plots and regressions in the later cells read from new_df.

new_df = pd.read_csv("weatherPyData.csv")

new_df.head()

In [None]:
# Scatter plots: latitude vs. temperature, humidity, cloudiness and wind speed
# TODO: add explanations under plots
# Latitude vs. temperature
fig, ax = plt.subplots()
ax.scatter(new_df["latitude"], new_df["temperature"], color="purple", marker="*")
ax.set_xlabel("Latitude")
ax.set_ylabel("Temperature")
ax.set_title("Latitude vs Temperature", fontsize=14, fontweight="bold")
plt.show()

In [None]:
# Latitude vs. humidity
fig, ax = plt.subplots()
ax.scatter(new_df["latitude"], new_df["humidity"], color="red", marker="*")
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity")
ax.set_title("Latitude vs Humidity", fontsize=14, fontweight="bold")
plt.show()

In [None]:
# Latitude vs. cloudiness
fig, ax = plt.subplots()
ax.scatter(new_df["latitude"], new_df["cloudiness"], color="blue", marker="*")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness")
ax.set_title("Latitude vs Cloudiness", fontsize=14, fontweight="bold")
plt.show()

In [None]:
# Latitude vs. wind speed
fig, ax = plt.subplots()
ax.scatter(new_df["latitude"], new_df["wind_speed"], color="lightblue", marker="*")
ax.set_xlabel("Latitude")
ax.set_ylabel("Wind Speed")
ax.set_title("Latitude vs Wind Speed", fontsize=14, fontweight="bold")
plt.show()

In [None]:
# Linear regression for each hemisphere (north, south)
# Northern hemisphere: rows of new_df with latitude >= 0.
# The subset is taken from new_df, the frame the mask was built from
# (no frame called `df` is defined in this notebook).
northernHemi = new_df.latitude >= 0
north = new_df.loc[northernHemi].reset_index(drop=True)

north.head()


In [None]:
# Southern hemisphere: rows of new_df with latitude < 0.
# The subset is taken from new_df, the frame the mask was built from
# (no frame called `df` is defined in this notebook).
southernHemi = new_df.latitude < 0
south = new_df.loc[southernHemi].reset_index(drop=True)

south.head()

In [None]:
# Latitude vs Temperature (North) w/ r squared
# Fits a least-squares line to the northern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = north['latitude']
y_values = north['temperature']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, color="purple")
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates so it stays inside the plot
# regardless of the data range (a data-coordinate anchor outside the axes is clipped)
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Temperature')
plt.title("Latitude vs Temperature in the North")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Latitude vs Temperature (South) w/ r squared
# Fits a least-squares line to the southern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = south['latitude']
y_values = south['temperature']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, color="purple")
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are negative, so a data-coordinate anchor at x=6 lies outside
# the axes and the text is clipped; anchor in axes-fraction coordinates instead
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Temperature')
plt.title("Latitude vs Temperature in the South")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Latitude vs. Humidity (North) w/ r squared
# Fits a least-squares line to the northern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = north['latitude']
y_values = north['humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates so it stays inside the plot
# regardless of the data range (a data-coordinate anchor outside the axes is clipped)
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title("Latitude vs Humidity in the North")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Latitude vs. Humidity (South) w/ r squared
# Fits a least-squares line to the southern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = south['latitude']
y_values = south['humidity']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are negative, so a data-coordinate anchor at x=6 lies outside
# the axes and the text is clipped; anchor in axes-fraction coordinates instead
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title("Latitude vs Humidity in the South")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Latitude vs Cloudiness (North) w/ r squared
# Fits a least-squares line to the northern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = north['latitude']
y_values = north['cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, color="blue")
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates so it stays inside the plot
# regardless of the data range (a data-coordinate anchor outside the axes is clipped)
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title("Latitude vs Cloudiness in the North")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Latitude vs Cloudiness (South) w/ r squared
# Fits a least-squares line to the southern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = south['latitude']
y_values = south['cloudiness']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, color="blue")
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are negative, so a data-coordinate anchor at x=6 lies outside
# the axes and the text is clipped; anchor in axes-fraction coordinates instead
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title("Latitude vs Cloudiness in the South")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Latitude vs Wind Speed (North) w/ r squared
# Fits a least-squares line to the northern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = north['latitude']
y_values = north['wind_speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, color="lightblue")
plt.plot(x_values,regress_values,"r-")
# Place the equation in axes-fraction coordinates so it stays inside the plot
# regardless of the data range (a data-coordinate anchor outside the axes is clipped)
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title("Latitude vs Wind Speed in the North")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()

In [None]:
# Latitude vs Wind Speed (South) w/ r squared
# Fits a least-squares line to the southern-hemisphere rows, plots it over the
# scatter and prints the r-squared and p-value of the fit.
x_values = south['latitude']
y_values = south['wind_speed']
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(x_values,y_values, color="lightblue")
plt.plot(x_values,regress_values,"r-")
# Southern latitudes are negative, so a data-coordinate anchor at x=6 lies outside
# the axes and the text is clipped; anchor in axes-fraction coordinates instead
plt.annotate(line_eq,(0.05,0.05),xycoords="axes fraction",fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title("Latitude vs Wind Speed in the South")
print(f"The r-squared is: {rvalue**2}")
print(f"The p-value is: {pvalue}")
plt.show()