### Dependencies

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import json
import requests
import time
from scipy.stats import linregress
import scipy.stats as st

# Import API key
from config import api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

### Generate Cities List 

In [2]:
 # List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)
# print(cities)

614

### Perform API Calls 

In [4]:
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "imperial"
query_url = f"{url}appid={api_key}&units={units}&q="

In [None]:
temp_test = []
city_test = ['chicago', 'miami', 'london', 'xxx']
good_cities = []
for city in city_test:
    response = requests.get(query_url + city).json()
    try:
        temp_test.append(response["main"]["temp"])
        good_cities.append(city)
        print(f"{city} found. Temp appending")
    except KeyError:
        print(f"no temp for {city}")
print(response)
print(temp_test)
print(good_cities)


In [6]:
ok_cities = []
ok_lat = []
ok_lon = []
ok_maxtemp = []
ok_mintemp = []
ok_humidity = []
ok_clouds = []
ok_wind = []
ok_country = []
ok_date = []

for city in cities:
    response = requests.get(query_url + city).json()
    try:
        ok_cities.append(response["name"])
        ok_lat.append(response["coord"]["lat"])
        ok_lon.append(response["coord"]["lon"])
        ok_maxtemp.append(response["main"]["temp_max"])
        ok_mintemp.append(response["main"]["temp_min"])
        ok_humidity.append(response["main"]["humidity"])
        ok_clouds.append(response["clouds"]["all"])
        ok_wind.append(response["wind"]["speed"])
        ok_country.append(response["sys"]["country"])
        ok_date.append(response["dt"])
        print(f"success appending {city}")
   
    except KeyError:
        print(f"no data found for {city}")
print(response)

success appending bambous virieux
success appending bam
success appending adjumani
success appending rikitea
no data found for attawapiskat
success appending coahuayana
success appending castro
no data found for vaitupu
success appending uvarovka
success appending iqaluit
success appending bantogon
success appending yanam
success appending tiksi
success appending russell
success appending konstantinovka
success appending vanavara
success appending bonavista
success appending cherskiy
success appending hermanus
success appending nikolskoye
success appending ushuaia
success appending sao filipe
success appending ancud
success appending saint-philippe
success appending port-gentil
success appending reforma
success appending mataura
success appending tura
success appending pangkalanbuun
success appending east london
success appending miraflores
success appending port macquarie
success appending hobart
success appending ponta do sol
success appending nevers
success appending bluff
success a

success appending poum
success appending kaitangata
success appending chincha alta
no data found for lolua
success appending asau
no data found for belushya guba
success appending kasangulu
success appending peniche
success appending kyrnasivka
success appending te anau
no data found for imisli
success appending ohara
success appending voyvozh
success appending campos do jordao
success appending teguise
success appending vila franca do campo
success appending kavieng
success appending bilgoraj
success appending marzuq
success appending dakar
success appending mataram
success appending parkes
success appending paamiut
success appending puerto del rosario
success appending lompoc
success appending dunedin
success appending isangel
success appending deputatskiy
success appending tazovskiy
success appending moree
success appending aracaju
success appending kaka
success appending aksu
success appending maragogi
success appending nuuk
success appending nioro
success appending cassilandia
suc

success appending dalby
success appending hambantota
success appending two hills
success appending camana
success appending beterou
success appending freeport
success appending bandarbeyla
success appending buala
success appending neuquen
{'coord': {'lon': -68.0591, 'lat': -38.9516}, 'weather': [{'id': 800, 'main': 'Clear', 'description': 'clear sky', 'icon': '01n'}], 'base': 'stations', 'main': {'temp': 71.6, 'feels_like': 64.38, 'temp_min': 71.6, 'temp_max': 71.6, 'pressure': 1012, 'humidity': 33}, 'visibility': 10000, 'wind': {'speed': 9.22, 'deg': 250}, 'clouds': {'all': 0}, 'dt': 1611718221, 'sys': {'type': 1, 'id': 8310, 'country': 'AR', 'sunrise': 1611740281, 'sunset': 1611791510}, 'timezone': -10800, 'id': 3843123, 'name': 'Neuquén', 'cod': 200}


In [None]:
print(len(ok_cities))
print(len(ok_lat))
print(len(ok_lon))
print(len(ok_maxtemp))
print(len(ok_mintemp))
print(len(ok_humidity))
print(len(ok_clouds))
print(len(ok_wind))
print(len(ok_country))
print(len(ok_date))

### Convert Raw Data to DataFrame 

In [None]:
weather_dict = {"City": ok_cities, "Latitude": ok_lat, "Longitude": ok_lon, "Max Temp": ok_maxtemp, 
                "Humidity": ok_humidity, "Cloudiness": ok_clouds, "Wind Speed": ok_wind, 
                "Country": ok_country, "Date": ok_date}

weather_df = pd.DataFrame(weather_dict)
weather_df

### Inspect data and remove cities where the humidty is > 100%

In [None]:
low_hum = pd.DataFrame(weather_df[weather_df["Humidity"]< 100])
low_hum

## Plotting the Data 

### Latitude vs. Temperature Plot 

In [None]:
plot_lat = low_hum_df['Latitude']
plot_temp = low_hum_df["Max Temp"]

plt.scatter(plot_lat, plot_temp, marker="o", facecolors = "blue", edgecolors="black", 
            alpha=.99)

plt.grid(color="grey", linestyle="-", linewidth=1)
plt.title("Latitude vs. Temperature")
plt.xlabel("Latitude")
plt.ylabel("Temperature")

### Latitude vs. Humidty Plot

In [None]:
plot_lat = low_hum_df['Latitude']
plot_humidity = low_hum_df["Humidity"]

plt.scatter(plot_lat, plot_humidity, marker="o", facecolors = "blue", edgecolors="black", 
            alpha=.99)

plt.grid(color="grey", linestyle="-", linewidth=1)
plt.title(f"Latitude vs. Humidity")
plt.xlabel("Latitude")
plt.ylabel("Humidity")

### Latitude vs. Cloudiness Plot

In [None]:
plot_lat = low_hum_df['Latitude']
plot_cloud = low_hum_df["Cloudiness"]

plt.scatter(plot_lat, plot_cloud, marker="o", facecolors = "blue", edgecolors="black", 
            alpha=.99)

plt.grid(color="grey", linestyle="-", linewidth=.1)
plt.title(f"Latitude vs. Cloudiness")
plt.xlabel("Latitude")
plt.ylabel("Humidity")

###  Latitude vs. Wind Speed Plot

In [None]:
plot_lat = low_hum_df['Latitude']
plot_wind = low_hum_df["Wind Speed"]

plt.scatter(plot_lat, plot_wind, marker="o", facecolors = "blue", edgecolors="black", 
            alpha=.99)

plt.grid(color="grey", linestyle="-", linewidth=.5)
plt.title(f"Latitude vs. Wind Speed")
plt.xlabel("Latitude")
plt.ylabel("Humidity")

## Linear Regression

In [None]:
north_hem = pd.DataFrame(low_hum[low_hum["Latitude"]> 0])
north_hem.head()

In [None]:
north_lat = north_hem["Latitude"]
north_maxtemp = north_hem["Max Temp"]
north_humid = north_hem["Humidity"]
north_cloud = north_hem["Cloudiness"]
north_wind = north_hem["Wind Speed"]

In [None]:
south_hem = pd.DataFrame(low_hum[low_hum["Latitude"]< 0])
south_hem.head()

In [None]:
south_lat = south_hem["Latitude"]
south_maxtemp = south_hem["Max Temp"]
south_humid = south_hem["Humidity"]
south_cloud = south_hem["Cloudiness"]
south_wind = south_hem["Wind Speed"]

### Northern Hemisphere: Max Temp vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(north_lat, north_maxtemp)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(north_lat, north_maxtemp)
regress_values = north_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_lat, north_maxtemp)
plt.plot(north_lat,regress_values, "r-")
plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.title("Northern Hemisphere: Latitude vs. Max Temp")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()

### Southern Hemisphere: Max Temp vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(south_lat, south_maxtemp)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(south_lat, south_maxtemp)
regress_values = south_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_lat, south_maxtemp)
plt.plot(south_lat,regress_values, "r-")
plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temp')
plt.title("Southern Hemisphere: Latitude vs. Max Temp")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()

### Northern Hemisphere: Humidty (%) vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(north_lat, north_humid)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(north_lat, north_humid)
regress_values = north_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_lat, north_humid)
plt.plot(north_lat,regress_values, "r-")
plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title("Northern Latitude vs. Humidity")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()

### Southern Hemisphere: Humidty (%) vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(south_lat, south_humid)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(south_lat, south_humid)
regress_values = south_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_lat, south_humid)
plt.plot(south_lat,regress_values, "r-")
plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title("Southern Hemisphere: Latitude vs. Humidity")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()

### Northern Hemisphere: Cloudiness (%) vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(north_lat, north_cloud)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(north_lat, north_cloud)
regress_values = north_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_lat, north_cloud)
plt.plot(north_lat,regress_values, "r-")
plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title("Latitude vs. Cloudiness")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()

### Southern Hemisphere: Cloudiness (%) vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(south_lat, south_cloud)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(south_lat, south_cloud)
regress_values = south_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_lat, south_cloud)
plt.plot(south_lat,regress_values, "r-")
plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.title("Southern Hemisphere Latitude vs. Cloudiness")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()

### Northern Hemisphere: Wind Speed (mph) vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(north_lat, north_wind)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(north_lat, north_wind)
regress_values = north_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(north_lat, north_wind)
plt.plot(north_lat,regress_values, "r-")
# plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title("Nortern Hemisphere: Latitude vs. Wind Speed")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()

### Southern Hemisphere: Wind Speed (mph) vs. Latitude Linear Regression 

In [None]:
correlation = st.pearsonr(south_lat, south_wind)
(slope, intercept, rvalue, pvalue, stderr) = stats.linregress(south_lat, south_wind)
regress_values = south_lat * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
plt.scatter(south_lat, south_wind)
plt.plot(south_lat,regress_values, "r-")
# plt.annotate(line_eq,(0,50),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title("Southern Hemisphere: Latitude vs. Wind Speed")
print(f"The r-value is {round(correlation[0],2)}")
plt.show()