In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import json
import scipy.stats
from scipy.stats import linregress

In [2]:
# Import API key
from api_key import weather_api_key

In [3]:
# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

In [4]:
# Output File (CSV): relative path of the city weather table; the same path is
# used later in the notebook when the collected data is written with to_csv
output_data_file = "output_data/cities.csv"

In [5]:
# Range of latitudes and longitudes
# Latitude bounds as a (min, max) tuple
lat_range = (-90, 90)
# Longitude bounds as a (min, max) tuple. This was previously assigned to
# lat_range a second time, which overwrote the latitude bounds and never
# defined the longitude ones.
lng_range = (-180, 180)

# Generate Cities List

In [6]:
# Create a set of random lat and lng combinations
# 1500 uniformly distributed samples per axis; latitudes are drawn before
# longitudes.
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)

# Store the pairs in a list rather than a bare zip(): a zip object is a one-shot
# iterator, so re-running the city-lookup cell after it had been consumed once
# would silently loop over nothing and produce an empty city list.
lat_lngs = list(zip(lats, lngs))

In [7]:
# Identify nearest city for each lat, lng combination.
# dict.fromkeys keeps only the first occurrence of every name and preserves
# insertion order, so each city appears once, in the order it was first seen.
cities = list(dict.fromkeys(
    citipy.nearest_city(latitude, longitude).city_name
    for latitude, longitude in lat_lngs
))
# cities

# Perform API Calls

In [8]:
# OpenWeatherMap current-weather endpoint. HTTPS is used because the API key
# travels in the query string and should not be sent in clear text.
url = "https://api.openweathermap.org/data/2.5/weather?"
# Unit system requested from the API; the plots below label temperature in F
# and wind speed in MPH accordingly.
units = "imperial"
# Everything except the city name: the request loop appends the city after "q=".
query_url = f"{url}appid={weather_api_key}&units={units}&q="
# query_url

In [None]:
# Parallel result lists: position i in every list describes the same city, so
# they must always grow together (they become DataFrame columns later on).
lat = []
lon = []
max_temp = []
humidity = []
cloudiness = []

wind_speed = []
date = []
country = []
city_name = []

# Cities for which no usable weather record could be retrieved
not_located = []
# Number shown as "Set N" in the log; starts at 1 and is bumped after every
# failed lookup.
except_set = 1

print('Beginning Data Rerieval')
print('-----------------------')

for index, city in enumerate(cities, start=1):
    try:
        # timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()
        # Read every field BEFORE touching the result lists: if any key is
        # missing, nothing has been appended yet and the lists stay aligned.
        record = (
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["dt"],
            response["sys"]["country"],
        )
    except (requests.exceptions.RequestException, KeyError, TypeError, ValueError):
        # Network failure, non-JSON body, or a payload without the expected
        # fields. Only these are caught, so Ctrl-C / kernel interrupt still
        # stops the loop (a bare "except:" would swallow it).
        not_located.append(city)
        print(f"city({city}) not found")
        except_set += 1
    else:
        # Same order as the fields in `record` above
        columns = (lat, lon, max_temp, humidity, cloudiness, wind_speed, date, country)
        for column, value in zip(columns, record):
            column.append(value)
        city_name.append(city)
        print(f'Processing Record {index} of Set {except_set} | {city}')

    # Pause for 2 seconds between requests
    time.sleep(2)
print('-----------------------')
print('Data Retrieval Complete')
print('-----------------------')


Beginning Data Rerieval
-----------------------
Processing Record 1 of Set 1 | san patricio
Processing Record 2 of Set 1 | kodiak
Processing Record 3 of Set 1 | rikitea
Processing Record 4 of Set 1 | belaya gora
Processing Record 5 of Set 1 | dingle
Processing Record 6 of Set 1 | xingyi
Processing Record 7 of Set 1 | chuy
Processing Record 8 of Set 1 | khatanga
Processing Record 9 of Set 1 | hithadhoo
Processing Record 10 of Set 1 | qaanaaq
Processing Record 11 of Set 1 | great yarmouth
Processing Record 12 of Set 1 | biak
Processing Record 13 of Set 1 | benfleet
Processing Record 14 of Set 1 | shahr-e kord
Processing Record 15 of Set 1 | nurota
city(tsihombe) not found
Processing Record 17 of Set 2 | deputatskiy
Processing Record 18 of Set 2 | leh
Processing Record 19 of Set 2 | avarua
Processing Record 20 of Set 2 | aklavik
Processing Record 21 of Set 2 | ekuvukeni
Processing Record 22 of Set 2 | atuona
Processing Record 23 of Set 2 | mataura
Processing Record 24 of Set 2 | batemans 

Processing Record 199 of Set 17 | kidal
Processing Record 200 of Set 17 | baft
city(haibowan) not found
Processing Record 202 of Set 18 | kingston
Processing Record 203 of Set 18 | west wendover
Processing Record 204 of Set 18 | regina
Processing Record 205 of Set 18 | namibe
Processing Record 206 of Set 18 | ornskoldsvik
Processing Record 207 of Set 18 | pietarsaari
Processing Record 208 of Set 18 | kolpashevo
Processing Record 209 of Set 18 | dossor
Processing Record 210 of Set 18 | makakilo city
Processing Record 211 of Set 18 | severo-kurilsk
Processing Record 212 of Set 18 | xining
Processing Record 213 of Set 18 | konevo
Processing Record 214 of Set 18 | kavieng
Processing Record 215 of Set 18 | meulaboh
Processing Record 216 of Set 18 | aksu
Processing Record 217 of Set 18 | parrita
city(meyungs) not found
Processing Record 219 of Set 19 | jega
Processing Record 220 of Set 19 | waw
Processing Record 221 of Set 19 | shigony
city(samusu) not found
Processing Record 223 of Set 20 |

Processing Record 397 of Set 34 | saint-ambroise
city(kuche) not found
Processing Record 399 of Set 35 | ondjiva
Processing Record 400 of Set 35 | taunggyi
Processing Record 401 of Set 35 | inongo
Processing Record 402 of Set 35 | mayo
Processing Record 403 of Set 35 | tessalit
Processing Record 404 of Set 35 | wisconsin rapids
Processing Record 405 of Set 35 | labuhan
city(fevralsk) not found
Processing Record 407 of Set 36 | naze
Processing Record 408 of Set 36 | kalat
Processing Record 409 of Set 36 | pinega
Processing Record 410 of Set 36 | sokolovyy
Processing Record 411 of Set 36 | richards bay
city(ijaki) not found
Processing Record 413 of Set 37 | cockburn town
Processing Record 414 of Set 37 | katherine
Processing Record 415 of Set 37 | oviedo
Processing Record 416 of Set 37 | adrar
Processing Record 417 of Set 37 | yumen
Processing Record 418 of Set 37 | margate
Processing Record 419 of Set 37 | hammerfest
Processing Record 420 of Set 37 | zhaotong
Processing Record 421 of Se

In [None]:
# Tally the lookups: every candidate city minus the ones recorded as not located
found, not_found = len(cities), len(not_located)
total_found = found - not_found
print(f'Cities were found {total_found}')


# Create Dataframe to store requests from those lists

In [None]:
# Format the date column of the df: each value in `date` is treated as seconds
# since the epoch, converted to UTC with time.gmtime and rendered as MM/DD/YYYY
dates = [time.strftime('%m/%d/%Y', time.gmtime(stamp)) for stamp in date]
# dates

In [None]:
# dates

In [None]:
# Create a data frame from the per-city result lists
weather_dict = {
    "city": city_name,
    "cloudiness":cloudiness,
    "Country":country,
    "date":dates,
    "humid":humidity,
    "lat": lat,
    "lng":lon,
    "max_temp": max_temp,
    "wind_speed":wind_speed,
}

# weather_data keeps the short column names that the plotting and regression
# cells index by ("lat", "max_temp", "humid", ...).
weather_data = pd.DataFrame(weather_dict)

# Copy with readable headers. "lat" used to be mapped to "Longitude", which
# produced two columns with that name and none called "Latitude".
weather_df = weather_data.rename(columns={
    "city":"City",
    "cloudiness":"Cloudiness",
    "date":"Date",
    "humid":"Humidity (%)",
    "lat":"Latitude",
    "lng":"Longitude",
    "max_temp":"Max Temperature",
    "wind_speed":"Wind Speed",
 })

# Write the short-named table to the configured output file (same location as
# before; the path now comes from output_data_file instead of a repeated literal).
weather_data.to_csv(output_data_file, index = False)
# Non-null count per column
count_weather_data = weather_df.count()


In [None]:
# Preview the first rows of weather_data
weather_data.head()

# Create scatter plots 

Latitude vs. Max Temperature

In [None]:
# Build a scatter plot of max temperature against latitude
# The first formatted date is used to label the plot title
title_date = dates[0]
plt.scatter(weather_data["lat"], weather_data["max_temp"], marker = 'o',
           edgecolor='red', alpha=.7)

# Incorporate the other graph properties
plt.title(f"City Latitude vs. Temperature({title_date})")
plt.ylabel("Max Temperature (F)")
plt.xlabel("Latitude")
plt.grid(alpha=0.2)

# Save the figure before showing it
plt.savefig('output_data/latitude_MaxTemp.png')

# Show plot (re-enabled so this cell matches the other scatter-plot cells)
plt.show()


Latitude vs. Humidity

In [None]:
# Humidity against latitude; the first formatted date labels the title
title_date = dates[0]
plt.scatter(x=weather_data["lat"], y=weather_data["humid"],
            alpha=.7, edgecolor='green')

# Axis labels, title and a faint grid
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title(f"City Latitude vs. Humidity({title_date})")
plt.grid(alpha=0.2)

# Write the figure to disk, then display it
plt.savefig('output_data/latitude_Humidity.png')
plt.show()



Latitude vs. Cloudiness

In [None]:
# Build a scatter plot of cloudiness against latitude
plt.scatter(weather_data["lat"], weather_data["cloudiness"],
           edgecolor='blue', alpha=.8)

# Incorporate the other graph properties
plt.title(f"City Latitude vs. Cloudiness ({title_date})")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(alpha=0.2)

# Save the figure exactly once, BEFORE plt.show(). The cell used to call
# savefig a second time after show(); by then the plotted figure may no longer
# be the current one, so the second call could overwrite this PNG with an
# empty canvas.
plt.savefig('output_data/latitude_Cloudiness.png')

# Show plot
plt.show()

Latitude vs. Wind Speed

In [None]:
# Wind speed against latitude
plt.scatter(x=weather_data["lat"], y=weather_data["wind_speed"],
            alpha=.5, edgecolor='black')

# Axis labels, title and a faint grid
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (MPH)")
plt.title(f"City Latitude vs. Wind Speed ({title_date})")
plt.grid(alpha=0.2)

# Write the figure to disk, then display it
plt.savefig('output_data/latitude_WindSpeed.png')
plt.show()

# Northern and Southern Hemisphere

In [None]:
# weather_data.head(3)

In [None]:
# Split the table by hemisphere with boolean masks on latitude:
# rows with lat >= 0 go to the northern frame, rows with lat < 0 to the southern
northern_df = weather_data[weather_data["lat"].ge(0)]
southern_df = weather_data[weather_data["lat"].lt(0)]
# northern_df
# southern_df

# Linear Regression

Northern Hemisphere 
(Max Temperature vs Latitude)

In [None]:
# Northern-hemisphere columns being compared: latitude (x) and max temperature (y)
north_lat = northern_df['lat']
north_temp = northern_df['max_temp']

# Fit max temperature against latitude with scipy's linregress; slope and
# intercept are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=north_lat, y=north_temp)
print(f'The correlation factor is {round(rvalue,2)}')

In [None]:
# Scatter of northern-hemisphere max temperature against latitude
plt.scatter(north_lat,north_temp)
plt.title(f'Northern Hemisphere\n Max Temp vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = north_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

# Overlay the fitted line in red and save the figure
plt.plot(north_lat, regress, 'r-')
plt.savefig("output_data/northLat_maxTemp.png")

Southern Hemisphere (Max Temperature vs Latitude)

In [None]:
# Southern-hemisphere columns being compared: latitude (x) and max temperature (y)
south_lat = southern_df['lat']
south_temp = southern_df['max_temp']

# Fit max temperature against latitude with scipy's linregress; slope and
# intercept are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=south_lat, y=south_temp)
print(f'The correlation factor is {round(rvalue,2)}')

In [None]:
# Scatter of southern-hemisphere max temperature against latitude
plt.scatter(south_lat,south_temp)
plt.title(f'Southern Hemisphere\n Max Temp vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = south_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

# Overlay the fitted line in red and save the figure
plt.plot(south_lat, regress, 'r-')
plt.savefig("output_data/southLat_maxTemp.png")

Northern Hemisphere - Humidity (%) vs. Latitude

In [None]:
# Northern-hemisphere columns being compared: latitude (x) and humidity (y)
north_lat = northern_df['lat']
north_humid = northern_df['humid']

# Fit humidity against latitude with scipy's linregress; slope and intercept
# are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=north_lat, y=north_humid)
print(f'The correlation factor is {round(rvalue,2)}')


In [None]:
# Scatter of northern-hemisphere humidity against latitude
plt.scatter(north_lat,north_humid)
plt.title(f'Northern Hemisphere\n Humidity (%) vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = north_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

# Overlay the fitted line in red and save the figure
plt.plot(north_lat, regress, 'r-')
plt.savefig("output_data/northLat_humidity.png")

Southern Hemisphere - Humidity (%) vs. Latitude

In [None]:
# Southern-hemisphere columns being compared: latitude (x) and humidity (y).
# NOTE: despite its name, south_temp holds the humidity column here; the name
# is kept because the plotting cell that follows reads it.
south_lat = southern_df['lat']
south_temp = southern_df['humid']

# Fit humidity against latitude with scipy's linregress; slope and intercept
# are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=south_lat, y=south_temp)
print(f'The correlation factor is {round(rvalue,2)}')

In [None]:
# Scatter of southern-hemisphere humidity against latitude
# (south_temp holds the humidity column at this point in the notebook)
plt.scatter(south_lat,south_temp)
# "Humidty" was misspelled in the title and y-label of the saved figure
plt.title(f'Southern Hemisphere\n Humidity (%) vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = south_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

# Overlay the fitted line in red and save the figure
plt.plot(south_lat, regress, 'r-')
plt.savefig("output_data/southLat_humidity.png")

Northern Hemisphere - Cloudiness vs. Latitude

In [None]:
# Northern-hemisphere columns being compared: latitude (x) and cloudiness (y).
# NOTE: despite its name, north_humid holds the cloudiness column here; the
# name is kept because the plotting cell that follows reads it.
north_lat = northern_df['lat']
north_humid = northern_df['cloudiness']

# Fit cloudiness against latitude with scipy's linregress; slope and intercept
# are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=north_lat, y=north_humid)
print(f'The correlation factor is {round(rvalue,2)}')


In [None]:
# Scatter of northern-hemisphere cloudiness against latitude
# (north_humid holds the cloudiness column at this point in the notebook)
plt.scatter(north_lat,north_humid)
plt.title(f'Northern Hemisphere\n Cloudiness vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = north_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

plt.plot(north_lat, regress, 'r-')
# Saved under its own name: this cell used to write northLat_humidity.png and
# thereby replaced the humidity regression figure.
plt.savefig("output_data/northLat_cloudiness.png")

Southern Hemisphere - Cloudiness vs. Latitude

In [None]:
# Southern-hemisphere columns being compared: latitude (x) and cloudiness (y).
# NOTE: despite its name, south_temp holds the cloudiness column here; the name
# is kept because the plotting cell that follows reads it.
south_lat = southern_df['lat']
south_temp = southern_df['cloudiness']

# Fit cloudiness against latitude with scipy's linregress; slope and intercept
# are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=south_lat, y=south_temp)
print(f'The correlation factor is {round(rvalue,2)}')

In [None]:
# Scatter of southern-hemisphere cloudiness against latitude
# (south_temp holds the cloudiness column at this point in the notebook)
plt.scatter(south_lat,south_temp)
plt.title(f'Southern Hemisphere\n Cloudiness vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Cloudiness')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = south_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

plt.plot(south_lat, regress, 'r-')
# Saved under its own name: this cell used to write southLat_humidity.png and
# thereby replaced the humidity regression figure.
plt.savefig("output_data/southLat_cloudiness.png")

Northern Hemisphere - Wind Speed (MPH) vs. Latitude

In [None]:
# Northern-hemisphere columns being compared: latitude (x) and wind speed (y).
# NOTE: despite its name, north_humid holds the wind-speed column here; the
# name is kept because the plotting cell that follows reads it.
north_lat = northern_df['lat']
north_humid = northern_df['wind_speed']

# Fit wind speed against latitude with scipy's linregress; slope and intercept
# are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=north_lat, y=north_humid)
print(f'The correlation factor is {round(rvalue,2)}')


In [None]:
# Scatter of northern-hemisphere wind speed against latitude
# (north_humid holds the wind-speed column at this point in the notebook)
plt.scatter(north_lat,north_humid)
plt.title(f'Northern Hemisphere\n Wind Speed (MPH) vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (MPH)')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = north_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

plt.plot(north_lat, regress, 'r-')
# Saved under its own name: this cell used to write northLat_humidity.png and
# thereby replaced the humidity regression figure.
plt.savefig("output_data/northLat_windSpeed.png")

Southern Hemisphere - Wind Speed (MPH) vs. Latitude

In [None]:
# Southern-hemisphere columns being compared: latitude (x) and wind speed (y).
# NOTE: despite its name, south_temp holds the wind-speed column here; the name
# is kept because the plotting cell that follows reads it.
south_lat = southern_df['lat']
south_temp = southern_df['wind_speed']

# Fit wind speed against latitude with scipy's linregress; slope and intercept
# are reused by the plotting cell that follows
slope, intercept, rvalue, pvalue, stderr = linregress(x=south_lat, y=south_temp)
print(f'The correlation factor is {round(rvalue,2)}')

In [None]:
# Scatter of southern-hemisphere wind speed against latitude
# (south_temp holds the wind-speed column at this point in the notebook)
plt.scatter(south_lat,south_temp)
plt.title(f'Southern Hemisphere\n Wind Speed (MPH) vs. Latitude Linear Regression\n ({dates[0]})')
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (MPH)')
plt.grid(alpha=0.2)

# Value of the fitted line at each latitude
regress = south_lat * slope + intercept

# Calculate y equation. The text itself is stored in line_equation: print()
# returns None, so assigning its result left the variable empty.
line_equation = f"y = {round(slope, 2)} x + {round(intercept,2)}"
print(line_equation)

plt.plot(south_lat, regress, 'r-')
# Saved under its own name: this cell used to write southLat_humidity.png and
# thereby replaced the humidity regression figure.
plt.savefig("output_data/southLat_windSpeed.png")

# Analysis