### Setup

In [74]:
# Dependencies
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import json

# Api key
from api_keys import weather_api_key

# citipy to determine the nearest city based on latitude and longitude
from citipy import citipy

# Path of the output CSV file (only the path is defined here; nothing is written in this cell)
output_data_file = "output_data/cities.csv"

# (min, max) bounds used when sampling random latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180,180)

### Generate Cities List

In [58]:
# Draw 1500 random coordinates uniformly across the configured lat/lng bounds
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. dict.fromkeys drops
# repeated names while keeping the order in which each city was first seen.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Show the count to make sure it is 500+
len(cities)

600

### Perform API Calls

In [75]:
# OpenWeatherMap current-weather endpoint and the unit system to request
weather_url = "https://api.openweathermap.org/data/2.5/weather?"
units = "imperial"

# Base query string; the city name gets appended after the trailing "q="
query_url = "{}appid={}&units={}&q=".format(weather_url, weather_api_key, units)

SyntaxError: EOL while scanning string literal (232131323.py, line 6)

In [76]:
# Create empty lists for max temperature, latitude, longitude, humidity,
# cloudiness, wind speed, country, date and city name
max_temp_F = []
lat = []
lon = []
humidity = []
clouds = []
wind = []
country = []
date = []
city_name = []

# Same lists, in the order the fields are extracted below, so that one
# city's values can be appended to all of them in a single step
result_lists = (max_temp_F, lat, lon, humidity, clouds, wind, country, date, city_name)

# Start printing log
print("Beginning Data Retrieval")
print("-------------------------------")

# Counter of successfully processed cities (used for the log numbering)
city_count = 1

# Loop through the cities and perform one data request per city
for city in cities:

    try:
        # The timeout keeps one stalled connection from hanging the whole run
        response = requests.get(query_url + city, timeout=10).json()
    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failure or a non-JSON body. Only the exception type is
        # printed because the full message can contain the request URL,
        # which includes the API key.
        print(f"Request failed for {city} ({type(err).__name__}). Skipping...")
        response = None

    # Pause between requests (presumably to respect the API's rate limit)
    time.sleep(1)

    if response is None:
        continue

    # Read every field BEFORE appending anything: if one key is missing,
    # no list is touched, so all lists always keep the same length.
    try:
        fields = (
            response['main']['temp_max'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
            response['dt'],
            response['name'],
        )
    except (KeyError, TypeError):
        # The expected keys are absent from the payload (presumably the API's
        # error response for a city it does not know)
        print("City not found. Skipping...")
        continue

    for values, value in zip(result_lists, fields):
        values.append(value)

    print(f"Processing Record {city_count} | {city}")

    # Advance the counter only for cities that were actually stored
    city_count += 1

# End printing log
print("-------------------------------")
print("Data Retrieval Complete")
print("-------------------------------")

Beginning Data Retrieval
-------------------------------
Processing Record 1 | port alfred
Processing Record 2 | longyearbyen
Processing Record 3 | buala
Processing Record 4 | punta arenas
City not found. Skipping...
Processing Record 5 | gold coast
Processing Record 6 | borogontsy
Processing Record 7 | butaritari
Processing Record 8 | angoche
Processing Record 9 | pochutla
Processing Record 10 | ushuaia
Processing Record 11 | comodoro rivadavia
Processing Record 12 | rikitea
Processing Record 13 | vila
City not found. Skipping...
Processing Record 14 | busselton
Processing Record 15 | bredasdorp
Processing Record 16 | salalah
Processing Record 17 | jamestown
Processing Record 18 | mataura
Processing Record 19 | hermanus
Processing Record 20 | victoria
City not found. Skipping...
Processing Record 21 | cherskiy
Processing Record 22 | cape town
Processing Record 23 | saskylakh
Processing Record 24 | port hardy
Processing Record 25 | sorland
Processing Record 26 | mingaora
Processing Rec

Processing Record 225 | jumla
Processing Record 226 | geraldton
Processing Record 227 | kopavogur
City not found. Skipping...
City not found. Skipping...
Processing Record 228 | port elizabeth
Processing Record 229 | marsa matruh
Processing Record 230 | elliot
Processing Record 231 | coos bay
City not found. Skipping...
Processing Record 232 | kropotkin
Processing Record 233 | westport
City not found. Skipping...
Processing Record 234 | taksimo
Processing Record 235 | farrukhnagar
Processing Record 236 | dennery
Processing Record 237 | bilma
Processing Record 238 | zhanaozen
Processing Record 239 | sal rei
Processing Record 240 | saint-georges
Processing Record 241 | unity
Processing Record 242 | paldiski
Processing Record 243 | jarwal
Processing Record 244 | turbat
Processing Record 245 | saldanha
Processing Record 246 | yulara
Processing Record 247 | kautokeino
Processing Record 248 | rabo de peixe
Processing Record 249 | preobrazheniye
Processing Record 250 | gagino
City not found. 

Processing Record 458 | itaqui
Processing Record 459 | talara
Processing Record 460 | tarazona
Processing Record 461 | talnakh
Processing Record 462 | craig
Processing Record 463 | miramar
Processing Record 464 | jiuquan
Processing Record 465 | banjarmasin
Processing Record 466 | arlit
Processing Record 467 | jaisalmer
Processing Record 468 | elbrus
Processing Record 469 | mecca
Processing Record 470 | timiryazevskoye
Processing Record 471 | udomlya
Processing Record 472 | chenghai
Processing Record 473 | jalu
Processing Record 474 | nampa
Processing Record 475 | saint-paul
Processing Record 476 | flin flon
Processing Record 477 | yaan
Processing Record 478 | rafsanjan
Processing Record 479 | cochrane
Processing Record 480 | north battleford
Processing Record 481 | noyabrsk
Processing Record 482 | dongsheng
Processing Record 483 | praia da vitoria
Processing Record 484 | zhezkazgan
Processing Record 485 | kapoeta
Processing Record 486 | shahrud
Processing Record 487 | sur
Processing Re

###  Convert Raw Data to DataFrame

In [77]:
# Map each output column name to the list collected from the API responses
weather_columns = {
    "City": city_name,
    "Latitude": lat,
    "Longitude": lon,
    "Max Temperature (F)": max_temp_F,
    "Humidity": humidity,
    "Cloudiness": clouds,
    "Wind Speed": wind,
    "Country": country,
    "Date": date,
}

# Build the DataFrame from the column mapping and preview the first rows
weather_data_df = pd.DataFrame(weather_columns)
weather_data_df.head()

Unnamed: 0,City,Latitude,Longitude,Max Temperature (F),Humidity,Cloudiness,Wind Speed,Country,Date
0,Port Alfred,-33.5906,26.891,52.18,85,100,9.31,ZA,1660366254
1,Longyearbyen,78.2186,15.6401,46.24,81,75,9.22,SJ,1660366255
2,Buala,-8.145,159.5921,83.84,67,37,3.02,SB,1660366257
3,Punta Arenas,-53.15,-70.9167,33.91,80,40,6.91,CL,1660366242
4,Gold Coast,-28.0,153.4333,62.24,94,100,8.05,AU,1660366053


In [79]:
# Summary statistics for the numeric columns; the "count" row shows how many
# cities were retrieved (need 500+)
weather_data_df.describe()

Unnamed: 0,Latitude,Longitude,Max Temperature (F),Humidity,Cloudiness,Wind Speed,Date
count,555.0,555.0,555.0,555.0,555.0,555.0,555.0
mean,18.611237,15.478704,69.289387,70.473874,55.052252,7.619207,1660367000.0
std,33.100708,91.068948,13.294061,19.040067,38.923221,5.156494,272.5775
min,-54.8,-175.2,30.97,1.0,0.0,0.0,1660366000.0
25%,-8.75025,-60.7606,59.84,59.5,13.5,3.69,1660366000.0
50%,21.7667,20.2251,70.47,74.0,60.0,6.42,1660367000.0
75%,46.39,88.28195,79.045,84.0,97.0,10.36,1660367000.0
max,78.2186,179.3167,106.79,100.0,100.0,30.33,1660367000.0


### Inspect the data and remove cities where humidity > 100%