# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [18]:
# Dependencies and Setup
import json
import requests
from scipy import stats
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
import datetime
from scipy.stats import linregress

# Import API key
from api_key import weather_api_key

# Incorporate citipy to determine city based on latitude and longitude
from citipy import citipy

# CSV file that the collected city weather data is exported to
output_data_file = "../output_data/city_data.csv"

# (min, max) bounds for latitude and for longitude
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [19]:
# Build the list of unique city names nearest to a random sample of coordinates.
cities = []

# Draw 1500 random coordinate pairs within the configured latitude/longitude
# bounds (lat_range / lng_range) instead of repeating the limits as literals.
lats = np.random.uniform(low=lat_range[0], high=lat_range[1], size=1500)
lngs = np.random.uniform(low=lng_range[0], high=lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# A set gives O(1) duplicate checks; the list keeps first-seen order.
seen_cities = set()
for latitude, longitude in lat_lngs:
    city = citipy.nearest_city(latitude, longitude).city_name

    # Only keep a city the first time it is encountered
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

640

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [20]:
# Base URL for the current-weather endpoint (imperial units, API key attached)
base_url = "http://api.openweathermap.org/data/2.5/weather?units=imperial&appid=" + weather_api_key

# Counters used only for the progress log (records are grouped in sets of 50)
city_counter = 1
set_counter = 1

# Column lists for the DataFrame; they must always stay the same length
city_names = []
lat = []
long = []
maxtemp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

print("Beginning Data Retrieval")
print("-----------------------------")

# Query the API once per city and collect the fields of interest
for i, city in enumerate(cities):

    # Group cities as sets of 50s
    if i % 50 == 0 and i >= 50:
        set_counter += 1
        city_counter = 1

    # Log the city being processed, then advance the record counter
    print(f"Processing Record {city_counter} of Set {set_counter} | {city}")
    city_counter += 1

    # Let requests URL-encode the city name, and bound the wait with a timeout.
    # A failed request skips this city instead of aborting the whole run.
    try:
        response = requests.get(base_url, params={"q": city}, timeout=10).json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Report only the exception type: its message can contain the request
        # URL, which includes the API key.
        print(f"Request failed ({type(error).__name__}). Skipping...")
        continue

    # Read every field before appending anything, so a response that lacks a
    # field (e.g. an unknown city) cannot leave the lists at unequal lengths.
    try:
        record = (
            response["clouds"]["all"],
            response["sys"]["country"],
            response["dt"],
            response["main"]["humidity"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["wind"]["speed"],
            response["name"],
        )
    except (KeyError, TypeError):
        print("City not found. Skipping...")
        continue

    cloudiness.append(record[0])
    country.append(record[1])
    date.append(record[2])
    humidity.append(record[3])
    lat.append(record[4])
    long.append(record[5])
    maxtemp.append(record[6])
    wind_speed.append(record[7])
    city_names.append(record[8])

print("-----------------------------")
print("Data Retrieval Complete")
print("-----------------------------")

Beginning Data Retrieval
-----------------------------
Processing Record 1 of Set 1 | tsihombe
City not found. Skipping...
Processing Record 2 of Set 1 | ozgon
City not found. Skipping...
Processing Record 3 of Set 1 | teya
Processing Record 4 of Set 1 | saskylakh
Processing Record 5 of Set 1 | saldanha
Processing Record 6 of Set 1 | butaritari
Processing Record 7 of Set 1 | cairns
Processing Record 8 of Set 1 | kodiak
Processing Record 9 of Set 1 | vieux-habitants
Processing Record 10 of Set 1 | kula
Processing Record 11 of Set 1 | mataura
Processing Record 12 of Set 1 | luderitz
Processing Record 13 of Set 1 | muzhi
Processing Record 14 of Set 1 | rikitea
Processing Record 15 of Set 1 | nanning
Processing Record 16 of Set 1 | riyadh
Processing Record 17 of Set 1 | dikson
Processing Record 18 of Set 1 | ushuaia
Processing Record 19 of Set 1 | qaqortoq
Processing Record 20 of Set 1 | mwinilunga
Processing Record 21 of Set 1 | port lincoln
Processing Record 22 of Set 1 | honiara
Process

Processing Record 42 of Set 4 | meulaboh
Processing Record 43 of Set 4 | saeby
Processing Record 44 of Set 4 | ngukurr
City not found. Skipping...
Processing Record 45 of Set 4 | taltal
Processing Record 46 of Set 4 | beidao
Processing Record 47 of Set 4 | san rafael
Processing Record 48 of Set 4 | calama
Processing Record 49 of Set 4 | rock sound
Processing Record 50 of Set 4 | samusu
City not found. Skipping...
Processing Record 1 of Set 5 | rassvet
Processing Record 2 of Set 5 | adrar
Processing Record 3 of Set 5 | mouzakion
City not found. Skipping...
Processing Record 4 of Set 5 | keita
Processing Record 5 of Set 5 | mount gambier
Processing Record 6 of Set 5 | grindavik
Processing Record 7 of Set 5 | bolshegrivskoye
City not found. Skipping...
Processing Record 8 of Set 5 | twin falls
Processing Record 9 of Set 5 | tumannyy
City not found. Skipping...
Processing Record 10 of Set 5 | mana
Processing Record 11 of Set 5 | ancud
Processing Record 12 of Set 5 | alta floresta
Processin

Processing Record 25 of Set 8 | bongor
Processing Record 26 of Set 8 | thessalon
Processing Record 27 of Set 8 | saint-augustin
Processing Record 28 of Set 8 | east wenatchee bench
Processing Record 29 of Set 8 | ajdabiya
Processing Record 30 of Set 8 | kyshtovka
Processing Record 31 of Set 8 | sao gabriel da cachoeira
Processing Record 32 of Set 8 | cidreira
Processing Record 33 of Set 8 | djambala
Processing Record 34 of Set 8 | atasu
Processing Record 35 of Set 8 | catuday
Processing Record 36 of Set 8 | premia de mar
Processing Record 37 of Set 8 | awbari
Processing Record 38 of Set 8 | librazhd
Processing Record 39 of Set 8 | college
Processing Record 40 of Set 8 | wollongong
Processing Record 41 of Set 8 | salalah
Processing Record 42 of Set 8 | bethanien
Processing Record 43 of Set 8 | aranos
Processing Record 44 of Set 8 | inhambane
Processing Record 45 of Set 8 | areosa
Processing Record 46 of Set 8 | kassala
Processing Record 47 of Set 8 | kodinsk
Processing Record 48 of Set 

Processing Record 13 of Set 12 | mar de espanha
Processing Record 14 of Set 12 | mahuva
Processing Record 15 of Set 12 | banjar
Processing Record 16 of Set 12 | anloga
Processing Record 17 of Set 12 | huarmey
Processing Record 18 of Set 12 | lata
Processing Record 19 of Set 12 | san juan bautista
Processing Record 20 of Set 12 | bay roberts
Processing Record 21 of Set 12 | jian
Processing Record 22 of Set 12 | singaraja
Processing Record 23 of Set 12 | neijiang
Processing Record 24 of Set 12 | manyana
Processing Record 25 of Set 12 | orange cove
Processing Record 26 of Set 12 | senmonorom
City not found. Skipping...
Processing Record 27 of Set 12 | robore
Processing Record 28 of Set 12 | gulshat
City not found. Skipping...
Processing Record 29 of Set 12 | ondjiva
Processing Record 30 of Set 12 | pocatello
Processing Record 31 of Set 12 | bergerac
Processing Record 32 of Set 12 | vardo
Processing Record 33 of Set 12 | sevenoaks
Processing Record 34 of Set 12 | dongsheng
Processing Recor

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [21]:
# Assemble the per-city lists into a single table, one column per measurement
city_columns = [
    "City", "Lat", "Lng", "Max Temp", "Humidity",
    "Cloudiness", "Wind Speed", "Country", "Date",
]
city_values = [
    city_names, lat, long, maxtemp, humidity,
    cloudiness, wind_speed, country, date,
]
city_data_df = pd.DataFrame(dict(zip(city_columns, city_values)))

# Show the resulting table
city_data_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Teya,60.3778,92.6267,37.90,72,100,10.60,RU,1620963727
1,Saskylakh,71.9167,114.0833,31.14,85,90,2.26,RU,1620963727
2,Saldanha,-33.0117,17.9442,50.00,81,97,6.80,ZA,1620963727
3,Butaritari,3.0707,172.7902,81.90,79,64,9.35,KI,1620963727
4,Cairns,-16.9167,145.7667,88.00,54,0,12.66,AU,1620963728
...,...,...,...,...,...,...,...,...,...
580,Eyl,7.9803,49.8164,83.34,62,83,22.88,SO,1620963863
581,Dubai,25.2582,55.3047,78.80,73,29,6.91,AE,1620963729
582,Morehead,37.2711,-87.1764,48.00,81,1,3.00,US,1620963863
583,Palma de Mallorca,39.5694,2.6502,61.00,88,20,2.30,ES,1620963652


In [26]:
# Preview the first five rows of the city data
city_data_df.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,Teya,60.3778,92.6267,37.9,72,100,10.6,RU,1620963727
1,Saskylakh,71.9167,114.0833,31.14,85,90,2.26,RU,1620963727
2,Saldanha,-33.0117,17.9442,50.0,81,97,6.8,ZA,1620963727
3,Butaritari,3.0707,172.7902,81.9,79,64,9.35,KI,1620963727
4,Cairns,-16.9167,145.7667,88.0,54,0,12.66,AU,1620963728


In [27]:
# Inspect the data: summary statistics (count, mean, std, min, quartiles, max)
# for each numeric column
city_data_df.describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,585.0,585.0,585.0,585.0,585.0,585.0,585.0
mean,19.060698,13.697103,64.192701,70.278632,52.529915,7.371179,1620964000.0
std,32.528077,88.270925,17.007941,20.919103,38.340886,5.222472,68.41928
min,-54.8,-179.1667,15.8,7.0,0.0,0.11,1620963000.0
25%,-8.145,-61.9014,51.42,60.0,12.0,3.49,1620964000.0
50%,20.931,15.8999,64.33,74.0,62.0,5.75,1620964000.0
75%,46.8333,88.1167,78.8,87.0,90.0,9.55,1620964000.0
max,78.2186,179.3167,98.01,100.0,100.0,40.31,1620964000.0


In [17]:
# Export the city data to the configured CSV path (output_data_file) rather
# than repeating the path literal; the row index is not written.
city_data_df.to_csv(output_data_file, index=False)

In [12]:
# Number of city records (rows) in the table
countforme = city_data_df.shape[0]
countforme

574

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

In [None]:
# Newest observation time in the data (Unix timestamp), for plot titles
tstamp = city_data_df["Date"].max()

# Convert the Unix timestamp to a local-time MM/DD/YY string
local_time = time.localtime(tstamp)
fdate = time.strftime("%m/%d/%y", local_time)
fdate

#### Latitude vs. Temperature Plot

In [None]:
# Scatter plot of maximum temperature against latitude for every city
plt.scatter(city_data_df["Lat"], city_data_df["Max Temp"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Temperature (°F)")
# Include the date of analysis (fdate, derived from the newest record) in the title
plt.title(f"Temperature (°F) vs Latitude ({fdate})")

plt.savefig("../images/tempvlat.png")

# As latitude approaches 0, i.e. the equator, temperature increases

#### Latitude vs. Humidity Plot

In [None]:
# Scatter plot of humidity against latitude for every city
plt.scatter(city_data_df["Lat"], city_data_df["Humidity"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
# Include the date of analysis (fdate, derived from the newest record) in the title
plt.title(f"Humidity (%) vs Latitude ({fdate})")

plt.savefig("../images/humvlat.png")

# There seems to be no relationship between humidity and latitude

#### Latitude vs. Cloudiness Plot

In [None]:
# Scatter plot of cloudiness against latitude for every city
plt.scatter(city_data_df["Lat"], city_data_df["Cloudiness"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
# Include the date of analysis (fdate, derived from the newest record) in the title
plt.title(f"Cloudiness (%) vs Latitude ({fdate})")

plt.savefig("../images/cloudvlat.png")

# There seems to be no relationship between cloudiness and latitude

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter plot of wind speed against latitude for every city
plt.scatter(city_data_df["Lat"], city_data_df["Wind Speed"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
# Include the date of analysis (fdate, derived from the newest record) in the title
plt.title(f"Wind Speed (mph) vs Latitude ({fdate})")

plt.savefig("../images/windvlat.png")

# There seems to be no relationship between wind speed and latitude

## Linear Regression

In [None]:
# Split the cities by hemisphere: latitude >= 0 is northern, latitude < 0 is southern
latitudes = city_data_df["Lat"]
northern_cities = city_data_df[latitudes.ge(0)]
southern_cities = city_data_df[latitudes.lt(0)]

In [None]:
# Create Northern and Southern Hemisphere DataFrames
northlat_df = city_data_df.loc[city_data_df["Lat"] >= 0, :]
southlat_df = city_data_df.loc[city_data_df["Lat"] < 0, :]


def plot_linear_regression(x_values, y_values, y_label, text_coordinates):
    """Scatter-plot y against latitude and overlay the least-squares line.

    The hemisphere regression cells below call this helper.

    Args:
        x_values: Latitudes (pandas Series or array-like of numbers).
        y_values: Measurements paired with ``x_values``.
        y_label: Text used for the y-axis label and the plot title.
        text_coordinates: ``(x, y)`` position, in data coordinates, of the
            regression-equation annotation. It should lie inside the plotted
            range, otherwise matplotlib clips the annotation.

    Returns:
        The ``scipy.stats.linregress`` result for the fitted line.

    The figure is deliberately left open (no ``plt.show()``) so the caller
    can still ``plt.savefig`` it afterwards.
    """
    x_array = np.asarray(x_values, dtype=float)
    y_array = np.asarray(y_values, dtype=float)

    fit = linregress(x_array, y_array)
    regress_values = x_array * fit.slope + fit.intercept

    # Format as "y = ax + b" / "y = ax - b" so a negative intercept reads cleanly
    sign = "+" if fit.intercept >= 0 else "-"
    line_equation = f"y = {fit.slope:.2f}x {sign} {abs(fit.intercept):.2f}"

    plt.scatter(x_array, y_array, marker="o", s=25, facecolor="teal", edgecolor="black")
    plt.plot(x_array, regress_values, "r-")
    plt.annotate(line_equation, text_coordinates, fontsize=12, color="black")
    plt.grid()
    plt.xlabel("Latitude")
    plt.ylabel(y_label)
    plt.title(f"{y_label} vs Latitude")

    # Report the correlation coefficient that the written conclusions refer to
    print(f"The r-value is: {fit.rvalue}")
    return fit

In [None]:
#Your next objective is to run linear regression on each relationship, only this time separating 
#them into Northern Hemisphere (greater than or equal to 0 degrees latitude) and 
#Southern Hemisphere (less than 0 degrees latitude):

#Northern Hemisphere - Max Temp vs. Latitude
#Southern Hemisphere - Max Temp vs. Latitude
#Northern Hemisphere - Humidity (%) vs. Latitude
#Southern Hemisphere - Humidity (%) vs. Latitude
#Northern Hemisphere - Cloudiness (%) vs. Latitude
#Southern Hemisphere - Cloudiness (%) vs. Latitude
#Northern Hemisphere - Wind Speed (mph) vs. Latitude
#Southern Hemisphere - Wind Speed (mph) vs. Latitude

#After each pair of plots explain what the linear regression is analyzing, any relationships you notice and any other analysis you may have.

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot Temperature (°F) vs Latitude with linear regression for northern hemisphere
plt.scatter(northern_cities["Lat"], northern_cities["Max Temp"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Temperature (°F)")
plt.title("Temperature (°F) vs Latitude (NH)")
(slope, intercept, rvalue, pvalue, stderr) = linregress(northern_cities["Lat"], northern_cities["Max Temp"])
regress_values = northern_cities["Lat"] * slope + intercept
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
# Anchor the equation in axes-fraction coordinates (lower-left corner) so it is
# always visible; a fixed data point such as (10, 10) is clipped whenever it
# falls outside the plotted temperature range.
plt.annotate(line_equation, (0.05, 0.05), xycoords="axes fraction", fontsize=12, color="black")
plt.plot(northern_cities["Lat"], regress_values, "r-")

# Report the correlation coefficient the written conclusion refers to
print(f"The r-value is: {rvalue}")

plt.savefig("../images/tempvlatnh.png")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Plot Temperature (°F) vs Latitude with linear regression for southern hemisphere
plt.scatter(southern_cities["Lat"], southern_cities["Max Temp"], marker="o", s=25, facecolor="teal", edgecolor="black")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Temperature (°F)")
plt.title("Temperature (°F) vs Latitude (SH)")
(slope, intercept, rvalue, pvalue, stderr) = linregress(southern_cities["Lat"], southern_cities["Max Temp"])
regress_values = southern_cities["Lat"] * slope + intercept
line_equation = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))
# Anchor the equation in axes-fraction coordinates (upper-left corner) so it
# stays visible whatever the sampled latitude/temperature range is.
plt.annotate(line_equation, (0.05, 0.9), xycoords="axes fraction", fontsize=12, color="black")
plt.plot(southern_cities["Lat"], regress_values, "r-")

# Report the correlation coefficient the written conclusion refers to
print(f"The r-value is: {rvalue}")

plt.savefig("../images/tempvlatsh.png")

In [None]:
# Conclusion for the two temperature regressions. The sign of the relationship
# differs by hemisphere: temperature rises toward the equator from both sides.
print("The high r values indicate a strong correlation between latitude and max temperature: negative in the northern hemisphere and positive in the southern hemisphere.")

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for the northern-hemisphere cities
x_values = northlat_df["Lat"]
y_values = northlat_df["Humidity"]
plot_linear_regression(x_values, y_values, 'Humidity', (6, 30))

# Save alongside the other figures, with an explicit .png extension
plt.savefig("../images/northernhumvlat.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for the southern-hemisphere cities
x_values = southlat_df["Lat"]
y_values = southlat_df["Humidity"]
# Anchor the equation at the lower-left corner of the data: every southern
# latitude is negative, so a fixed x of 6 is outside the plotted range.
plot_linear_regression(x_values, y_values, 'Humidity', (x_values.min(), y_values.min()))

# Save alongside the other figures, with an explicit .png extension
plt.savefig("../images/southernhumvlat.png")

print("The low r values indicate a weak to no relationship between humidity and latitude.")

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for the northern-hemisphere cities
x_values = northlat_df["Lat"]
y_values = northlat_df["Cloudiness"]
plot_linear_regression(x_values, y_values, 'Cloudiness', (6, 30))

# Save alongside the other figures, with an explicit .png extension
plt.savefig("../images/northerncloudvlat.png")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for the southern-hemisphere cities
x_values = southlat_df["Lat"]
y_values = southlat_df["Cloudiness"]
# Anchor the equation at the lower-left corner of the data: every southern
# latitude is negative, so a fixed x of 6 is outside the plotted range.
plot_linear_regression(x_values, y_values, 'Cloudiness', (x_values.min(), y_values.min()))

# Save alongside the other figures, with an explicit .png extension
plt.savefig("../images/southerncloudvlat.png")

print("The low r values indicate a weak positive relationship between latitude and cloudiness.")

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for the northern-hemisphere cities
x_values = northlat_df["Lat"]
y_values = northlat_df["Wind Speed"]
plot_linear_regression(x_values, y_values, 'Wind Speed', (6, 30))

# Save alongside the other figures, with an explicit .png extension
plt.savefig("../images/northernwindvlat.png")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for the southern-hemisphere cities
x_values = southlat_df["Lat"]
y_values = southlat_df["Wind Speed"]
# Anchor the equation at the lower-left corner of the data: every southern
# latitude is negative, so a fixed x of 6 is outside the plotted range.
plot_linear_regression(x_values, y_values, 'Wind Speed', (x_values.min(), y_values.min()))

# Save alongside the other figures, with an explicit .png extension
plt.savefig("../images/southernwindvlat.png")

print("The low r value indicates that there is no significant relationship between wind speed and latitude. The difference between the hemispheres doesn't seem to be significant enough to comment upon.")