In [None]:
#SORT OUT SAVING FIGURES
#SEE IF IT RUNS
#LOOK AT THE CORRELATION AND OBSERVERATIONS

# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

602

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [3]:
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "Imperial"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [12]:
max_temp = []
humidity = []
wind_speed = []
cloudiness = []
lat = []
lng = []
country = []
date = []


print("Beginning Data Retrieval")
print("-----------------------------")

record_count = 1
set_count = 1

for city in cities:  
    try:
        print(f"Processing Record {record_count} of Set {set_count} | {city}")
        response = requests.get(query_url + city).json()
        
        max_temp.append(response['main']['temp_max'])
        humidity.append(response['main']['humidity'])
        wind_speed.append(response['wind']['speed'])
        cloudiness.append(response['clouds']['all'])
        lat.append(response['coord']['lat'])
        lng.append(response['coord']['lon'])
        country.append(response['sys']['country'])
        date.append(response['dt'])
        
    except KeyError:
        print(f"No city found. Skipping...")

    record_count += 1
    if record_count == 50:
        record_count = 1
        set_count += 1
        
print("-----------------------------")
print("Data Retrieval Complete")

Beginning Data Retrieval
-----------------------------
Processing Record 1 of Set 1 | hamilton
Processing Record 2 of Set 1 | souillac
Processing Record 3 of Set 1 | new norfolk
Processing Record 4 of Set 1 | filingue
Processing Record 5 of Set 1 | hobart
Processing Record 6 of Set 1 | mys shmidta
City not found. Skipping...
Processing Record 7 of Set 1 | jizan
Processing Record 8 of Set 1 | torbay
Processing Record 9 of Set 1 | east london
Processing Record 10 of Set 1 | santa cruz
Processing Record 11 of Set 1 | sentyabrskiy
City not found. Skipping...
Processing Record 12 of Set 1 | punta arenas
Processing Record 13 of Set 1 | imeni poliny osipenko
Processing Record 14 of Set 1 | khatanga
Processing Record 15 of Set 1 | kulhudhuffushi
Processing Record 16 of Set 1 | vaini
Processing Record 17 of Set 1 | whakatane
Processing Record 18 of Set 1 | cape town
Processing Record 19 of Set 1 | mataura
Processing Record 20 of Set 1 | ushuaia
Processing Record 21 of Set 1 | kudahuvadhoo
Proce

Processing Record 42 of Set 4 | doctor pedro p. pena
City not found. Skipping...
Processing Record 43 of Set 4 | necochea
Processing Record 44 of Set 4 | wawa
Processing Record 45 of Set 4 | ancud
Processing Record 46 of Set 4 | nanortalik
Processing Record 47 of Set 4 | bathsheba
Processing Record 48 of Set 4 | antofagasta
Processing Record 49 of Set 4 | barrow
Processing Record 1 of Set 5 | suez
Processing Record 2 of Set 5 | hernani
Processing Record 3 of Set 5 | tacuati
Processing Record 4 of Set 5 | moron
Processing Record 5 of Set 5 | oranjestad
Processing Record 6 of Set 5 | san rafael del sur
Processing Record 7 of Set 5 | illoqqortoormiut
City not found. Skipping...
Processing Record 8 of Set 5 | mogadishu
Processing Record 9 of Set 5 | hithadhoo
Processing Record 10 of Set 5 | trinidad
Processing Record 11 of Set 5 | bonthe
Processing Record 12 of Set 5 | grindavik
Processing Record 13 of Set 5 | besancon
Processing Record 14 of Set 5 | qaqortoq
Processing Record 15 of Set 5 

Processing Record 36 of Set 8 | nelson
Processing Record 37 of Set 8 | biasca
Processing Record 38 of Set 8 | sinegorye
Processing Record 39 of Set 8 | vaitupu
City not found. Skipping...
Processing Record 40 of Set 8 | kieta
Processing Record 41 of Set 8 | ulaangom
Processing Record 42 of Set 8 | mandera
Processing Record 43 of Set 8 | nizhniy kuranakh
Processing Record 44 of Set 8 | samusu
City not found. Skipping...
Processing Record 45 of Set 8 | tanout
Processing Record 46 of Set 8 | chavakachcheri
Processing Record 47 of Set 8 | verkhoyansk
Processing Record 48 of Set 8 | miquelon
Processing Record 49 of Set 8 | wampusirpi
City not found. Skipping...
Processing Record 1 of Set 9 | vilyuysk
Processing Record 2 of Set 9 | roros
Processing Record 3 of Set 9 | deputatskiy
Processing Record 4 of Set 9 | lumsden
Processing Record 5 of Set 9 | margau
Processing Record 6 of Set 9 | amurzet
Processing Record 7 of Set 9 | port-gentil
Processing Record 8 of Set 9 | itapetinga
Processing Rec

Processing Record 27 of Set 12 | pangai
Processing Record 28 of Set 12 | new glasgow
Processing Record 29 of Set 12 | roald
Processing Record 30 of Set 12 | asyut
Processing Record 31 of Set 12 | zhaotong
Processing Record 32 of Set 12 | warqla
City not found. Skipping...
Processing Record 33 of Set 12 | xicotencatl
Processing Record 34 of Set 12 | grand bend
Processing Record 35 of Set 12 | khairagarh
Processing Record 36 of Set 12 | thurso
Processing Record 37 of Set 12 | kibaya
Processing Record 38 of Set 12 | hirara
Processing Record 39 of Set 12 | bilibino
Processing Record 40 of Set 12 | ust-kamchatsk
City not found. Skipping...
Processing Record 41 of Set 12 | acapulco
Processing Record 42 of Set 12 | fortuna foothills
Processing Record 43 of Set 12 | moyale
Processing Record 44 of Set 12 | teahupoo
Processing Record 45 of Set 12 | iwanai
Processing Record 46 of Set 12 | segovia
Processing Record 47 of Set 12 | sorland
Processing Record 48 of Set 12 | vila do maio
Processing Rec

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [14]:
city_dict = {"City": cities, "Latitude": lat, "Longtitude": lng, "Max Temperature": max_temp, "Humidity": humidity, "Cloudiness": cloudiness, "Wind Speed": wind_speed, "Country": country, "Date": date}
city_data = pd.DataFrame(city_dict)


ValueError: arrays must all be same length

In [15]:
city_data

Unnamed: 0,City,Latitude,Longtitude,Max Temperature,Humidity,Cloudiness,Wind Speed,Country,Date
0,lagoa formosa,39.1834,-84.5333,73.72,90,37,1.01,US,1627642768
1,lagoa formosa,-20.5167,57.5167,75.58,60,40,12.66,MU,1627642769
2,lagoa formosa,-42.7826,147.0587,54.91,61,89,4.00,AU,1627642769
3,lagoa formosa,14.3521,3.3168,88.02,51,100,6.91,NE,1627642769
4,lagoa formosa,-42.8794,147.3294,55.26,60,75,5.01,AU,1627642769
...,...,...,...,...,...,...,...,...,...
550,lagoa formosa,52.2276,16.3653,79.77,39,44,15.37,PL,1627642841
551,lagoa formosa,46.5333,-87.6335,50.36,85,9,2.77,US,1627642841
552,lagoa formosa,-33.4569,-70.6483,51.85,31,0,4.05,CL,1627642759
553,lagoa formosa,31.6100,34.7642,94.41,40,0,13.38,IL,1627642841


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [6]:
city_data.describe

NameError: name 'city_data' is not defined

In [None]:
#  Get the indices of cities that have humidity over 100%.
humid_city = city_data[(city_data["Humidity"] > 100)].index
                                  
humid_city

In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
clean_city = city_data.drop(humid_city, inplace=False)
clean_city.head()

In [None]:
# Export the City_Data into a csv
clean_city.to_csv(output_data_file, index_label="City_ID")

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Max Temperature"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.title("City Latitude vs. Max Temperature (28/07/2021)")
plt.ylabel("Max Temperature (°F)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure
plt.savefig("./Data/City_Latitude_vs_Max_Temperature.png")
plt.show()


This plot is showing the relationship between temperatre and latitude. The plot indicates that the temperature is greater at and around the equator, which is latitude 0 on the plot.

## Latitude vs. Humidity Plot

In [None]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Humidity"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.title("City Latitude vs. Humidity (28/07/2021)")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure
plt.savefig("./Data/City_Latitude_vs_Humidity.png")
plt.show()

This scatter plot shows that there isn't much correlation between latitude and humidity.

## Latitude vs. Cloudiness Plot

In [None]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Cloudiness"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.title("City Latitude vs. Cloudiness (28/07/2021)")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure
plt.savefig("./Data/City_Latitude_vs_Cloudiness.png")
plt.show()

This scatter plot shows the relationship between Latitude and Cloudiness, and concludes that there isn't much correlation between the two.

## Latitude vs. Wind Speed Plot

In [None]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Wind Speed"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.title("City Latitude vs. Wind Speed (28/07/2021)")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure
plt.savefig("./Data/City_Latitude_vs_Wind_Speed.png")
plt.show()

This plot looks at the relationship between Latitude and Wind Speed. From this we can see that wind speed is greater, further away from the equator.

## Linear Regression

In [None]:
def plot_linear_regression(x_values, y_values, title, text_coordinates):
    
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    # Scatter Plot
    plt.scatter(x_values,y_values, facecolors="blue", marker="o", edgecolor="black", alpha=0.5)
    plt.plot(x_values,regress_values,"r-")
    plt.annotate(line_eq,text_coordinates,fontsize=14,color="red")
    plt.xlabel('Latitude')
    plt.ylabel(title)
    print(f"The r-value is: {rvalue}")
    plt.show()

In [None]:
# Create Northern and Southern Hemisphere DataFrames
north_df = clean_city.loc[clean_city["Latitude"] >= 0,:]
south_df = cleany_city.loc[clean_city["Latitude"] < 0,:]


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Max Temperature"]
plot_linear_regression(x_values,y_values,'Max Temperature(°F)',(6,30))

plt.savefig("./Data/north_lat_temp_regression.png")



####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Max Temperature"]
plot_linear_regression(x_values,y_values,'Max Temperature(°F),(6,30))

plt.savefig("./Data/south_lat_temp_regression.png")

Both the Northern and Southern Hemispheres show a....relationship between latitude and temperature. As you get closer to the equator (latitude 0) the higher the temperature gets. There is a strong correlation for both hemishperes, but the Northern Hemisphere seems to have a stonger relationship.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Humidity"]
plot_linear_regression(x_values,y_values,'Humidity (%)',(6,30))

plt.savefig("./Data/north_lat_humidity_regression.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Humidity"]
plot_linear_regression(x_values,y_values,'Humidity (%), (6,30))

plt.savefig("./Data/south_lat_humidity_regression.png")

In both northern and southern hemispheres, there isn't a strong correlation between humidity and latitude. However, there does seem to be a slight positive correlation in the Northern Hemisphere and a slight negative correlation in the Southern Hemisphere; meaning in the Northern Hemisphere as you get further away from the equator (latitude 0) humidity increases, and in the Southern Hemisphere it decreases as you get further away from the equator.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Cloudiness"]
plot_linear_regression(x_values,y_values,'Cloudiness (%)'',(6,30))

plt.savefig("./Data/north_lat_cloudy_regression.png")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Cloudiness"]
plot_linear_regression(x_values,y_values,'Cloudiness (%)',(6,30))

plt.savefig("./Data/south_lat_humidity_regression.png")

Initially, when looking at the plots no correlation can be seen between cloudiness and latitude. However, when the linear regression is performed, you can see that there is a slight positive correlation in the Northern Hemisphere but a strong negative correlation in the Southern Hemisphere. In the Southern Hemisphere it shows that the further you are from the equator (latitude 0) the less cloudy it is.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Wind Speed"]
plot_linear_regression(x_values,y_values,'Wind Speed (mph)',(6,30))

plt.savefig("./Data/north_lat_cloudy_regression.png")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Wind Speed"]
plot_linear_regression(x_values,y_values,'Wind Speed (mph)',(6,30))

plt.savefig("./Data/north_lat_cloudy_regression.png")

There is a correlation between wind speed and latitude from observing the plots however, the linear regression highlights a lower correlation than expected. The correlation in both southern and northern hemispheres, is of higher wind speeds further away from the equator at latitude 0.