In [None]:
#SORT OUT SAVING FIGURES
#SEE IF IT RUNS
#LOOK AT THE CORRELATION AND OBSERVERATIONS

# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [2]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

589

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [6]:
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "Imperial"
query_url = f"{url}appid={weather_api_key}&units={units}&q="

In [7]:
# Create city data list
city_data = []

print("Beginning Data Retrieval")
print("-----------------------------")

record_count = 1
set_count = 1

# Loop through all the cities in list
for city in cities:

    try:
        print(f"Processing Record {record_count} of Set {set_count} | {city}")
        response = requests.get(query_url + city).json()
        
        lat = response["coord"]["lat"]
        lng = response["coord"]["lon"]
        max_temp = response["main"]["temp_max"]
        humidity = response["main"]["humidity"]
        cloudiness = response["clouds"]["all"]
        wind_speed = response["wind"]["speed"]
        country = response["sys"]["country"]
        date = response["dt"]     
            
        # Add City info into city_data list
        city_data.append({"City": city.title(), 
                          "Lat": lat, 
                          "Lng": lng, 
                          "Max Temp": max_temp,
                          "Humidity": humidity,
                          "Cloudiness": cloudiness,
                          "Wind Speed": wind_speed,
                          "Date": date,
                          "Country": country})
    

    except:
        print("City not found. Skipping...")
        
    record_count += 1
    if record_count == 50:
        record_count = 1
        set_count += 1
        
print("-----------------------------")
print("Data Retrieval Complete")


Beginning Data Retrieval
-----------------------------
Processing Record 1 of Set 1 | teya
Processing Record 2 of Set 1 | hilo
Processing Record 3 of Set 1 | new norfolk
Processing Record 4 of Set 1 | souillac
Processing Record 5 of Set 1 | vaini
Processing Record 6 of Set 1 | gat
Processing Record 7 of Set 1 | erenhot
Processing Record 8 of Set 1 | punta arenas
Processing Record 9 of Set 1 | samusu
City not found. Skipping...
Processing Record 10 of Set 1 | qaanaaq
Processing Record 11 of Set 1 | port hardy
Processing Record 12 of Set 1 | hermanus
Processing Record 13 of Set 1 | nikolskoye
Processing Record 14 of Set 1 | paltinis
Processing Record 15 of Set 1 | tiksi
Processing Record 16 of Set 1 | barentsburg
City not found. Skipping...
Processing Record 17 of Set 1 | cape town
Processing Record 18 of Set 1 | palabuhanratu
City not found. Skipping...
Processing Record 19 of Set 1 | mar del plata
Processing Record 20 of Set 1 | ushuaia
Processing Record 21 of Set 1 | busselton
Process

Processing Record 37 of Set 4 | hofn
Processing Record 38 of Set 4 | wangqing
Processing Record 39 of Set 4 | yulara
Processing Record 40 of Set 4 | meyungs
City not found. Skipping...
Processing Record 41 of Set 4 | behshahr
Processing Record 42 of Set 4 | coruripe
Processing Record 43 of Set 4 | vostok
Processing Record 44 of Set 4 | barrow
Processing Record 45 of Set 4 | sorrento
Processing Record 46 of Set 4 | belmonte
Processing Record 47 of Set 4 | hervey bay
Processing Record 48 of Set 4 | saint george
Processing Record 49 of Set 4 | cabatuan
Processing Record 1 of Set 5 | kosonsoy
Processing Record 2 of Set 5 | flinders
Processing Record 3 of Set 5 | takaka
Processing Record 4 of Set 5 | cam ranh
Processing Record 5 of Set 5 | wajima
Processing Record 6 of Set 5 | san lorenzo
Processing Record 7 of Set 5 | mezhdurechensk
Processing Record 8 of Set 5 | porvoo
Processing Record 9 of Set 5 | victoria
Processing Record 10 of Set 5 | leningradskiy
Processing Record 11 of Set 5 | che

Processing Record 30 of Set 8 | sur
Processing Record 31 of Set 8 | sorong
Processing Record 32 of Set 8 | raga
Processing Record 33 of Set 8 | sangar
Processing Record 34 of Set 8 | silver city
Processing Record 35 of Set 8 | pochutla
Processing Record 36 of Set 8 | ventspils
Processing Record 37 of Set 8 | buin
Processing Record 38 of Set 8 | gondanglegi
Processing Record 39 of Set 8 | santiago
Processing Record 40 of Set 8 | sabang
Processing Record 41 of Set 8 | san jose
Processing Record 42 of Set 8 | tete
Processing Record 43 of Set 8 | antofagasta
Processing Record 44 of Set 8 | haademeeste
City not found. Skipping...
Processing Record 45 of Set 8 | alexandria
Processing Record 46 of Set 8 | paita
Processing Record 47 of Set 8 | kuanshan
City not found. Skipping...
Processing Record 48 of Set 8 | atar
Processing Record 49 of Set 8 | san vicente
Processing Record 1 of Set 9 | deputatskiy
Processing Record 2 of Set 9 | kihei
Processing Record 3 of Set 9 | skiros
City not found. Sk

Processing Record 21 of Set 12 | jumla
Processing Record 22 of Set 12 | rafai
Processing Record 23 of Set 12 | nchelenge
Processing Record 24 of Set 12 | lorengau
Processing Record 25 of Set 12 | jurado
Processing Record 26 of Set 12 | abha
Processing Record 27 of Set 12 | puerto leguizamo
Processing Record 28 of Set 12 | morros
Processing Record 29 of Set 12 | raudeberg
Processing Record 30 of Set 12 | sabla
Processing Record 31 of Set 12 | kampot
Processing Record 32 of Set 12 | nata
Processing Record 33 of Set 12 | gabi
Processing Record 34 of Set 12 | mormugao
Processing Record 35 of Set 12 | comodoro rivadavia
Processing Record 36 of Set 12 | takab
Processing Record 37 of Set 12 | kayes
Processing Record 38 of Set 12 | yabrud
Processing Record 39 of Set 12 | nikolayevsk-na-amure
Processing Record 40 of Set 12 | babanusah
City not found. Skipping...
Processing Record 41 of Set 12 | preobrazheniye
Processing Record 42 of Set 12 | masvingo
Processing Record 43 of Set 12 | puerto esco

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [21]:
#Export city data to CSV
# Export the City_Data into a csv
city_data.to_csv(output_data.csv, index_label= "City ID")

AttributeError: 'list' object has no attribute 'to_csv'

In [10]:
city_dict = {"City": cities, "Latitude": lat, "Longtitude": lng, 
             "Max Temperature": max_temp, "Humidity": humidity, "Cloudiness": cloudiness, 
             "Wind Speed": wind_speed, "Country": country, "Date": date}
city_df = pd.DataFrame(city_dict)


In [11]:
city_df

Unnamed: 0,City,Latitude,Longtitude,Max Temperature,Humidity,Cloudiness,Wind Speed,Country,Date
0,teya,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
1,hilo,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
2,new norfolk,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
3,souillac,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
4,vaini,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
...,...,...,...,...,...,...,...,...,...
584,cheyenne,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
585,dzhebariki-khaya,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
586,bundaberg,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
587,sao felix do xingu,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [14]:
city_df.describe()

Unnamed: 0,Latitude,Longtitude,Max Temperature,Humidity,Cloudiness,Wind Speed,Date
count,589.0,589.0,589.0,589.0,589.0,589.0,589.0
mean,49.7502,-77.6328,46.51,76.0,90.0,20.71,1627648000.0
std,4.551339e-13,4.551339e-13,5.3336e-13,0.0,0.0,2.6668e-13,0.0
min,49.7502,-77.6328,46.51,76.0,90.0,20.71,1627648000.0
25%,49.7502,-77.6328,46.51,76.0,90.0,20.71,1627648000.0
50%,49.7502,-77.6328,46.51,76.0,90.0,20.71,1627648000.0
75%,49.7502,-77.6328,46.51,76.0,90.0,20.71,1627648000.0
max,49.7502,-77.6328,46.51,76.0,90.0,20.71,1627648000.0


In [15]:
#  Get the indices of cities that have humidity over 100%.
humid_city = city_df[(city_df["Humidity"] > 100)].index
                                  
humid_city

Int64Index([], dtype='int64')

In [16]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".
clean_city = city_df.drop(humid_city, inplace=False)
clean_city.head()

Unnamed: 0,City,Latitude,Longtitude,Max Temperature,Humidity,Cloudiness,Wind Speed,Country,Date
0,teya,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
1,hilo,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
2,new norfolk,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
3,souillac,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164
4,vaini,49.7502,-77.6328,46.51,76,90,20.71,CA,1627648164


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [19]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Max Temperature"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.ylabel("Max Temperature (°F)")
plt.xlabel("Latitude")
plt.title("City Latitude vs. Max Temperature (28/07/2021)")
plt.grid(True)

# Save Figure
plt.savefig("Latitude_vs_Max_Temperature.png")
plt.show()


SyntaxError: invalid syntax (<ipython-input-19-23d24d74024e>, line 5)

This plot is showing the relationship between temperatre and latitude. The plot indicates that the temperature is greater at and around the equator, which is latitude 0 on the plot.

## Latitude vs. Humidity Plot

In [None]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Humidity"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.title("City Latitude vs. Humidity (28/07/2021)")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure
plt.savefig("Latitude_vs_Humidity.png")
plt.show()

This scatter plot shows that there isn't much correlation between latitude and humidity.

## Latitude vs. Cloudiness Plot

In [None]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Cloudiness"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.title("City Latitude vs. Cloudiness (28/07/2021)")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure
plt.savefig("Latitude_vs_Cloudiness.png")
plt.show()

This scatter plot shows the relationship between Latitude and Cloudiness, and concludes that there isn't much correlation between the two.

## Latitude vs. Wind Speed Plot

In [None]:
# Build Scatter Plot 
plt.figure(figsize = (6, 4))
plt.scatter(clean_city["Latitude"], clean_city["Wind Speed"], facecolors="blue", marker="o", edgecolor="black", alpha=0.5

plt.title("City Latitude vs. Wind Speed (28/07/2021)")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure
plt.savefig("Latitude_vs_Wind_Speed.png")
plt.show()

This plot looks at the relationship between Latitude and Wind Speed. From this we can see that wind speed is greater, further away from the equator.

## Linear Regression

In [None]:
def plot_linear_regression(x_values, y_values, title, text_coordinates):
    
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    # Scatter Plot
    plt.scatter(x_values,y_values, facecolors="blue", marker="o", edgecolor="black", alpha=0.5)
    plt.plot(x_values,regress_values,"r-")
    plt.annotate(line_eq,text_coordinates,fontsize=14,color="red")
    plt.xlabel('Latitude')
    plt.ylabel(title)
    print(f"The r-value is: {rvalue}")
    plt.show()

In [None]:
# Create Northern and Southern Hemisphere DataFrames
north_df = clean_city.loc[clean_city["Latitude"] >= 0,:]
south_df = cleany_city.loc[clean_city["Latitude"] < 0,:]


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Max Temperature"]
plot_linear_regression(x_values,y_values,'Max Temperature(°F)',(6,30))

plt.savefig("north_lat_temp_regression.png")



####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Max Temperature"]
plot_linear_regression(x_values,y_values,'Max Temperature(°F),(6,30))

plt.savefig("south_lat_temp_regression.png")

Both the Northern and Southern Hemispheres show a....relationship between latitude and temperature. As you get closer to the equator (latitude 0) the higher the temperature gets. There is a strong correlation for both hemishperes, but the Northern Hemisphere seems to have a stonger relationship.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Humidity"]
plot_linear_regression(x_values,y_values,'Humidity (%)',(6,30))

plt.savefig("north_lat_humidity_regression.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Humidity"]
plot_linear_regression(x_values,y_values,'Humidity (%), (6,30))

plt.savefig("south_lat_humidity_regression.png")

In both northern and southern hemispheres, there isn't a strong correlation between humidity and latitude. However, there does seem to be a slight positive correlation in the Northern Hemisphere and a slight negative correlation in the Southern Hemisphere; meaning in the Northern Hemisphere as you get further away from the equator (latitude 0) humidity increases, and in the Southern Hemisphere it decreases as you get further away from the equator.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Cloudiness"]
plot_linear_regression(x_values,y_values,'Cloudiness (%)'',(6,30))

plt.savefig("north_lat_cloudy_regression.png")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Cloudiness"]
plot_linear_regression(x_values,y_values,'Cloudiness (%)',(6,30))

plt.savefig("south_lat_cloudy_regression.png")

Initially, when looking at the plots no correlation can be seen between cloudiness and latitude. However, when the linear regression is performed, you can see that there is a slight positive correlation in the Northern Hemisphere but a strong negative correlation in the Southern Hemisphere. In the Southern Hemisphere it shows that the further you are from the equator (latitude 0) the less cloudy it is.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = north_df["Latitude"]
y_values = north_df["Wind Speed"]
plot_linear_regression(x_values,y_values,'Wind Speed (mph)',(6,30))

plt.savefig("north_lat_wind_regression.png")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
x_values = south_df["Latitude"]
y_values = south_df["Wind Speed"]
plot_linear_regression(x_values,y_values,'Wind Speed (mph)',(6,30))

plt.savefig("south_lat_wind_regression.png")

There is a correlation between wind speed and latitude from observing the plots however, the linear regression highlights a lower correlation than expected. The correlation in both southern and northern hemispheres, is of higher wind speeds further away from the equator at latitude 0.