In [None]:
#SORT OUT SAVING FIGURES
#SEE IF IT RUNS
#LOOK AT THE CORRELATION AND OBSERVATIONS

# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import os
from time import sleep

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Import API key
from api_keys import weather_api_key

# Output File (CSV): destination of the cleaned city table written by the export cell
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes, as (low, high) pairs.
# These are the bounds handed to np.random.uniform when sampling coordinates.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Unique city names, kept in the order they were first encountered
cities = []

# Names already collected; a set makes the duplicate check constant-time
# instead of rescanning the growing `cities` list for every coordinate.
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # Many random points resolve to the same city; keep only the first hit
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Base endpoint for the current-weather lookups. The API key travels in the
# query string, so use HTTPS to avoid sending it in plaintext.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&APPID=" + weather_api_key

In [None]:
# List of city data: one dict per city for which the API returned a full record
city_data = []

# Print to logger
print("Beginning Data Retrieval     ")
print("-----------------------------")

# Create counters
record_count = 1
set_count = 1

# Loop through all the cities in our list
for i, city in enumerate(cities):

    # Group cities in sets of 50 for logging purposes
    if (i % 50 == 0 and i >= 50):
        set_count += 1
        record_count = 1

    # Log the record and set numbers
    print(f"Processing Record {record_count} of Set {set_count} | {city}")

    # Add 1 to the record count
    record_count += 1

    # Run an API request for each of the cities
    try:
        # Hand the city name to requests via `params` so it is URL-encoded
        # properly (spaces, accents, ...) and appended to the existing query
        # string. The timeout stops one stalled connection hanging the run.
        response = requests.get(url, params={"q": city}, timeout=10)
        city_weather = response.json()

        # Parse out the fields of interest; a missing key means the API did
        # not return a weather record for this name.
        city_lat = city_weather["coord"]["lat"]
        city_lng = city_weather["coord"]["lon"]
        city_max_temp = city_weather["main"]["temp_max"]
        city_humidity = city_weather["main"]["humidity"]
        city_clouds = city_weather["clouds"]["all"]
        city_wind = city_weather["wind"]["speed"]
        city_country = city_weather["sys"]["country"]
        city_date = city_weather["dt"]
        city_weather_description = city_weather["weather"][0]["description"]

        # Append the City information into city_data list
        city_data.append({"City": city.title(),
                          "Lat": city_lat,
                          "Lng": city_lng,
                          "Max Temp": city_max_temp,
                          "Current Description": city_weather_description,
                          "Humidity": city_humidity,
                          "Cloudiness": city_clouds,
                          "Wind Speed": city_wind,
                          "Country": city_country,
                          "Date": city_date})

    # Network-level failure: report only the exception type, because the
    # exception message can contain the full URL including the API key.
    except requests.exceptions.RequestException as err:
        print(f"Request failed ({type(err).__name__}). Skipping...")

    # Response lacked the expected fields or was not valid JSON: skip the city
    except (KeyError, IndexError, ValueError):
        print("City not found. Skipping...")

# Indicate that Data Loading is complete
print("-----------------------------")
print("Data Retrieval Complete      ")
print("-----------------------------")
    


  

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# `city_data` is the list of per-city dicts collected by the API loop.
# Map the short keys recorded there onto the descriptive column names that the
# cleaning, plotting and regression cells look up. Keys not listed here
# (e.g. "Humidity", "Cloudiness", "Wind Speed", "Country") keep their names.
column_names = {
    "City": "City Name",
    "Lat": "Latitude",
    "Lng": "Longitude",
    "Max Temp": "Max Temperature",
}

# Build the table once from the full list of records. Re-running this cell is
# harmless: a DataFrame input is copied and already-renamed columns are skipped.
city_data = pd.DataFrame(city_data).rename(columns=column_names)

In [None]:
# Display the assembled city DataFrame as the cell output
city_data

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Summary statistics for the numeric columns (check the max of "Humidity").
# `describe` has to be called; without the parentheses the cell only shows
# the bound method's repr instead of the statistics.
city_data.describe()

In [None]:
# Index labels of every row whose humidity reading is above 100%.
humid_city = city_data.loc[city_data["Humidity"].gt(100)].index

humid_city

In [None]:
# Build `clean_city`: the city data minus the humidity outliers found above.
# `drop` returns a new DataFrame by default, so `city_data` itself is left intact.
clean_city = city_data.drop(index=humid_city)
clean_city.head()

In [None]:
# Export the City_Data into a csv.
# Create the target folder first so the export works on a fresh checkout;
# the guard skips this when the path has no directory component.
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)

clean_city.to_csv(output_data_file, index_label="City_ID")

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Make sure the figure folder exists before saving into it
os.makedirs("./Data", exist_ok=True)

# Build Scatter Plot
plt.figure(figsize=(6, 4))
plt.scatter(
    clean_city["Latitude"],
    clean_city["Max Temperature"],
    facecolors="blue",
    marker="o",
    edgecolor="black",
    alpha=0.5,
)

plt.title("City Latitude vs. Max Temperature (28/07/2021)")
plt.ylabel("Max Temperature (°F)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure (before plt.show(), so the saved file contains the plot)
plt.savefig("./Data/City_Latitude_vs_Max_Temperature.png")
plt.show()


This plot shows the relationship between temperature and latitude. The plot indicates that the temperature is greater at and around the equator, which is latitude 0 on the plot.

## Latitude vs. Humidity Plot

In [None]:
# Make sure the figure folder exists before saving into it
os.makedirs("./Data", exist_ok=True)

# Build Scatter Plot
plt.figure(figsize=(6, 4))
plt.scatter(
    clean_city["Latitude"],
    clean_city["Humidity"],
    facecolors="blue",
    marker="o",
    edgecolor="black",
    alpha=0.5,
)

plt.title("City Latitude vs. Humidity (28/07/2021)")
plt.ylabel("Humidity (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure (before plt.show(), so the saved file contains the plot)
plt.savefig("./Data/City_Latitude_vs_Humidity.png")
plt.show()

This scatter plot shows that there isn't much correlation between latitude and humidity.

## Latitude vs. Cloudiness Plot

In [None]:
# Make sure the figure folder exists before saving into it
os.makedirs("./Data", exist_ok=True)

# Build Scatter Plot
plt.figure(figsize=(6, 4))
plt.scatter(
    clean_city["Latitude"],
    clean_city["Cloudiness"],
    facecolors="blue",
    marker="o",
    edgecolor="black",
    alpha=0.5,
)

plt.title("City Latitude vs. Cloudiness (28/07/2021)")
plt.ylabel("Cloudiness (%)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure (before plt.show(), so the saved file contains the plot)
plt.savefig("./Data/City_Latitude_vs_Cloudiness.png")
plt.show()

This scatter plot shows the relationship between Latitude and Cloudiness, and concludes that there isn't much correlation between the two.

## Latitude vs. Wind Speed Plot

In [None]:
# Make sure the figure folder exists before saving into it
os.makedirs("./Data", exist_ok=True)

# Build Scatter Plot
plt.figure(figsize=(6, 4))
plt.scatter(
    clean_city["Latitude"],
    clean_city["Wind Speed"],
    facecolors="blue",
    marker="o",
    edgecolor="black",
    alpha=0.5,
)

plt.title("City Latitude vs. Wind Speed (28/07/2021)")
plt.ylabel("Wind Speed (mph)")
plt.xlabel("Latitude")
plt.grid(True)

# Save Figure (before plt.show(), so the saved file contains the plot)
plt.savefig("./Data/City_Latitude_vs_Wind_Speed.png")
plt.show()

This plot looks at the relationship between latitude and wind speed. From this we can see that wind speed is greater further away from the equator.

## Linear Regression

In [None]:
def plot_linear_regression(x_values, y_values, title, text_coordinates):
    """Scatter ``y_values`` against latitude and overlay the least-squares line.

    The fitted equation is annotated on the plot and the r-value is printed.

    Parameters
    ----------
    x_values : pandas.Series
        Latitudes for the x-axis. Must support element-wise arithmetic with
        scalars, since the regression line is computed as
        ``x_values * slope + intercept``.
    y_values : pandas.Series
        Measurements to regress on latitude.
    title : str
        Text used as the y-axis label.
    text_coordinates : tuple
        ``(x, y)`` position, in data coordinates, of the equation annotation.

    Returns
    -------
    None

    Notes
    -----
    The function deliberately does not call ``plt.show()``. The figure stays
    current after the call, so a following ``plt.savefig(...)`` writes the
    finished plot rather than an empty canvas. In a notebook the figure is
    still rendered when the cell finishes.
    """
    # Fit the line and evaluate it at every observed latitude
    (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
    regress_values = x_values * slope + intercept
    line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

    # Scatter Plot with the fitted line and its equation on top
    plt.scatter(x_values,y_values, facecolors="blue", marker="o", edgecolor="black", alpha=0.5)
    plt.plot(x_values,regress_values,"r-")
    plt.annotate(line_eq,text_coordinates,fontsize=14,color="red")
    plt.xlabel('Latitude')
    plt.ylabel(title)
    print(f"The r-value is: {rvalue}")

In [None]:
# Create Northern and Southern Hemisphere DataFrames.
# Latitude 0 is grouped with the north so every city lands in exactly one frame.
north_df = clean_city.loc[clean_city["Latitude"] >= 0,:]
south_df = clean_city.loc[clean_city["Latitude"] < 0,:]


####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress max temperature on latitude
x_values, y_values = north_df["Latitude"], north_df["Max Temperature"]
plot_linear_regression(
    x_values,
    y_values,
    title='Max Temperature(°F)',
    text_coordinates=(6,30),
)

# Write the current pyplot figure to disk
plt.savefig("./Data/north_lat_temp_regression.png")



####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress max temperature on latitude
x_values = south_df["Latitude"]
y_values = south_df["Max Temperature"]

# Southern latitudes are all negative, so the equation label needs a negative
# x position; an anchor outside the axes is clipped and never drawn.
# NOTE(review): the exact (x, y) position may need tuning to the data.
plot_linear_regression(x_values,y_values,'Max Temperature(°F)',(-50,30))

plt.savefig("./Data/south_lat_temp_regression.png")

Both the Northern and Southern Hemispheres show a clear relationship between latitude and temperature: the closer you get to the equator (latitude 0), the higher the temperature. The correlation is strong for both hemispheres, but the Northern Hemisphere seems to have the stronger relationship.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress humidity on latitude
x_values, y_values = north_df["Latitude"], north_df["Humidity"]
plot_linear_regression(
    x_values,
    y_values,
    title='Humidity (%)',
    text_coordinates=(6,30),
)

# Write the current pyplot figure to disk
plt.savefig("./Data/north_lat_humidity_regression.png")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress humidity on latitude
x_values = south_df["Latitude"]
y_values = south_df["Humidity"]

# Southern latitudes are all negative, so the equation label needs a negative
# x position; an anchor outside the axes is clipped and never drawn.
# NOTE(review): the exact (x, y) position may need tuning to the data.
plot_linear_regression(x_values,y_values,'Humidity (%)',(-50,30))

plt.savefig("./Data/south_lat_humidity_regression.png")

In both northern and southern hemispheres, there isn't a strong correlation between humidity and latitude. However, there does seem to be a slight positive correlation in the Northern Hemisphere and a slight negative correlation in the Southern Hemisphere; meaning in the Northern Hemisphere as you get further away from the equator (latitude 0) humidity increases, and in the Southern Hemisphere it decreases as you get further away from the equator.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress cloudiness on latitude
x_values = north_df["Latitude"]
y_values = north_df["Cloudiness"]
plot_linear_regression(x_values,y_values,'Cloudiness (%)',(6,30))

plt.savefig("./Data/north_lat_cloudy_regression.png")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress cloudiness on latitude
x_values = south_df["Latitude"]
y_values = south_df["Cloudiness"]

# Southern latitudes are all negative, so the equation label needs a negative
# x position; an anchor outside the axes is clipped and never drawn.
# NOTE(review): the exact (x, y) position may need tuning to the data.
plot_linear_regression(x_values,y_values,'Cloudiness (%)',(-50,30))

# Own filename, so this figure does not overwrite the southern humidity plot
plt.savefig("./Data/south_lat_cloudy_regression.png")

Initially, when looking at the plots no correlation can be seen between cloudiness and latitude. However, when the linear regression is performed, you can see that there is a slight positive correlation in the Northern Hemisphere but a strong negative correlation in the Southern Hemisphere. In the Southern Hemisphere it shows that the further you are from the equator (latitude 0) the less cloudy it is.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern Hemisphere: regress wind speed on latitude
x_values = north_df["Latitude"]
y_values = north_df["Wind Speed"]
plot_linear_regression(x_values,y_values,'Wind Speed (mph)',(6,30))

# Own filename, so this figure does not overwrite the northern cloudiness plot
plt.savefig("./Data/north_lat_wind_regression.png")

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern Hemisphere: regress wind speed on latitude
x_values = south_df["Latitude"]
y_values = south_df["Wind Speed"]

# Southern latitudes are all negative, so the equation label needs a negative
# x position; an anchor outside the axes is clipped and never drawn.
# NOTE(review): the exact (x, y) position may need tuning to the data.
plot_linear_regression(x_values,y_values,'Wind Speed (mph)',(-50,30))

# Southern wind-speed figure gets its own file instead of a northern filename
plt.savefig("./Data/south_lat_wind_regression.png")

Observing the plots suggests a correlation between wind speed and latitude; however, the linear regression highlights a lower correlation than expected. In both the Southern and Northern Hemispheres, the correlation is of higher wind speeds further away from the equator (latitude 0).