# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import requests
import json
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as st 
from scipy.stats import linregress
import numpy as np
import random
import pprint

# Import API keys 
import sys
sys.path.append("..")
from api_keys import weather_api_key

# Import citipy
from citipy import citipy 

# Output CSV file
data_file = "../output_data/cities.csv"

# Valid (min, max) bounds for latitudes and longitudes
lat_range, lng_range = (-90, 90), (-180, 180)

## Generate Cities List

In [2]:
# Collect the unique city names nearest to randomly sampled coordinates.
# `cities` keeps first-seen order; `seen_cities` gives O(1) duplicate checks.
cities = []
seen_cities = set()

# Draw 1500 random latitude/longitude pairs within the configured ranges
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lng = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lng)

# Look up the nearest city for each coordinate pair, skipping duplicates
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Report how many unique cities were found
print(len(cities))

635


In [3]:
# Sample request for a single city, used to inspect the structure of the API response.
# The timeout stops the cell from hanging indefinitely if the API does not answer.
response = requests.get(
    f"http://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&units=metric&q=butaritari",
    timeout=10,
).json()

# Pretty-print the decoded JSON with its keys sorted
print(json.dumps(response, indent=4, sort_keys=True))

{
    "base": "stations",
    "clouds": {
        "all": 66
    },
    "cod": 200,
    "coord": {
        "lat": 3.0707,
        "lon": 172.7902
    },
    "dt": 1619318329,
    "id": 2110227,
    "main": {
        "feels_like": 31.67,
        "grnd_level": 1008,
        "humidity": 75,
        "pressure": 1009,
        "sea_level": 1009,
        "temp": 28.14,
        "temp_max": 28.14,
        "temp_min": 28.14
    },
    "name": "Butaritari",
    "rain": {
        "1h": 0.59
    },
    "sys": {
        "country": "KI",
        "sunrise": 1619288425,
        "sunset": 1619332379
    },
    "timezone": 43200,
    "visibility": 10000,
    "weather": [
        {
            "description": "light rain",
            "icon": "10d",
            "id": 500,
            "main": "Rain"
        }
    ],
    "wind": {
        "deg": 80,
        "gust": 5.01,
        "speed": 4.6
    }
}


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Parallel lists holding one entry per city that the API returned data for
city_name = []
latitude = []
longitude = []
max_temp = []
humidity  = []
cloudiness = []
wind_speed = []
countries = []
date = []

# Progress counters for the print log: records are numbered 1-50 within each set
record_counter = 0
set_counter = 1

# Save config information
url = "http://api.openweathermap.org/data/2.5/weather?"
units = "metric"

# Build query URL; the city name is appended to it for each request
query_url = f"{url}appid={weather_api_key}&units={units}&q="

# Destination lists, in the same order as the values extracted below
weather_columns = (city_name, latitude, longitude, max_temp, humidity,
                   cloudiness, wind_speed, countries, date)

# Loop through the cities
for city in cities:
    try:
        # The timeout keeps one unresponsive request from stalling the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field BEFORE appending anything, so a response that is
        # missing a key cannot leave the parallel lists with unequal lengths.
        record = (
            response["name"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]["temp_max"],
            response["main"]["humidity"],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["sys"]["country"],
            response["dt"],
        )
    except (KeyError, ValueError, requests.RequestException):
        # KeyError: the response lacks an expected field (e.g. an error payload);
        # ValueError: the body was not valid JSON; RequestException: network failure.
        # Interrupts and programming errors are deliberately not caught here.
        print("City not found. Skipping...")
        continue

    for column, value in zip(weather_columns, record):
        column.append(value)

    # Start a new set once the current one holds 50 records
    if record_counter >= 50:
        record_counter = 1
        set_counter += 1
    else:
        record_counter += 1

    print(f"Processing Records {record_counter} of Set {set_counter} | {city}")

# End of printing
print("----------------------------------")
print("Data Retrieval Complete")
print("----------------------------------")



### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the per-city lists into one table, one column per weather field
city_columns = {
    "City": city_name,
    "Lat": latitude,
    "Lng": longitude,
    "Max Temp": max_temp,
    "Humidity": humidity,
    "Cloudiness": cloudiness,
    "Wind Speed": wind_speed,
    "Country": countries,
    "Date": date,
}
df = pd.DataFrame(data=city_columns)

# Show the resulting DataFrame as the cell output
df

In [None]:
# Write the city data to a .csv file, leaving out the DataFrame's row index
df.to_csv(path_or_buf="output_data/cities.csv", index=False)

In [None]:
# Reload the saved city data from disk and preview its first five rows
df = pd.read_csv(filepath_or_buffer="output_data/cities.csv")
df.head(n=5)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter each city's maximum temperature against its latitude on the current axes
ax = plt.gca()
ax.scatter(df["Lat"], df["Max Temp"], marker="o", facecolor="blue", edgecolors="black")

# Title and axis labels
ax.set(
    title="City Latitude vs. Max Temperature",
    xlabel="Latitude",
    ylabel="Max Temperature (C)",
)

# Write the figure to disk before displaying it
plt.savefig("Latitude vs Temperature Plot.png")

# Render the plot
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter each city's humidity against its latitude on the current axes
ax = plt.gca()
ax.scatter(df["Lat"], df["Humidity"], marker="o", facecolor="blue", edgecolors="black")

# Title and axis labels
ax.set(
    title="City Latitude vs. Humidity",
    xlabel="Latitude",
    ylabel="Humidity",
)

# Write the figure to disk before displaying it
plt.savefig("City Latitude vs Humidity.png")

# Render the plot
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter each city's cloudiness against its latitude on the current axes
ax = plt.gca()
ax.scatter(df["Lat"], df["Cloudiness"], marker="o", facecolor="blue", edgecolors="black")

# Title and axis labels
ax.set(
    title="City Latitude vs. Cloudiness",
    xlabel="Latitude",
    ylabel="Cloudiness (%)",
)

# Write the figure to disk before displaying it
plt.savefig("City Latitude vs Cloudiness.png")

# Render the plot
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter each city's wind speed against its latitude on the current axes
ax = plt.gca()
ax.scatter(df["Lat"], df["Wind Speed"], marker="o", facecolor="blue", edgecolors="black")

# Title and axis labels
ax.set(
    title="City Latitude vs. Wind Speed",
    xlabel="Latitude",
    ylabel="Wind Speed (m/s)",
)

# Write the figure to disk before displaying it
plt.savefig("City Latitude vs Wind Speed.png")

# Render the plot
plt.show()

## Linear Regression

In [None]:
# Split the data into Northern and Southern hemispheres.
# Rows with a latitude of exactly 0 are placed in the Northern set.
latitudes = df["Lat"]
northern_hemisphere = df.loc[latitudes >= 0]
southern_hemisphere = df.loc[latitudes < 0]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Max Temp"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(0,-15),fontsize=15,color="red")
plt.title("City Latitude vs Max Temperature Linear Regression (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Max Temperature Linear Regression (Northern Hemisphere).png")
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Max Temp"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-55,30),fontsize=15,color="red")
plt.title("City Latitude vs Max Temperature Linear Regression (Southern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Max Temp")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Max Temperature Linear Regression (Southern Hemisphere).png")
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Humidity"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(45,0),fontsize=15,color="red")
plt.title("City Latitude vs Humidity Linear Regression (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Humidity")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Humidity Linear Regression (Northern Hemisphere).png")
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Humidity"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-55,15),fontsize=15,color="red")
plt.title("City Latitude vs Humidity Linear Regression (Southern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Humidity")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Humidity Linear Regression (Southern Hemisphere).png")
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Cloudiness"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(40,25),fontsize=15,color="red")
plt.title("City Latitude vs Cloudiness Linear Regression (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Cloudiness Linear Regression (Northern Hemisphere).png")
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Cloudiness"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-55,60),fontsize=15,color="red")
plt.title("City Latitude vs Cloudiness Linear Regression (Southern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Cloudiness Linear Regression (Southern Hemisphere).png")
plt.show()

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = northern_hemisphere["Lat"]
y_values = northern_hemisphere["Wind Speed"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(0,10.2),fontsize=15,color="red")
plt.title("City Latitude vs Wind Speed Linear Regression (Northern Hemisphere)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Wind Speed Linear Regression (Northern Hemisphere).png")
plt.show()

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Defining x and y values
x_values = southern_hemisphere["Lat"]
y_values = southern_hemisphere["Wind Speed"]

# Pearson correlation coefficient (r); it is squared in the print so the
# reported number matches its "r-squared" label
correlation = st.pearsonr(x_values, y_values)
print(f"The r-squared value is: {round(correlation[0] ** 2, 2)}")

# Least-squares fit of y on x, and the fitted y for every observed x
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = "y = " + str(round(slope,2)) + "x + " + str(round(intercept,2))

# Scatter the observations, overlay the fitted line in red, annotate with its equation
plt.scatter(x_values, y_values, edgecolor="black")
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-55,10),fontsize=15,color="red")
plt.title("City Latitude vs Wind Speed Linear Regression (Southern Hemisphere)")
plt.xlabel("Latitude")
# The notebook's weather requests use units=metric, so wind speed is in m/s, not mph
plt.ylabel("Wind Speed (m/s)")

# Save BEFORE plt.show(): saving after the figure has been shown writes an empty image
plt.savefig("output_data/City Latitude vs Wind Speed Linear Regression (Southern Hemisphere).png")
plt.show()