# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
import scipy.stats as st
from datetime import datetime

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV): path the assembled city weather table is written to
output_data_file = "../output_data/cities.csv"

# Range of latitudes and longitudes, as (min, max) bounds for the random
# coordinate sampling that drives the city selection
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Create a set of random lat and lng combinations
# (fixed seed, so the same coordinates are drawn on every run)
np.random.seed(27)
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination and keep each city
# name once. dict.fromkeys drops repeats while preserving first-seen order,
# which avoids re-scanning a growing list for every sampled coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))


In [None]:
# Save config information
# HTTPS so the API key in the query string is not sent in clear text
base_url = "https://api.openweathermap.org/data/2.5/weather?"

# Build partial query URL (metric units; the city name is appended per request)
query_url = base_url + "appid=" + weather_api_key + "&units=metric&q="

# Trial request for a single city to inspect the shape of the JSON payload.
# The timeout stops the cell from hanging forever if the server never answers.
response = requests.get(query_url + cities[1], timeout=10).json()
response

In [None]:
# NOTE(review): looks like a sample Unix timestamp (seconds), similar to the
# `dt` field read from the API responses; it is not referenced anywhere else
# in the visible notebook — confirm whether this cell is still needed.
dt = 1660025415


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Save config information
# HTTPS so the API key in the query string is not sent in clear text
base_url = "https://api.openweathermap.org/data/2.5/weather?"

# Build partial query URL (metric units; the city name is appended per request)
query_url = base_url + "appid=" + weather_api_key + "&units=metric&q="

# Set up lists to hold response info (one entry per successfully processed city)
number = 0
name = []
lat = []
long = []
temp = []
humid = []
cloud = []
windspd = []
country = []
date_time = []

# NOTE(review): requests are sent back-to-back with no pause — confirm the
# API plan's rate limit allows this many consecutive calls.

# Loop through the list of cities and perform a request for data on each
for city in cities:
    try:
        # The timeout keeps one unresponsive request from stalling the whole run
        response = requests.get(query_url + city, timeout=10).json()

        # Read every field BEFORE touching the result lists: if any key is
        # missing, nothing has been appended yet, so the lists always stay
        # the same length and can be combined into one table later.
        row = (
            response['name'],
            response['coord']['lat'],
            response['coord']['lon'],
            response['main']['temp_max'],
            response['main']['humidity'],
            response['clouds']['all'],
            response['wind']['speed'],
            response['sys']['country'],
        )
        timestamp = response['dt']
        # UTC timestamp text; time.gmtime avoids the deprecated
        # datetime.utcfromtimestamp while producing the same string
        observed_at = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime(timestamp))
    except (requests.exceptions.RequestException, ValueError, KeyError,
            TypeError, OverflowError):
        # Network failure, non-JSON body, or a payload without the expected
        # fields (e.g. an error response): log the city and move on
        print(f"There was an error in retrieving the data for {city}\n")
        continue

    columns = (name, lat, long, temp, humid, cloud, windspd, country)
    for column, value in zip(columns, row):
        column.append(value)
    date_time.append(observed_at)
    number += 1
    print(f"{number}: {city}\n")


In [None]:
# Report how many cities returned usable data, to confirm the sample is large enough
city_count = len(name)
print(f"There are {city_count} cities in the dataset.")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Assemble the collected readings into one table, one column per measurement
city_columns = {
    "City": name,
    "Latitude": lat,
    "Longitude": long,
    "Temperature (C)": temp,
    "Humidity (%)": humid,
    "Cloudiness (%)": cloud,
    "Wind Speed (m/s)": windspd,
    "Country": country,
}
cities_df = pd.DataFrame(city_columns)
cities_df = cities_df.rename_axis("ID")  # label the row index "ID"

# Export with a header row; index=False leaves the ID index out of the file
cities_df.to_csv(output_data_file, header=True, index=False)

# Display the table
cities_df


## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Count the cities whose reported humidity exceeds 100%
over_humid_df = cities_df[cities_df['Humidity (%)'] > 100]
print(f"There are {len(over_humid_df)} cities in the dataset with a humidity greater than 100%.")


## Plotting the Data
* Use proper labelling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter each city's maximum temperature against its latitude
temps = cities_df["Temperature (C)"]
plt.scatter(cities_df["Latitude"], temps, color="SteelBlue", edgecolors="black")

plt.title("City Latitude vs Max Temperature")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (C)")
plt.xlim(-60, 80)
# Keep the usual 0-40 C window, but widen it to the next whole degree when a
# reading falls outside, so sub-zero or very hot cities are not silently hidden.
plt.ylim(min(0, np.floor(temps.min())), max(40, np.ceil(temps.max())))

# Save before show(), while the figure is still the current one
plt.savefig("../output_data/fig1.png")

plt.show()


This graph represents the relationship between a city's latitude (i.e. its position north or south of the equator) and its maximum temperature for a given day.

## Latitude vs. Humidity Plot

In [None]:
# Scatter each city's humidity against its latitude on the current axes
ax = plt.gca()
ax.scatter(
    cities_df["Latitude"],
    cities_df["Humidity (%)"],
    color="SteelBlue",
    edgecolors="black",
)

# Labels, fixed axis windows and grid
ax.set_title("City Latitude vs Humidity")
ax.set_xlabel("Latitude")
ax.set_ylabel("Humidity (%)")
ax.set_xlim(-60, 80)
ax.set_ylim(0, 100)
ax.grid()

# Save before show(), while the figure is still the current one
plt.savefig("../output_data/fig2.png")

plt.show()

This graph represents the relationship between a city's latitude (i.e. its position north or south of the equator) and its humidity for a given day.

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter each city's cloudiness against its latitude on the current axes
ax = plt.gca()
ax.scatter(
    cities_df["Latitude"],
    cities_df["Cloudiness (%)"],
    color="SteelBlue",
    edgecolors="black",
)

# Labels, fixed axis windows and grid
ax.set_title("City Latitude vs Cloudiness")
ax.set_xlabel("Latitude")
ax.set_ylabel("Cloudiness (%)")
ax.set_xlim(-60, 80)
ax.set_ylim(0, 100)
ax.grid()

# Save before show(), while the figure is still the current one
plt.savefig("../output_data/fig3.png")

plt.show()

This graph represents the relationship between a city's latitude (i.e. its position north or south of the equator) and its cloudiness for a given day.

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter each city's wind speed against its latitude
wind = cities_df["Wind Speed (m/s)"]
plt.scatter(cities_df["Latitude"], wind, color="SteelBlue", edgecolors="black")

plt.title("City Latitude vs Wind Speed")
plt.grid()
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (m/s)")
plt.xlim(-60, 80)
# Keep the usual 0-17.5 m/s window, but raise the top to the next whole unit
# when a stronger wind is reported, so that city is not silently hidden.
plt.ylim(0, max(17.5, np.ceil(wind.max())))

# Save before show(), while the figure is still the current one
plt.savefig("../output_data/fig4.png")

plt.show()


This graph represents the relationship between a city's latitude (i.e. its position north or south of the equator) and its wind speed for a given day.

## Linear Regression

In [None]:
# Split the cities by the sign of their latitude: positive goes to the North
# frame, negative to the South frame. Both comparisons are strict, so a city
# at exactly 0 latitude lands in neither.
latitudes = cities_df['Latitude']
nth_hem_df = cities_df.loc[latitudes > 0]
sth_hem_df = cities_df.loc[latitudes < 0]


In [None]:
# Create a function to calculate linear regression for each graph

def _describe_correlation(r):
    """Return the phrase used to report a correlation coefficient *r*.

    |r| below 0.3 is "no correlation", below 0.5 weak, below 0.7 moderate,
    and anything else strong. The direction is "positive" when r > 0 and
    "negative" otherwise.
    """
    direction = "positive" if r > 0 else "negative"
    strength = abs(r)
    if strength < 0.3:
        return "no correlation"
    if strength < 0.5:
        return f"a weak {direction} correlation"
    if strength < 0.7:
        return f"a moderate {direction} correlation"
    return f"a strong {direction} correlation"


def linreg(x, y, hem):
    """Plot y against x with a fitted regression line and print a summary.

    Draws a scatter plot with the least-squares line on top, shows it, then
    prints the line's equation, the correlation coefficient (rounded to two
    decimals) and a plain-language reading of its strength and direction.

    Args:
        x: Values for the horizontal axis. Must expose a ``.name`` attribute
            (used for the title and axis label) and support arithmetic with
            scalars, e.g. a pandas Series.
        y: Values for the vertical axis, with the same requirements as ``x``.
        hem: Hemisphere prefix; "ern Hemisphere" is appended to it in the
            plot title (e.g. "North" gives "Northern Hemisphere").

    Returns:
        None. The function only plots and prints.
    """
    fit = linregress(x, y)
    slope, intercept = fit.slope, fit.intercept
    regress_values = x * slope + intercept

    # Print a negative intercept as "- 3.2" rather than "+ -3.2"
    rounded_intercept = round(intercept, 2)
    sign = "-" if rounded_intercept < 0 else "+"
    line_eq = f"y = {round(slope, 2)}x {sign} {abs(rounded_intercept)}"

    plt.scatter(x, y, color="SteelBlue", edgecolors="black")
    plt.plot(x, regress_values, color="red", alpha=0.6)
    plt.title(f"{x.name} vs {y.name} {hem}ern Hemisphere")
    plt.xlabel(x.name)
    plt.ylabel(y.name)
    plt.show()

    print(f"The linear regression equation for these factors is:\n{line_eq}\n")

    # linregress already reports the Pearson correlation coefficient as
    # rvalue, so it does not need to be computed a second time.
    r = fit.rvalue
    print(f"The correlation coefficient for these factors is {round(r, 2)}")
    print(f"This indicates {_describe_correlation(r)} between these factors.")

###  Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
linreg(nth_hem_df["Latitude"],nth_hem_df["Temperature (C)"], "North")

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
linreg(sth_hem_df["Latitude"],sth_hem_df["Temperature (C)"],"South")

##### Analysis  
These results suggest a relationship between latitude and temperature.  
This confirms what would be expected as it is known that areas near the equator receive more heat from the sun.  
A moderate correlation has been returned for the North hemisphere and a strong correlation for the South hemisphere.  
From these results, one could use the linear regression equation to make a reasonable estimate of the maximum daily temperature for a city, given its latitude.  
Note that the linear regression equations for each hemisphere are significantly different. This could be due to sampling error. It would warrant further investigation.  
Ideally, both equations should have the same (or nearly equal) intercept. This would make sense logically, as for both equations the intercept represents the same real-world value, i.e. the temperature at the equator.

### Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
linreg(nth_hem_df["Latitude"],nth_hem_df["Humidity (%)"], "North")

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
linreg(sth_hem_df["Latitude"],sth_hem_df["Humidity (%)"],"South")

##### Analysis  
These results suggest there is no relationship between latitude and humidity.  
This result is interesting as it challenges preconceived notions of the areas near the equator (colloquially, the "tropics") as having humid (or "tropical") weather.  
It should be noted that in the Northern Hemisphere graph there is a visible cluster of cities with a humidity of 75% or more, within close range of the equator. But this cluster is not significant enough to suggest a correlation.

### Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
linreg(nth_hem_df["Latitude"],nth_hem_df["Cloudiness (%)"], "North")

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
linreg(sth_hem_df["Latitude"],sth_hem_df["Cloudiness (%)"],"South")

##### Analysis  
These results suggest there is no relationship between latitude and cloudiness.  
The main statistical characteristic of the cloudiness data is its high variability.  
The datapoints are heavily concentrated near the 0% minimum and the 100% maximum.

### Wind Speed (m/s) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
linreg(nth_hem_df["Latitude"],nth_hem_df["Wind Speed (m/s)"], "North")

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
linreg(sth_hem_df["Latitude"],sth_hem_df["Wind Speed (m/s)"],"South")

##### Analysis  
These results suggest there is no relationship between latitude and wind speed.  
A notable point from the data is that the respective linear regression equations are closer to each other than for any of the other graphs.