In [None]:
# Observations!
# 1) According to this dataset there is slight correlation between latitude and humidity.
# 2) It seems as though there is more correlation between cloudiness and latitude in the southern hemisphere than in the northern.
# 3) Temperature correlates with latitude: the further a city's latitude is from the equator, the lower the temperature is.

# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from typing import List
from scipy.stats import linregress
from datetime import datetime

# Import API key
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV)
# NOTE(review): this path is not referenced by any cell visible in this
# notebook; the CSV export cell writes to its own hard-coded path instead.
# Confirm which location is intended.
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes
# Each tuple is (low, high); the two ends are passed to np.random.uniform
# when the random coordinates are drawn.
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Draw 1500 random latitudes and 1500 random longitudes and pair them up
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Look up the nearest city for every coordinate pair. dict.fromkeys keeps the
# first occurrence of each name in encounter order, so repeated cities collapse
# to a single entry.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Create class for my API call.
class weather_data:
    """Current-weather lookup for a single city via the OpenWeatherMap API.

    Attributes:
        URL: Endpoint the request is sent to.
        key: API key sent as the ``appid`` query parameter.
        city: City name sent as the ``q`` query parameter.
        unit: Unit system sent as the ``units`` query parameter ('imperial').
        timeout: Seconds to wait for the server before giving up.
    """

    def __init__(self, city, key, timeout=10):
        self.URL = "http://api.openweathermap.org/data/2.5/weather?"
        self.key = key
        self.city = city
        self.unit = 'imperial'
        self.timeout = timeout

    def getWeather(self):
        """Request the weather for ``self.city`` and flatten it to one record.

        Returns:
            dict with the keys 'City', 'Lon', 'Lat', 'Max Temp', 'Humidity',
            'Cloudiness', 'Wind Speed', 'Country' and 'Date', ready to be used
            as one DataFrame row.

        Raises:
            requests.RequestException: on network failure, timeout, or a
                non-2xx HTTP status (e.g. unknown city, rejected API key).
            KeyError: if the JSON body lacks one of the expected fields.
        """
        # Handing the query to requests as ``params`` URL-encodes every value,
        # so city names containing spaces, '&' or '#' cannot corrupt the query.
        params = {'appid': self.key, 'q': self.city, 'units': self.unit}
        # Without a timeout a single stalled connection would hang the notebook.
        response = requests.get(self.URL, params=params, timeout=self.timeout)
        # Report HTTP failures as such instead of as a missing 'coord' key.
        response.raise_for_status()
        wjson = response.json()

        # Return a dict to get the values out of object.
        return {
            'City': self.city,
            'Lon': wjson['coord']['lon'],
            'Lat': wjson['coord']['lat'],
            'Max Temp': wjson['main']['temp_max'],
            'Humidity': wjson['main']['humidity'],
            'Cloudiness': wjson['clouds']['all'],
            'Wind Speed': wjson['wind']['speed'],
            'Country': wjson['sys']['country'],
            'Date': wjson['dt'],
        }
# Create empty list.
data_list = []
# Start up the loop for cities, logging each one as it is processed.
for city_number, city in enumerate(cities, start=1):
    print(f'Processing city {city_number} of {len(cities)}: {city}')
    # Try to run the API call, and add pulled data to list.
    try:
        weather_city = weather_data(city, weather_api_key)
        data_list.append(weather_city.getWeather())
    # Skip only the failures a lookup can legitimately produce: a response
    # missing the expected fields (KeyError), a body that is not valid JSON
    # (ValueError), or a network/HTTP error from requests. Anything else,
    # including KeyboardInterrupt, is allowed to propagate.
    except (KeyError, ValueError, requests.RequestException) as err:
        print(f'  Skipping {city}: {type(err).__name__}: {err}')
# Create dataframe from our dict.
city_dataframe = pd.DataFrame.from_records(data_list)

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Output dataframe to a csv file
weather_csv_path = R'../output_data/weather_csv.csv'
# Create the target folder first so the export also works on a fresh checkout
# where the output directory has not been created yet.
os.makedirs(os.path.dirname(weather_csv_path), exist_ok=True)
city_dataframe.to_csv(weather_csv_path, index=False)

In [None]:
# Format dataframe date, then display & show the shape.
# The column starts out as epoch seconds. The conversion is guarded so that
# re-running this cell does not feed already-converted date objects back into
# the epoch-seconds conversion.
if pd.api.types.is_numeric_dtype(city_dataframe['Date']):
    city_dataframe['Date'] = pd.to_datetime(city_dataframe['Date'], unit='s').dt.date
print(city_dataframe.head(), city_dataframe.shape)

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Keep only the rows whose humidity reading is at most 100%
humidity_within_range = city_dataframe['Humidity'].le(100)
sweaty_city_check = city_dataframe.loc[humidity_within_range]
# The filtered shape matches the unfiltered one when nothing was dropped.
print(sweaty_city_check.shape)
# No cities above 100% humidity

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Max temperature against latitude, one marker per city
fig, ax = plt.subplots()
ax.scatter(city_dataframe['Lat'], city_dataframe['Max Temp'], marker='o')
# The title carries today's date so the figure records when it was produced.
ax.set_title(f'City Latitude vs City Temperature {time.strftime("(%m/%d/%Y)")}')
ax.set_xlabel('Latitude')
ax.set_ylabel('Temperature (F)')
ax.grid(True)
fig.tight_layout()
plt.show()

# This scatter plot shows relation between latitude and a city's maximum temperature. Maximum temperature changes depending on distance from the equator, value 0.

## Latitude vs. Humidity Plot

In [None]:
# Humidity against latitude, one marker per city
fig, ax = plt.subplots()
ax.scatter(city_dataframe['Lat'], city_dataframe['Humidity'], marker='o')
ax.set(
    title=f'City Latitude vs City Humidity {time.strftime("(%m/%d/%Y)")}',
    xlabel='Latitude',
    ylabel='Humidity',
)
ax.grid(True)
fig.tight_layout()
plt.show()

# This scatter plot shows the relation between latitude and a city's humidity. The graph represents how humidity changes depending on the distance from the equator, but there seems to be no correlation between these values.

## Latitude vs. Cloudiness Plot

In [None]:
# Cloudiness against latitude, one marker per city
plt.scatter(city_dataframe['Lat'], city_dataframe['Cloudiness'], marker='o')
# The title carries today's date so the figure records when it was produced.
plt.title(f'City Latitude vs City Cloudiness {time.strftime("(%m/%d/%Y)")}')
plt.ylabel('Cloudiness')
plt.xlabel('Latitude')
plt.grid(True)
plt.tight_layout()
plt.show()

# This scatter plot shows the relation between latitude and a city's cloudiness. Looking at the plot, you can see there is no correlation between these values.

## Latitude vs. Wind Speed Plot

In [None]:
# Wind speed against latitude, one marker per city
latitudes = city_dataframe['Lat']
wind_speeds = city_dataframe['Wind Speed']
report_date = time.strftime("(%m/%d/%Y)")

plt.scatter(latitudes, wind_speeds, marker='o')
plt.grid(True)
plt.xlabel('Latitude')
plt.ylabel('Wind Speed')
plt.title(f'City Latitude vs City Wind Speed {report_date}')
plt.tight_layout()
plt.show()

# This scatter plot shows the relation between latitude and a city's wind speed. This plot shows no correlation between the two values since all the data is evenly spread. It does show, though, that most cities in the world have wind speeds between 0 and 15 mph.

## Linear Regression

In [None]:
# Create two new dataframes for northern and southern hems.
# Cities exactly on the equator (Lat == 0) go to the northern frame only, so
# no row is counted in both hemispheres.
northern_hem = city_dataframe.loc[city_dataframe['Lat'] >= 0]
southern_hem = city_dataframe.loc[city_dataframe['Lat'] < 0]

# Create a function that finds linear regression, then creates the plot.
def LinearRegressionPlots(xvalue, yvalue, xlabel, ylabel, eqpos, figure):
    """Fit a least-squares line to the data, plot it, and save the figure.

    Args:
        xvalue: Independent values; must support ``xvalue * number``
            (e.g. a pandas Series or NumPy array).
        yvalue: Dependent values, same length as ``xvalue``.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.
        eqpos: (x, y) position at which the fitted equation is annotated.
        figure: Suffix used in the saved file name, ``fig{figure}.png``.

    Side effects:
        Prints the r-squared value, draws on the current pyplot figure,
        saves it to ``../output_data/fig{figure}.png`` and shows it.
    """
    fit = linregress(xvalue, yvalue)
    reg_value = xvalue * fit.slope + fit.intercept
    # linregress reports the correlation coefficient r; it has to be squared
    # to get the coefficient of determination that the message names.
    print(f'The r-squared value is: {fit.rvalue ** 2}')
    equation = 'y = ' + str(round(fit.slope, 2)) + 'x + ' + str(round(fit.intercept, 2))

    plt.scatter(xvalue, yvalue)
    plt.plot(xvalue, reg_value, 'r-')
    plt.annotate(equation, eqpos, fontsize=16, color='red')
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    plt.tight_layout()
    plt.savefig(f'../output_data/fig{figure}.png')
    plt.show()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, max temperature on y.
xvalue, yvalue = 'Lat', 'Max Temp'
# Where the fitted equation is annotated on the plot.
eqpos = (0, 35)
# The title is set first; the helper then draws onto the same current figure.
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')
# Fit, plot and save as figure 1.
LinearRegressionPlots(
    xvalue=northern_hem[xvalue],
    yvalue=northern_hem[yvalue],
    xlabel='Latitude',
    ylabel='Max Temperature (F)',
    eqpos=eqpos,
    figure=1,
)

# This linear regression model shows the correlation between latitude and max temperature in the northern hemisphere. This shows a good correlation between the two values; however, unlike in the northern hemisphere, temperatures in the southern hemisphere do not change as drastically.

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, max temperature on y.
xvalue, yvalue = 'Lat', 'Max Temp'
# Where the fitted equation is annotated on the plot.
eqpos = (-55, 80)
plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')
# Fit, plot and save as figure 2.
LinearRegressionPlots(
    xvalue=southern_hem[xvalue],
    yvalue=southern_hem[yvalue],
    xlabel='Latitude',
    ylabel='Max Temperature (F)',
    eqpos=eqpos,
    figure=2,
)

# This linear regression model shows the correlation between latitude and max temperature in the southern hemisphere. This shows a strong correlation between the two values in that the further away a city is from the equator the colder a city is.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, humidity on y.
xvalue = 'Lat'
yvalue = 'Humidity'
# Where the fitted equation is annotated on the plot.
eqpos = (45, 10)
# The title names the variable plotted in this cell (humidity).
plt.title('Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
LinearRegressionPlots(northern_hem[xvalue], northern_hem[yvalue], 'Latitude', 'Humidity', eqpos, 3)

# This linear regression model shows the correlation between latitude and a city's humidity in the northern hemisphere. According to this plot, there is only a slight correlation in humidity. The results, however, are not strong enough to be meaningful.

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, humidity on y.
xvalue = 'Lat'
yvalue = 'Humidity'
# Where the fitted equation is annotated on the plot.
eqpos = (-56, 31)
# The title names the variable plotted in this cell (humidity).
plt.title('Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression')
LinearRegressionPlots(southern_hem[xvalue], southern_hem[yvalue], 'Latitude', 'Humidity', eqpos, 4)

# This linear regression model shows the correlation between latitude and a city's humidity in the southern hemisphere. According to this plot, there is a slight correlation in humidity. The data is not significant.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, cloudiness on y.
xvalue = 'Lat'
yvalue = 'Cloudiness'
# Where the fitted equation is annotated on the plot.
eqpos = (0, 50)
# The title names the variable plotted in this cell (cloudiness).
plt.title('Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
LinearRegressionPlots(northern_hem[xvalue], northern_hem[yvalue], 'Latitude', 'Cloudiness', eqpos, 5)

# The linear regression model shows the correlation between latitude and a city's cloudiness in the northern hemisphere. As you can see from this dataset there is no real correlation between the two values.

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, cloudiness on y.
xvalue = 'Lat'
yvalue = 'Cloudiness'
# Where the fitted equation is annotated on the plot.
eqpos = (-56, 25)
# The title names the variable plotted in this cell (cloudiness).
plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')
LinearRegressionPlots(southern_hem[xvalue], southern_hem[yvalue], 'Latitude', 'Cloudiness', eqpos, 6)

# The linear regression model shows the correlation between latitude and a city's cloudiness in the southern hemisphere. As you can see from this dataset there is no real correlation between the two values.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, wind speed on y.
xvalue = 'Lat'
yvalue = 'Wind Speed'
# Where the fitted equation is annotated on the plot.
eqpos = (20, 25)
# The title names the variable plotted in this cell (wind speed).
plt.title('Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
LinearRegressionPlots(northern_hem[xvalue], northern_hem[yvalue], 'Latitude', 'Wind Speed', eqpos, 7)

# This linear regression model shows the correlation between latitude and a city's wind speed in the northern hemisphere. There seems to be no correlation between these two values.

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Columns to regress: latitude on x, wind speed on y.
xvalue = 'Lat'
yvalue = 'Wind Speed'
# Where the fitted equation is annotated on the plot.
eqpos = (-56, 17)
# The title names the variable plotted in this cell (wind speed).
plt.title('Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression')
LinearRegressionPlots(southern_hem[xvalue], southern_hem[yvalue], 'Latitude', 'Wind Speed', eqpos, 8)

# This linear regression model shows the correlation between latitude and a city's wind speed in the southern hemisphere. According to this dataset, there does seem to be a slight correlation between the two. The further away from the equator, the stronger the winds may be for a city.