# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import seaborn as sns
import requests
import time
import json
import urllib
from scipy.stats import linregress

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the CSV file the city DataFrame is exported to
output_data_file = "output_data/cities.csv"

# (min, max) bounds used when sampling random latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Unique city names, kept in the order they were first encountered
cities = []
# Companion set so the uniqueness check is O(1) instead of scanning the list
seen_cities = set()

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination
for latitude, longitude in lat_lngs:
    city = citipy.nearest_city(latitude, longitude).city_name

    # Only keep the first occurrence of each city name
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

In [None]:
# OpenWeatherMap API key (alias of the key imported from api_keys)
api_key = weather_api_key

# Base URL for the current-weather API call. HTTPS is used so the API key in
# the query string is not transmitted in clear text. The trailing "&" lets
# further query parameters be appended directly.
url = "https://api.openweathermap.org/data/2.5/weather?units=Imperial&"

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# List of city data
city_data = []

# Parallel lists: position i in every list describes the same city.
city_name = []
lat = []
lng = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# Print to logger
print("Beginning Data Retrieval")
print("------------------------")

# Progress counters shown in the log; records are grouped in sets of 50
record_count = 1
set_count = 1

# Drop the trailing "&" so requests can append the query parameters itself
# (this also URL-encodes city names containing spaces or special characters).
base_url = url.rstrip("&")

# Loop through the cities, querying the API by city name
for index, city in enumerate(cities):
    # Derive 1-based set/record numbers from the position in the list so the
    # log starts at "record 1 of set 1" and rolls over after 50 records.
    set_count, record_count = divmod(index, 50)
    set_count += 1
    record_count += 1
    print(f'Processing record {record_count} of set {set_count} | {city}')

    try:
        response = requests.get(
            base_url,
            params={"appid": weather_api_key, "q": city},
            timeout=10,  # never hang indefinitely on a stalled connection
        ).json()
    except (requests.exceptions.RequestException, ValueError):
        # Network failure, timeout, or a body that is not valid JSON
        print("Request failed. skipping.")
        continue

    # Read every field BEFORE appending anything: if a key is missing, no
    # list is touched, so the parallel lists can never get out of step.
    try:
        record = (
            response["name"],
            response["coord"]["lat"],
            response["coord"]["lon"],
            response["main"]['temp_max'],
            response["main"]['humidity'],
            response["clouds"]["all"],
            response["wind"]["speed"],
            response["sys"]["country"],
            response["dt"],
        )
    except (KeyError):
        print("City not found. skipping.")
        continue

    columns = (city_name, lat, lng, max_temp, humidity,
               cloudiness, wind_speed, country, date)
    for column, value in zip(columns, record):
        column.append(value)

print('''
-----------------------------
Data Retrieval Complete
-----------------------------''')
     

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Turn the collected lists into a dictionary of columns.
# The "city" column uses city_name (names returned by the API for successful
# lookups) rather than cities (every candidate, including those the API did
# not find), so each name lines up with its own weather readings.
weather_dict = {
                "city": city_name,
                "Lat": lat,
                "Lng": lng,
                "Max Temp": max_temp,
                "Cloudiness": cloudiness,
                "Humidity": humidity,
                "Wind Speed": wind_speed,
                "Country": country,
                "Date": date
               }

# Turn the dictionary into a DataFrame. Wrapping each list in a Series pads
# shorter columns with NaN instead of raising if the lengths ever differ.
city_data = pd.DataFrame({k: pd.Series(l) for k, l in weather_dict.items()})

# Export the DataFrame to CSV (to_csv returns None when given a path, so
# there is nothing useful to keep from the call)
city_data.to_csv(output_data_file, index_label="City_ID")

# Display the DataFrame head
city_data.head()

In [None]:
# Number of non-null values in each column
city_data.count()

In [None]:
# Keep only the rows in which every column holds a value, then show the
# per-column counts again.
has_all_values = city_data.notna().all(axis="columns")
city_data = city_data.loc[has_all_values]
city_data.count()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Today's date (MM/DD/YY), shown in the plot titles as the date of analysis
today_date = pd.to_datetime('today').date().strftime('%m/%d/%y')

# Set x and y values
x_values = city_data['Lat']
y_values = city_data['Max Temp']

# Create scatter plot for latitude vs. max temperature
plt.scatter(x_values, y_values, edgecolors='black')

# Set labels
plt.xlabel('Latitude')
plt.ylabel('Max Temperature (F)')
plt.grid()
plt.title(f"City Latitude vs. Max Temperature ({today_date})")

# Save first, then show: showing ends the current figure so the next plot
# starts on fresh axes instead of drawing on top of this one.
plt.savefig('output_data/CityLatitudeVsMaxTemp.png')
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Set x and y values
x_values = city_data['Lat']
y_values = city_data['Humidity']

# Create scatter plot for latitude vs. humidity
plt.scatter(x_values, y_values, edgecolors='black')

# Set labels
plt.xlabel('Latitude')
plt.ylabel('Humidity (%)')
plt.title(f"City Latitude vs. Humidity ({today_date})")

# Style grid. Visibility is passed positionally because the old `b=` keyword
# was renamed to `visible=` and is rejected by current Matplotlib releases.
plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")

# Save first, then show: showing ends the current figure so the next plot
# starts on fresh axes instead of drawing on top of this one.
plt.savefig('output_data/CityLatitudeVsHumidity.png')
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Set x and y values
x_values = city_data['Lat']
y_values = city_data['Cloudiness']

# Create scatter plot for latitude vs. cloudiness
plt.scatter(x_values, y_values, edgecolors='black')

# Set labels
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title(f"City Latitude vs. Cloudiness ({today_date})")

# Style grid. Visibility is passed positionally because the old `b=` keyword
# was renamed to `visible=` and is rejected by current Matplotlib releases.
plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")

# Save first, then show: showing ends the current figure so the next plot
# starts on fresh axes instead of drawing on top of this one.
plt.savefig('output_data/CityLatitudeVsCloudiness.png')
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Set x and y values
x_values = city_data['Lat']
y_values = city_data['Wind Speed']

# Create scatter plot for latitude vs. wind speed
plt.scatter(x_values, y_values, edgecolors='black')

# Set labels
plt.xlabel('Latitude')
plt.ylabel('Wind Speed (mph)')
plt.title(f"City Latitude vs. Wind Speed ({today_date})")

# Style grid. Visibility is passed positionally because the old `b=` keyword
# was renamed to `visible=` and is rejected by current Matplotlib releases.
plt.grid(True, which="major", axis="both", linestyle="-", color="lightgrey")

# Save first, then show: showing ends the current figure so the next plot
# starts on fresh axes instead of drawing on top of this one.
plt.savefig('output_data/CityLatitudeVsWindSpeed.png')
plt.show()

## Linear Regression

In [None]:
 def linear_reg_plot(df, title_name, y_column_name, y_label, file_name, xy):
     """Scatter a column against latitude with its least-squares fit line.

     Fits ``df[y_column_name]`` against ``df['Lat']`` with ``linregress``,
     draws the scatter and the fitted line, annotates the line equation,
     prints the r-squared value, saves the figure and then shows it.

     Args:
         df: DataFrame containing a 'Lat' column and ``y_column_name``.
         title_name: Plot title.
         y_column_name: Name of the column plotted on the y axis.
         y_label: Text for the y-axis label.
         file_name: Path the figure is saved to.
         xy: (x, y) position, in data coordinates, of the equation annotation.
     """
     x_values = df['Lat']
     y_values = df[y_column_name]
     (slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
     regress_values = x_values * slope + intercept

     # Pick the sign explicitly so a negative intercept reads "x - 3.20"
     # rather than "x +-3.2".
     sign = "+" if intercept >= 0 else "-"
     line_eq = f"y = {slope:.2f}x {sign} {abs(intercept):.2f}"

     plt.scatter(x_values, y_values)
     plt.plot(x_values, regress_values, "r-")
     plt.xlabel('Latitude')
     plt.ylabel(y_label)
     plt.annotate(line_eq, xy, fontsize=15, color="red")
     plt.title(title_name)

     # linregress returns the correlation coefficient r; square it to report
     # the coefficient of determination that the message names.
     print(f"The r-squared is: {rvalue ** 2}")

     # Save first, then show: showing ends the current figure so the next
     # call starts on fresh axes instead of drawing on top of this plot.
     plt.savefig(file_name)
     plt.show()

In [None]:
# Create Northern and Southern Hemisphere DataFrames
# Split the cities by hemisphere. Latitude 0 is assigned to the northern
# frame so that a city sitting exactly on the equator is not silently dropped
# from both.
northern_hemisphere_df = city_data.loc[city_data['Lat'] >= 0, :]
southern_hemisphere_df = city_data.loc[city_data['Lat'] < 0, :]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: max temperature vs. latitude. The y label carries the
# unit, matching the latitude vs. max temperature scatter plot.
linear_reg_plot(
    df=northern_hemisphere_df,
    title_name='Northern Max Temp Vs Latitude Regression',
    y_column_name='Max Temp',
    y_label='Max Temperature (F)',
    file_name='output_data/NorthernMaxTempVsLatitudeReg.png',
    xy=(10, 0),
)

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: max temperature vs. latitude. The y label carries the
# unit, matching the latitude vs. max temperature scatter plot.
linear_reg_plot(
    df=southern_hemisphere_df,
    title_name='Southern Max Temp Vs Latitude Regression',
    y_column_name='Max Temp',
    y_label='Max Temperature (F)',
    file_name='output_data/SouthernMaxTempVsLatitudeReg.png',
    xy=(-40, 50),
)

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: humidity vs. latitude. The y label carries the unit,
# matching the latitude vs. humidity scatter plot.
linear_reg_plot(
    df=northern_hemisphere_df,
    title_name='Northern Humidity Vs. Latitude Regression',
    y_column_name='Humidity',
    y_label='Humidity (%)',
    file_name='output_data/NorthernHumidityVsLatitudeReg.png',
    xy=(40, 20),
)

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: humidity vs. latitude. The y label carries the unit,
# matching the latitude vs. humidity scatter plot.
linear_reg_plot(
    df=southern_hemisphere_df,
    title_name='Southern Humidity Vs. Latitude Regression',
    y_column_name='Humidity',
    y_label='Humidity (%)',
    file_name='output_data/SouthernHumidityVsLatitudeReg.png',
    xy=(-50, 20),
)

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: cloudiness vs. latitude. The y label carries the unit,
# matching the latitude vs. cloudiness scatter plot.
linear_reg_plot(
    df=northern_hemisphere_df,
    title_name='Northern Cloudiness Vs. Latitude Regression',
    y_column_name='Cloudiness',
    y_label='Cloudiness (%)',
    file_name='output_data/NorthernCloudinessVsLatitudeReg.png',
    xy=(40, 20),
)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: cloudiness vs. latitude. The y label carries the unit,
# matching the latitude vs. cloudiness scatter plot.
linear_reg_plot(
    df=southern_hemisphere_df,
    title_name='Southern Cloudiness Vs. Latitude Regression',
    y_column_name='Cloudiness',
    y_label='Cloudiness (%)',
    file_name='output_data/SouthernCloudinessVsLatitudeReg.png',
    xy=(-50, 20),
)

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern hemisphere: wind speed vs. latitude. The y label carries the unit,
# matching the latitude vs. wind speed scatter plot.
linear_reg_plot(
    df=northern_hemisphere_df,
    title_name='Northern Wind Speed Vs. Latitude Regression',
    y_column_name='Wind Speed',
    y_label='Wind Speed (mph)',
    file_name='output_data/NorthernWindSpeedVsLatitudeReg.png',
    xy=(40, 20),
)

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern hemisphere: wind speed vs. latitude. The y label carries the unit,
# matching the latitude vs. wind speed scatter plot.
linear_reg_plot(
    df=southern_hemisphere_df,
    title_name='Southern Wind Speed Vs. Latitude',
    y_column_name='Wind Speed',
    y_label='Wind Speed (mph)',
    file_name='output_data/SouthernWindSpeedVsLatitudeReg.png',
    xy=(-30, 20),
)