# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
# Dependencies and Setup
import json
import os
import random
import time
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from scipy.stats import linregress

# Import API key
from config import api_key

#print(api_key)

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Path of the output CSV file
output_data_file = "output_data/cities.csv"

# (min, max) bounds used when sampling random latitudes and longitudes
lat_range = (-90, 90)
lng_range = (-180, 180)

## Generate Cities List

In [None]:
# Number of random coordinates to draw; many map to the same nearest city,
# so the resulting city list is shorter than this.
NUM_SAMPLES = 1500

# Create a set of random lat and lng combinations.
# Materialised as a list so it can still be inspected after the loop below
# (a bare zip object would be exhausted by the first iteration).
lats = np.random.uniform(lat_range[0], lat_range[1], size=NUM_SAMPLES)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=NUM_SAMPLES)
lat_lngs = list(zip(lats, lngs))

# Identify the nearest city for each lat, lng combination.
# dict.fromkeys drops duplicate names while keeping first-seen order, and
# avoids re-scanning a growing list for every sample.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
print(len(cities))



### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Retrieve the current weather for each city from the OpenWeatherMap API

BASE_URL = "https://api.openweathermap.org/data/2.5/weather"
REQUEST_TIMEOUT = 10  # seconds to wait before giving up on a single request

city_names = cities
city_weather = []  # raw JSON response of every city that was found

# One list per output column; the key order fixes the DataFrame column order.
weather = {
    'City': [],
    'Lat': [],
    'Lng': [],
    'Max Temp': [],
    'Humidity': [],
    'Cloudiness': [],
    'Wind Speed': [],
    'Country': [],
    'Date': [],
}

for number, city_name in enumerate(city_names, start=1):
    # Let requests build the query string so that city names containing
    # spaces or non-ASCII characters are URL-encoded correctly.
    params = {'q': city_name, 'units': 'imperial', 'appid': api_key}
    try:
        result = requests.get(BASE_URL, params=params, timeout=REQUEST_TIMEOUT).json()
    except (requests.RequestException, ValueError) as err:
        # Only the exception type is printed: the full message can contain
        # the request URL, which includes the API key.
        print(f"{city_name} request failed ({type(err).__name__}), skip this one")
        continue

    # Responses without a 'coord' entry are treated as "city not found".
    if 'coord' not in result:
        print(f"{city_name} not found, skip this one")
        continue

    print(f"Retrieving weather data for city number {number}:  {city_name}")

    # Extract every field before appending anything, so the column lists
    # always stay the same length even if a response is incomplete.
    try:
        record = {
            'City': result['name'],
            'Lat': result['coord']['lat'],
            'Lng': result['coord']['lon'],
            'Max Temp': result['main']['temp_max'],
            'Humidity': result['main']['humidity'],
            'Cloudiness': result['clouds']['all'],
            'Wind Speed': result['wind']['speed'],
            'Country': result['sys']['country'],
            'Date': result['dt'],
        }
    except KeyError as err:
        print(f"{city_name} response is missing {err}, skip this one")
        continue

    city_weather.append(result)
    for column, value in record.items():
        weather[column].append(value)




### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Build the DataFrame from the collected column lists
weather_df = pd.DataFrame(weather)

# to_csv does not create missing directories, so make sure the target
# directory exists before exporting.
output_dir = os.path.dirname(output_data_file)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
weather_df.to_csv(output_data_file, index=False)

weather_df

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Flag rows whose reported humidity is above 100%.
too_humid = weather_df['Humidity'] > 100
if too_humid.any():
    # Show the offending rows (index and humidity value) before removing them
    print(weather_df.loc[too_humid, 'Humidity'])

# DataFrame.drop returns a new frame rather than modifying the original, so
# the cleaned data is built explicitly with a boolean mask. rename() also
# returns a new frame, which leaves weather_df untouched.
clean_city_data = weather_df.loc[~too_humid].rename(columns={'Lat': 'Latitude'})

print(f"{len(weather_df)} cities retrieved, {len(clean_city_data)} kept after the humidity filter")





In [None]:
# Preview the first rows of the cleaned data
clean_city_data.head()


In [None]:
# Define function for making scatter plots with a fitted regression line

def make_scatter_plot(df, x_col, y_col, fig_name=None):
    """Scatter-plot one DataFrame column against another with a linear fit.

    Draws ``df[y_col]`` against ``df[x_col]`` on a new figure, overlays the
    least-squares regression line in red, prints the r-squared of the fit and
    saves the finished figure as a PNG in the current working directory.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding both columns.
    x_col, y_col : str
        Names of the columns plotted on the x and y axes; also used for the
        axis labels and the title.
    fig_name : str, optional
        File name stem (without extension) of the saved figure. Defaults to
        ``"<x_col>_vs_<y_col>_n<points>"`` with spaces replaced by
        underscores. The default is deterministic, so re-running a cell
        overwrites its own figure instead of adding randomly numbered copies.

    Returns
    -------
    None
    """
    # Rows missing either value cannot be plotted or used in the fit.
    usable = df[x_col].notna() & df[y_col].notna()
    x = df.loc[usable, x_col]
    y = df.loc[usable, y_col]

    # A dedicated figure keeps repeated calls from drawing on top of each other.
    fig, ax = plt.subplots()
    ax.scatter(x, y)
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(f"{y_col} as a function of {x_col}")
    if len(y) > 0:
        ax.set_ylim(y.min() - 10, y.max() + 10)

    # A regression line needs at least two points.
    if len(x) >= 2:
        reg = linregress(x, y)
        ax.plot(x, reg.slope * x + reg.intercept, color="red")
        r_squared = round(reg.rvalue**2, 2)
        print(f"r squared = {r_squared}")
    else:
        print("not enough data points for a linear regression")

    # Save only after the regression line is drawn so the file matches the
    # figure shown in the notebook.
    if fig_name is None:
        fig_name = f"{x_col}_vs_{y_col}_n{len(x)}".replace(" ", "_")
    fig.savefig(f"{fig_name}.png")
              


## Latitude vs. Temperature Plot

In [None]:
# Max temperature against latitude for all cleaned cities
make_scatter_plot(df=clean_city_data, x_col='Latitude', y_col='Max Temp')


This plot shows maximum temperature plotted against latitude.  Temperature appears to
decrease as the latitude gets farther from 0, more so in the northern hemisphere (lat > 0).
The r-squared value of .52 doesn't seem particularly strong, but this is confounded by using negative and positive lat numbers in the same graph.  Graphs per hemisphere, and based on the absolute latitude are shown in later figures.

## Latitude vs. Humidity Plot

In [None]:
# Humidity against latitude for all cleaned cities
make_scatter_plot(df=clean_city_data, x_col='Latitude', y_col='Humidity')

There does not seem to be an effect of latitude on humidity

## Latitude vs. Cloudiness Plot

In [None]:
# Cloudiness against latitude for all cleaned cities
make_scatter_plot(df=clean_city_data, x_col='Latitude', y_col='Cloudiness')

There is no correlation between latitude and cloudiness.

## Latitude vs. Wind Speed Plot

In [None]:
# Wind speed against latitude for all cleaned cities
make_scatter_plot(df=clean_city_data, x_col='Latitude', y_col='Wind Speed')

There is no correlation between latitude and wind speed

## Linear Regression

In [None]:
# Split the cleaned city data into two DataFrames by the sign of the latitude:
# one for the northern hemisphere (> 0) and one for the southern (< 0).
# A city exactly on the equator (latitude 0) falls into neither.

north = clean_city_data[clean_city_data['Latitude'] > 0]
print(f"{len(north)} cities were analyzed for the northern hemisphere")

south = clean_city_data[clean_city_data['Latitude'] < 0]
print(f"{len(south)} cities were analyzed for the southern hemisphere")

# Check that filtering worked. The assertions fail loudly if a row ended up
# in the wrong hemisphere; Series.min/max return NaN instead of raising when
# a hemisphere happens to be empty.
assert (north['Latitude'] > 0).all()
assert (south['Latitude'] < 0).all()
print(f"lowest northern latitude: {north['Latitude'].min()}")
print(f"highest southern latitude: {south['Latitude'].max()}")

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Max temperature against latitude, northern hemisphere only
make_scatter_plot(df=north, x_col='Latitude', y_col='Max Temp')

Focusing just on the northern hemisphere (lat > 0) reveals a much stronger correlation
(r^2 = 0.74) between Max Temp and Latitude

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Max temperature against latitude, southern hemisphere only
make_scatter_plot(df=south, x_col='Latitude', y_col='Max Temp')

Focusing just on the southern hemisphere (lat < 0) reveals an apparent correlation between
Max Temp and Latitude (r^2 = .37).  This correlation is much weaker than that for the northern hemisphere.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Humidity against latitude, northern hemisphere only
make_scatter_plot(df=north, x_col='Latitude', y_col='Humidity')

 There is perhaps a weak correlation between Latitude and Humidity when considering just the
northern hemisphere.

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Humidity against latitude, southern hemisphere only
make_scatter_plot(df=south, x_col='Latitude', y_col='Humidity')

There is no correlation between Latitude and Humidity when considering just the
southern hemisphere.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Cloudiness against latitude, northern hemisphere only
make_scatter_plot(df=north, x_col='Latitude', y_col='Cloudiness')

There is no correlation between Latitude and Cloudiness when considering just the
northern hemisphere.

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Cloudiness against latitude, southern hemisphere only
make_scatter_plot(df=south, x_col='Latitude', y_col='Cloudiness')

There is no correlation between Latitude and Cloudiness when considering just the
southern hemisphere.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Wind speed against latitude, northern hemisphere only
make_scatter_plot(df=north, x_col='Latitude', y_col='Wind Speed')

There is no correlation between Latitude and wind speed when considering just the
northern hemisphere.

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Wind speed against latitude, southern hemisphere only
make_scatter_plot(df=south, x_col='Latitude', y_col='Wind Speed')

There is no correlation between Latitude and wind speed when considering just the
southern hemisphere.

Convert latitude to its absolute value to see whether max temperature varies linearly with raw distance from the equator.

In [None]:
# Absolute latitude, i.e. the latitude with its sign removed
abs_lat = clean_city_data['Latitude'].abs()

# assign() returns a new DataFrame, so the extra column does not leak back
# into clean_city_data (a plain `clean_abs = clean_city_data` would only
# create a second name for the same object).
clean_abs = clean_city_data.assign(**{'Abs Latitude': abs_lat})

In [None]:

# Max temperature against absolute latitude for all cleaned cities
make_scatter_plot(df=clean_abs, x_col='Abs Latitude', y_col='Max Temp')


There is a strong correlation between Latitude and Max Temp when considering just the
raw distance from the equator (absolute value of latitude).