# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [None]:
import concurrent.futures 
import requests
from pprint import pprint
import json
import os
from citipy import citipy
import random
import numpy
import pandas as pd
import matplotlib.pyplot as plt
import time


# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and
# newer releases no longer accept the old name, so pick whichever one this
# installation actually provides (falling back to the default style).
_POSTER_STYLE = next(
    (style for style in ('seaborn-v0_8-poster', 'seaborn-poster')
     if style in plt.style.available),
    'default',
)
plt.style.use(_POSTER_STYLE)

# NOTE(review): this key is committed in plain text. It should be rotated and
# supplied through the OPENWEATHER_API_KEY environment variable; the literal is
# kept only as a fallback so existing runs keep working.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', '726f6729b1b0ce3ac75298331adb8ac7')
URL = "https://api.openweathermap.org/data/2.5/weather?"
CITIES_TOT = 1500  # number of random (lat, lng) pairs to draw
SETS_NUM = 120  # number of city names per request batch


In [None]:
# Seed the generator so the same coordinates are drawn on every run.
random.seed(42)

# All latitudes are drawn before any longitude.
ran_lat = []
for _ in range(CITIES_TOT):
    ran_lat.append(random.uniform(-90, 90))

ran_long = []
for _ in range(CITIES_TOT):
    ran_long.append(random.uniform(-180, 180))

# Pair each latitude with the longitude at the same position.
dd = [(lat, lng) for lat, lng in zip(ran_lat, ran_long)]
dd

## Generate Cities List

In [None]:
cities = []


def generate_cities(lat):
    """Append the nearest city's name for each coordinate pair to ``cities``.

    Args:
        lat: iterable of ``(latitude, longitude)`` pairs. Despite the name it
            carries both coordinates; index 0 is the latitude and index 1 the
            longitude.

    The names are appended to the module-level ``cities`` list; duplicates are
    not filtered here.
    """
    for coords in lat:
        nearest = citipy.nearest_city(coords[0], coords[1])
        cities.append(nearest.city_name)


generate_cities(dd)

# dict.fromkeys de-duplicates while keeping first-seen order. A set would also
# de-duplicate, but its iteration order for strings varies between interpreter
# runs, which would reshuffle the batches despite the fixed random seed.
final_cities = list(dict.fromkeys(cities))

# Split the unique names into consecutive batches of SETS_NUM entries
# (the last batch may be shorter).
final_cities_clean = [
    final_cities[i:i + SETS_NUM]
    for i in range(0, len(final_cities), SETS_NUM)
]
len(final_cities_clean)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
start = time.perf_counter()
json_data = []
print("""Beginning Data Retrieval     
---------------------------------------""")


def get_weather_data(cities):
    """Request the current weather for each city and collect the responses.

    Every response that parses as JSON and contains a ``name`` field is
    appended to the module-level ``json_data`` list. A city whose request
    fails, whose body is not valid JSON, or whose payload has no ``name`` is
    reported as skipped and nothing is stored for it.

    Args:
        cities: iterable of city names, each sent as the ``q`` query parameter.
    """
    for enum, city in enumerate(cities, start=1):
        parameters = {
            'q': city,
            'appid': API_KEY,
            'units': 'metric',
        }
        try:
            # A timeout keeps one stalled connection from blocking the batch.
            data_request = requests.get(URL, params=parameters, timeout=10).json()
            name = data_request['name']
        except (requests.exceptions.RequestException, ValueError, KeyError, TypeError):
            # Network failure, non-JSON body, or an error payload without 'name'.
            print("City not found. Skipping...")
            continue
        print(f"Processing Record {enum} | {name}")
        json_data.append(data_request)


# One shared pool for all batches: leaving the ``with`` block waits for every
# submitted batch, so the batches run side by side instead of one at a time.
# Iterating the batch list directly also avoids indexing past its end.
# NOTE(review): the API may throttle bursts of requests - confirm the quota
# of the key in use.
with concurrent.futures.ThreadPoolExecutor() as executor:
    for batch in final_cities_clean:
        executor.submit(get_weather_data, batch)
        time.sleep(0.25)  # stagger the start of each batch

print("""-----------------------------
Data Retrieval Complete      
-----------------------------""")

finish = time.perf_counter()
print(f'Finished in {round(finish-start, 2)} second(s)')

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
city_name = []
latitude = []
longitude = []
max_temp = []
humidity = []
cloudiness = []
wind_speed = []
country = []
date = []

# Column lists in the same order as the fields read from each payload below.
_columns = (
    city_name, latitude, longitude, max_temp, humidity,
    cloudiness, wind_speed, country, date,
)

for data in json_data:
    try:
        # Read every field before appending anything, so a payload that lacks
        # one of them can never leave the columns with different lengths.
        record = (
            data['name'],
            data['coord']['lat'],
            data['coord']['lon'],
            data['main']['temp_max'],
            data['main']['humidity'],
            data['clouds']['all'],
            data['wind']['speed'],
            data['sys']['country'],
            data['dt'],
        )
    except (KeyError, TypeError):
        # Incomplete or non-dict payload (e.g. an error response): skip it whole.
        continue
    for column, value in zip(_columns, record):
        column.append(value)

df = pd.DataFrame({
    'City': city_name,
    'Lat': latitude,
    'Lng': longitude,
    'Max Temp': max_temp,
    'Humidity': humidity,
    'Cloudiness': cloudiness,
    'Wind Speed': wind_speed,
    'Country': country,
    'Date': date,
})

# Order by city name, drop fully identical rows, and renumber the index.
df.sort_values(by='City', ascending=True, inplace=True)
df.drop_duplicates(inplace=True)
df.reset_index(drop=True, inplace=True)

# Exporting to CSV (create the target folder first so the write cannot fail
# just because the directory is missing).
output_path = "../output_data/weather.csv"
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, encoding='utf-8')

# Last expression of the cell, so the notebook displays the DataFrame.
df

In [None]:
# Show pandas' summary statistics for the weather DataFrame.
df.describe()

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
# Get the indices of cities that have humidity over 100%.
# The comparison is strict: a reading of exactly 100% is a valid value and
# must not be treated as an outlier.
df_copy = df.copy()
df_humidity_100 = df_copy.loc[df_copy['Humidity'] > 100]
df_humidity_100.index

In [None]:
# Build "clean_city_data": a new DataFrame holding every row of df_copy except
# the humidity outliers, renumbered from 0. drop() returns a new frame here,
# so df_copy itself is left untouched.
clean_city_data = df_copy.drop(index=df_humidity_100.index).reset_index(drop=True)
clean_city_data

In [None]:
# Count the rows per city name; a count above 1 means the same name occurs
# more than once in the cleaned data.
clean_city_data['City'].value_counts()

## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:
# Scatter of 'Max Temp' against 'Lat' for every row of clean_city_data.
clean_city_data.plot.scatter(x='Lat', y='Max Temp')
plt.show()

## Latitude vs. Humidity Plot

In [None]:
# Scatter of 'Humidity' against 'Lat' for every row of clean_city_data.
clean_city_data.plot.scatter(x='Lat', y='Humidity')
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of 'Cloudiness' against 'Lat' for every row of clean_city_data.
clean_city_data.plot.scatter(x='Lat', y='Cloudiness')
plt.show()

## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of 'Wind Speed' against 'Lat' for every row of clean_city_data.
clean_city_data.plot.scatter(x='Lat', y='Wind Speed')
plt.show()

## Linear Regression

In [None]:
# Northern hemisphere = latitude at or above the equator. The split must use
# 'Lat'; filtering on 'Lng' would separate east from west instead.
north_hem = clean_city_data.loc[clean_city_data['Lat'] >= 0].copy()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Scatter of 'Max Temp' against 'Lat' for the rows in north_hem.
north_hem.plot.scatter(x='Lat', y='Max Temp')
plt.show()

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern hemisphere = latitude below the equator. The split must use 'Lat';
# filtering on 'Lng' would separate east from west instead.
south_hem = clean_city_data.loc[clean_city_data['Lat'] < 0].copy()

# Scatter of 'Max Temp' against 'Lat' for the southern-hemisphere rows.
south_hem.plot(kind='scatter', x='Lat', y='Max Temp')
plt.show()

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Scatter of 'Humidity' against 'Lat' for the rows in north_hem.
north_hem.plot.scatter(x='Lat', y='Humidity')
plt.show()

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Scatter of 'Humidity' against 'Lat' for the rows in south_hem.
south_hem.plot.scatter(x='Lat', y='Humidity')
plt.show()

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Scatter of 'Cloudiness' against 'Lat' for the rows in north_hem.
north_hem.plot.scatter(x='Lat', y='Cloudiness')
plt.show()

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Scatter of 'Cloudiness' against 'Lat' for the rows in south_hem.
south_hem.plot.scatter(x='Lat', y='Cloudiness')
plt.show()

####  Northern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Scatter of 'Wind Speed' against 'Lat' for the rows in north_hem.
north_hem.plot.scatter(x='Lat', y='Wind Speed')
plt.show()

####  Southern Hemisphere - Wind Speed (m/s) vs. Latitude Linear Regression

In [None]:
# Scatter of 'Wind Speed' against 'Lat' for the rows in south_hem.
south_hem.plot.scatter(x='Lat', y='Wind Speed')
plt.show()