# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
from scipy.stats import linregress
#import os
#import csv

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy

# Output File (CSV): relative path intended for the exported city weather data
output_data_file = "output_data/cities.csv"

# Range of latitudes and longitudes; used as the (low, high) bounds when
# random coordinates are drawn in the "Generate Cities List" cell
lat_range = (-90, 90)
lng_range = (-180, 180)

# Accumulators for API payloads. The request cell appends each parsed JSON
# response to response_info_json; response_json is not referenced by any
# other cell visible in this notebook.
response_json = []
response_info_json = []

## Generate Cities List

In [2]:
# Build the list of unique cities nearest to a set of random coordinates.

# Create a set of random lat and lng combinations
lats = np.random.uniform(lat_range[0], lat_range[1], size=1500)
lngs = np.random.uniform(lng_range[0], lng_range[1], size=1500)
lat_lngs = zip(lats, lngs)

# `cities` keeps first-seen order; `seen_cities` gives constant-time duplicate
# checks instead of rescanning the whole list for every coordinate pair.
cities = []
seen_cities = set()

# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name

    # If the city is unique, then add it to our cities list
    if city not in seen_cities:
        seen_cities.add(city)
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

640

In [3]:
#print cities list
#cities

In [4]:
# Turn the list of city names into a single-column DataFrame ("City"); the
# weather columns are added to this frame in later cells.
cities_df = pd.DataFrame(cities,columns=['City'])
# Bare expression so the notebook renders the frame as the cell output
cities_df

Unnamed: 0,City
0,naryan-mar
1,mahebourg
2,kuito
3,chuy
4,ushuaia
...,...
635,nadym
636,norman wells
637,wajima
638,opuwo


### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [5]:
#test if my URL is working on a single city
#city = 'Las Vegas'

# Base request URL for the OpenWeather current-weather endpoint. It ends with
# "q=" so that a single city name can be appended to form a complete query,
# e.g. base_url + city. (Interpolating the whole `cities` list into the query
# string, as before, produced an unusable URL.)
base_url = f"http://api.openweathermap.org/data/2.5/weather?appid={weather_api_key}&q="
# Left undisplayed on purpose: the URL contains the API key.
#base_url

In [6]:
# Add one placeholder column per weather field that the API loop fills in.
# Every cell starts out as an empty string ("").
weather_columns = (
    "Lat",
    "Lng",
    "Max Temp",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
    "Country",
    "Date",
)
for weather_column in weather_columns:
    cities_df[weather_column] = ""

# Preview the widened frame
cities_df.head()

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,naryan-mar,,,,,,,,
1,mahebourg,,,,,,,,
2,kuito,,,,,,,,
3,chuy,,,,,,,,
4,ushuaia,,,,,,,,


In [7]:
#Used to test location of data and build dataframe
#response_info_json[2]['dt']#['temp_max']#['coord']#['lon']

In [8]:
#request data from OpenWeather API website
#
# For each city: query the current-weather endpoint, keep the parsed payload in
# response_info_json, and copy the fields of interest into that city's row of
# cities_df. Cities without a usable payload are reported and skipped.

# Start from an empty log so that re-running this cell cannot leave payloads
# from an earlier run in front of (and misaligned with) the current ones.
response_info_json.clear()

weather_url = "http://api.openweathermap.org/data/2.5/weather"

#running for only 10 cities for pilot testing. --below code for full testing
#for x in range(len(cities)):
for x in range(min(10, len(cities))): #use during testing for speed
    print(f"Making request number: {x} for ID: {cities[x]}")

    try:
        # `params` lets requests encode the city name; the timeout keeps one
        # unresponsive call from hanging the whole notebook.
        response_info = requests.get(
            weather_url,
            params={"q": cities[x], "appid": weather_api_key},
            timeout=10,
        )
        city_weather = response_info.json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Only the exception type is printed: the full message can contain the
        # request URL, which includes the API key.
        print(f"Request failed ({type(error).__name__})")
        city_weather = {}

    # Save the payload (one entry per city, so index x still matches row x)
    response_info_json.append(city_weather)

    try:
        # Read every field before writing anything, so a payload with a
        # missing key cannot leave a half-filled row behind.
        row_values = {
            "Lat": city_weather['coord']['lat'],
            "Lng": city_weather['coord']['lon'],
            "Max Temp": city_weather['main']['temp_max'],
            "Humidity": city_weather['main']['humidity'],
            "Cloudiness": city_weather['clouds']['all'],
            "Wind Speed": city_weather['wind']['speed'],
            "Country": city_weather['sys']['country'],
            "Date": city_weather['dt'],
        }
    except KeyError:
        print("move on to next")
        continue

    for column_name, value in row_values.items():
        cities_df.loc[x, column_name] = value

cities_df.head(10)

Making request number: 0 for ID: naryan-mar
Making request number: 1 for ID: mahebourg
Making request number: 2 for ID: kuito
Making request number: 3 for ID: chuy
Making request number: 4 for ID: ushuaia
Making request number: 5 for ID: vostok
Making request number: 6 for ID: grindavik
Making request number: 7 for ID: busselton
Making request number: 8 for ID: cape town
Making request number: 9 for ID: port elizabeth


Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,naryan-mar,67.6713,53.087,274.26,96,50,4.14,RU,1618926600
1,mahebourg,-20.4081,57.7,301.15,78,75,1.54,MU,1618926600
2,kuito,-12.3833,16.9333,296.92,44,73,2.61,AO,1618926601
3,chuy,-33.6971,-53.4616,294.97,64,1,5.96,UY,1618926601
4,ushuaia,-54.8,-68.3,280.15,76,75,7.72,AR,1618926601
5,vostok,46.4856,135.883,270.7,47,0,3.41,RU,1618926602
6,grindavik,63.8424,-22.4338,277.15,65,75,5.14,IS,1618926602
7,busselton,-33.65,115.333,285.37,89,20,5.18,AU,1618926603
8,cape town,-33.9258,18.4232,299.26,46,0,4.63,ZA,1618926401
9,port elizabeth,-33.918,25.5701,292.15,82,0,7.72,ZA,1618926603


In [9]:
#type test- need numerical/float
# The weather columns were created holding "" and then filled cell by cell,
# so inspect the dtypes before doing any numeric work on them.
cities_df.dtypes


City          object
Lat           object
Lng           object
Max Temp      object
Humidity      object
Cloudiness    object
Wind Speed    object
Country       object
Date          object
dtype: object

In [10]:
# Convert the measurement columns to numeric dtypes. Values that cannot be
# parsed (such as the "" placeholders) are coerced to NaN.
numeric_columns = [
    "Lat",
    "Lng",
    "Max Temp",
    "Humidity",
    "Cloudiness",
    "Wind Speed",
    "Date",
]
for numeric_column in numeric_columns:
    cities_df[numeric_column] = pd.to_numeric(cities_df[numeric_column], errors='coerce')

# Confirm the resulting dtypes
cities_df.dtypes

City           object
Lat           float64
Lng           float64
Max Temp      float64
Humidity      float64
Cloudiness    float64
Wind Speed    float64
Country        object
Date          float64
dtype: object

In [11]:
#cities_agg_df=cities_df.agg(['count','mean', 'std', 'min', 'max'])
#cities_agg_df = cities_df.groupby('Sort').agg({"Lat":['count',np.mean, np.std, 'min', 'max']})
#tumor_vol_df = last_time_interval_mice_df[['Tumor Volume (mm3)','Drug Regimen']].copy()
#cities_agg_df
#cleaned_agg_df = cities_agg_df[['Lat','Lng','Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed', 'Date']].copy()
#cleaned_agg_df

In [12]:
# Earlier experiments with custom aggregation, kept for reference:
#columns.describe(percentiles=[0.5, 0.95])
#cleaned_for_agg_df = cities_df[['Lat','Lng','Max Temp', 'Humidity', 'Cloudiness', 'Wind Speed']].copy()
#cleaned_for_agg_df.columns.agg('describe')[['25%', '50%', '75%', 'count']]
#cleaned_agg_df.columns.describe(percentiles=[0.5, 0.95])

# Summary statistics (count, mean, std, min, quartiles, max) for the numeric
# columns of cities_df.
cities_df.describe()

Unnamed: 0,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Date
count,10.0,10.0,10.0,10.0,10.0,10.0,10.0
mean,-4.4783,27.87348,287.208,68.7,36.9,4.805,1618927000.0
std,45.652918,66.207201,11.121285,18.577465,35.728762,2.013246,63.39506
min,-54.8,-68.3,270.7,44.0,0.0,1.54,1618926000.0
25%,-33.862775,-12.592025,277.9,51.25,0.25,3.5925,1618927000.0
50%,-27.02905,21.99665,288.76,70.5,35.0,4.885,1618927000.0
75%,31.768375,56.54675,296.4325,81.0,74.5,5.765,1618927000.0
max,67.6713,135.8833,301.15,96.0,75.0,7.72,1618927000.0


In [14]:
# Rows whose reported humidity is above 100 (expected to be empty)
humidity_df = cities_df.loc[cities_df['Humidity'] > 100].copy()
humidity_df

Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date


In [19]:
#DF with humidity <= 100 (the exact complement of the "> 100" outlier filter)
# dropna() returns a new frame, so its result has to be kept; calling it
# without assigning the result leaves the rows with missing values in place.
clean_city_df = cities_df[cities_df['Humidity'] <= 100].dropna()
clean_city_df


Unnamed: 0,City,Lat,Lng,Max Temp,Humidity,Cloudiness,Wind Speed,Country,Date
0,naryan-mar,67.6713,53.087,274.26,96.0,50.0,4.14,RU,1618927000.0
1,mahebourg,-20.4081,57.7,301.15,78.0,75.0,1.54,MU,1618927000.0
2,kuito,-12.3833,16.9333,296.92,44.0,73.0,2.61,AO,1618927000.0
3,chuy,-33.6971,-53.4616,294.97,64.0,1.0,5.96,UY,1618927000.0
4,ushuaia,-54.8,-68.3,280.15,76.0,75.0,7.72,AR,1618927000.0
5,vostok,46.4856,135.8833,270.7,47.0,0.0,3.41,RU,1618927000.0
6,grindavik,63.8424,-22.4338,277.15,65.0,75.0,5.14,IS,1618927000.0
7,busselton,-33.65,115.3333,285.37,89.0,20.0,5.18,AU,1618927000.0
8,cape town,-33.9258,18.4232,299.26,46.0,0.0,4.63,ZA,1618926000.0
9,port elizabeth,-33.918,25.5701,292.15,82.0,0.0,7.72,ZA,1618927000.0


# Hand-rolled summary table: one column per field in data_needed, one row per
# statistic.
data_needed = ['Lat', 'Lng', 'Max Temp']
summary_rows = ['Count', 'Mean', 'St Dev', 'Max']

# Create the (initially column-less) result frame up front so that the loop
# has something to assign into.
cities_agg2_df = pd.DataFrame(index=summary_rows)

for x in data_needed:
    # Coerce to numeric so the statistics also work if the column still holds
    # "" placeholders; those become NaN and are skipped by mean/std/max.
    cities_array = pd.to_numeric(cities_df[x], errors='coerce')

    count = len(cities_array)
    mean = round(cities_array.mean(), 2)
    # ddof=0 (population standard deviation) matches what np.std computes by
    # default; note that DataFrame.describe() reports the sample value instead.
    st = round(cities_array.std(ddof=0), 2)
    largest = round(cities_array.max(), 2)

    # Values are listed in the same order as summary_rows
    cities_agg2_df[x] = [count, mean, st, largest]

cities_agg2_df

# Look up each city's coordinates through the OpenWeather API and store them in
# the "Lat" / "Lng" columns of cities_df.
for x in range(min(10, len(cities))): #use during testing for speed
#for x in range(len(cities)):
    print(f"Making request number: {x} for ID: {cities[x]}")

    # Avoid printing the request URL in public repos: it contains the API key.
    try:
        cities_response = requests.get(
            "http://api.openweathermap.org/data/2.5/weather",
            params={"q": cities[x], "appid": weather_api_key},
            timeout=10,
        )
        # convert to json
        cities_info = cities_response.json()
    except (requests.exceptions.RequestException, ValueError) as error:
        # Only the exception type is printed, for the same key-exposure reason.
        print(f"Request failed ({type(error).__name__})")
        continue

    try:
        # The payload is a dict keyed by section name ('coord', 'main', ...),
        # not a list, so it is indexed by key directly.
        lat = cities_info['coord']['lat']
        lng = cities_info['coord']['lon']
    except KeyError:
        # No 'coord' section in the payload; leave the row untouched.
        print("move on to next")
        continue

    cities_df.loc[x, "Lat"] = lat
    cities_df.loc[x, "Lng"] = lng

# Visualize to confirm lat lng appear
cities_df.head()

# Save the city weather data as a csv at the path configured in the setup cell
# (output_data_file). DataFrame.to_csv opens and writes the file itself, so no
# separate file handle or csv.writer is needed.
# Note to avoid any issues later, use encoding="utf-8"
cities_df.to_csv(output_data_file, encoding="utf-8", index=False)


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

## Inspect the data and remove the cities where the humidity > 100%.
----
Skip this step if there are no cities that have humidity > 100%. 

In [None]:
#  Get the indices of cities that have humidity over 100%.


In [None]:
# Make a new DataFrame equal to the city data to drop all humidity outliers by index.
# Passing "inplace=False" will make a copy of the city_data DataFrame, which we call "clean_city_data".


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

## Latitude vs. Humidity Plot

## Latitude vs. Cloudiness Plot

## Latitude vs. Wind Speed Plot

## Linear Regression

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression