# WeatherPy
----

#### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:

#!jupyter nbextension enable --py gmaps
#!pip install scipy
#!pip install citipy

In [2]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import random
from scipy.stats import linregress
import json

# Import API key
from api_keys import weather_api_key

# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy



## Generate Cities List

In [3]:
# Construct the city list from the JSON file distributed by openweathermap.org
# and save it to "allcities.csv".

with open('city.list.json.gz.log', 'r', encoding='utf-8') as jsonfile:
    jsndata = json.load(jsonfile)

# One record per entry of the JSON list. City names are lower-cased because
# citipy uses lower-case names and they are used as lookup keys later on.
city_records = [
    {
        'id': entry['id'],
        'City': entry['name'].lower(),
        'Country': entry['country'],
        'Lat': entry['coord']['lat'],
        'Lng': entry['coord']['lon'],
    }
    for entry in jsndata
]

# Passing the column list keeps the CSV header stable even for an empty input.
dftemp = pd.DataFrame(city_records, columns=['id', 'City', 'Country', 'Lat', 'Lng'])
dftemp.to_csv("../output_data/allcities.csv", index=False, header=True)

In [4]:

# Read allcities.csv into a DataFrame indexed by city name. Once allcities.csv
# has been saved, the previous cell does not need to be re-run.
#
# keep_default_na=False stops pandas from converting literal strings such as
# the country code "NA" or a city name "nan" into missing values; by default
# read_csv treats those strings as NaN.
dfcities = pd.read_csv(
    "../output_data/allcities.csv",
    encoding="utf-8",
    keep_default_na=False,
).set_index('City')
dfcities.head()

Unnamed: 0_level_0,id,Country,Lat,Lng
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ḩeşār-e sefīd,833,IR,34.330502,47.159401
‘ayn ḩalāqīm,2960,SY,34.940079,36.321911
taglag,3245,IR,38.450001,44.98333
qabāghlū,3530,IR,36.173302,46.168499
‘arīqah,5174,SY,32.889809,36.48336


In [None]:
# Select 500 unique cities and save the data into dfselected.csv
#
# Random coordinates are drawn uniformly over the latitude/longitude ranges,
# mapped to the nearest city known to citipy, and kept only when that city name
# resolves to exactly one row of dfcities and its id has not been picked yet.

ncities = 500
max_draws = 200 * ncities   # upper bound so the loop cannot spin forever

selected = []       # one record per accepted city, in selection order
seen_ids = set()    # ids already accepted (constant-time duplicate check)
draws = 0

while len(selected) < ncities and draws < max_draws:
    draws += 1
    latitude = random.uniform(-90.0, 90.0)
    longitude = random.uniform(-180.0, 180.0)
    city_name = citipy.nearest_city(latitude, longitude).city_name

    try:
        match = dfcities.loc[city_name]
    except KeyError:
        # citipy returned a name that is not in the city list; draw again
        continue
    if isinstance(match, pd.DataFrame):
        # the name matches several rows, so there is no single id to use;
        # such names are skipped
        continue

    city_id = int(match['id'])
    if city_id in seen_ids:
        continue

    seen_ids.add(city_id)
    selected.append({
        'City': city_name,
        'id': city_id,
        'Country': match['Country'],
        'Lat': match['Lat'],
        'Lng': match['Lng'],
    })

if len(selected) < ncities:
    print(f"Only {len(selected)} of {ncities} cities found after {draws} draws")

dftemp = pd.DataFrame(selected, columns=['City', 'id', 'Country', 'Lat', 'Lng'])
dftemp.to_csv("../output_data/dfselected.csv", index=False, header=True)

In [None]:

# Read dfselected.csv into a DataFrame; once dfselected.csv has been saved the
# previous cells do not need to be re-run. A scatter plot of the coordinates
# is drawn as a visual check of how the cities are spread over the globe.

# keep_default_na=False keeps literal strings such as the country code "NA"
# as text instead of letting read_csv turn them into NaN.
dfselected = pd.read_csv(
    "../output_data/dfselected.csv",
    encoding="utf-8",
    keep_default_na=False,
)

# print total # of selected cities
print(len(dfselected))

# check if the dataframe has the right format
print(dfselected.head())

# generate the scatter plot to make sure that the cities are evenly distributed throughout the globe
plt.scatter(dfselected["Lng"], dfselected["Lat"], marker="o", facecolors="red", edgecolors="black", s=50, alpha=0.75)
plt.xlabel("Longitude")
plt.ylabel("Latitude")
plt.title("Selected cities")
plt.show()

In [None]:

# Request weather information for the selected cities and save the raw
# responses into the JSON file weatherlist.json.
#
# Each city is retried a bounded number of times, one second apart. A city that
# never returns a usable response is skipped, so a permanently failing id (for
# example one the API answers without a 'city' entry) cannot stall the loop.

max_attempts = 10       # tries per city before giving up on it
request_timeout = 10    # seconds to wait for a single HTTP response

jsnlist = []

for i, city_id in enumerate(dfselected['id']):
    url = f"http://api.openweathermap.org/data/2.5/forecast?id={city_id}&units=imperial&APPID={weather_api_key}"
    for attempt in range(1, max_attempts + 1):
        try:
            jsn = requests.get(url, timeout=request_timeout).json()
            city_name = jsn['city']['name']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            # network failure, non-JSON body, or a reply without a 'city' entry
            print(f"Waiting {attempt} seconds")
            time.sleep(1)
            continue
        print('City-{0}: {1}'.format(i, city_name))
        jsnlist.append(jsn)
        break
    else:
        print(f"City-{i}: no usable response for id {city_id} after {max_attempts} attempts. Skipping...")

# Written next to the other data files, which all live under ../output_data/.
with open('../output_data/weatherlist.json', 'w') as jsonfile:
    json.dump(jsnlist, jsonfile)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:

# set lists for the dataframe
city_two = []
cloudinesses = []
dates = []
humidities = []
lats = []
lngs = []
max_temps = []
wind_speeds = []
countries = []

# set initial count quantities for organization
count_one = 0
set_one = 1

# NOTE(review): `cities` and `query_url` are not defined in any cell shown in
# this notebook - confirm they are set before this cell is run.

# loops for creating dataframe columns
for city in cities:
    try:
        # Spaces are percent-encoded; replacing them with "&" would split the
        # city name into separate query parameters.
        response = requests.get(query_url + city.replace(" ", "%20")).json()
        # Read every field before touching the lists, so a response that lacks
        # any one of them skips the city as a whole and the column lists
        # always stay the same length.
        cloudiness = response['clouds']['all']
        country = response['sys']['country']
        date = response['dt']
        humidity = response['main']['humidity']
        lat = response['coord']['lat']
        lng = response['coord']['lon']
        max_temp = response['main']['temp_max']
        wind_speed = response['wind']['speed']
    except (requests.RequestException, KeyError, TypeError, ValueError):
        print("City not found. Skipping...")
        continue

    cloudinesses.append(cloudiness)
    countries.append(country)
    dates.append(date)
    humidities.append(humidity)
    lats.append(lat)
    lngs.append(lng)
    max_temps.append(max_temp)
    wind_speeds.append(wind_speed)
    city_two.append(city)

    # record/set counters used only for the progress log
    if count_one > 48:
        count_one = 1
        set_one += 1
    else:
        count_one += 1
    print(f"Processing Record {count_one} of Set {set_one} | {city}")

### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# create a dictionary for establishing dataframe
weather_dict = {
    "City":city_two,
    "Cloudiness":cloudinesses,
    "Country":countries,
    "Date":dates,
    "Humidity":humidities,
    "Lat":lats,
    "Lng":lngs,
    "Max Temp":max_temps,
    "Wind Speed":wind_speeds
}

# establish dataframe
weather_dataframe = pd.DataFrame(weather_dict)

# output data to csv
# NOTE(review): output_data_file is not defined in any cell shown in this
# notebook - confirm it is set before this cell is run.
weather_dataframe.to_csv(output_data_file)

# show the top of the dataframe; a notebook only renders the value of the
# last expression in a cell, so this has to come after the export
weather_dataframe.head()


## Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

## Latitude vs. Temperature Plot

In [None]:

# Scatter of the "Max Temp" column against "Lat" for every row of weather_dataframe.
plt.scatter(weather_dataframe["Lat"],weather_dataframe["Max Temp"],edgecolors="black",facecolors="skyblue")
plt.title("City Latitude vs. Max Temperature (02/06/21)")
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is no longer accepted by newer releases.
plt.grid(True,which="major",axis="both",linestyle="-",color="lightgrey")
plt.savefig("../output_data/fig1.png")
plt.show()

## Latitude vs. Humidity Plot

In [None]:

# Scatter of the "Humidity" column against "Lat" for every row of weather_dataframe.
plt.scatter(weather_dataframe["Lat"],weather_dataframe["Humidity"],edgecolors="black",facecolors="skyblue")
plt.title("City Latitude vs. Humidity (02/06/21)")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.ylim(15,105)
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is no longer accepted by newer releases.
plt.grid(True,which="major",axis="both",linestyle="-",color="lightgrey")
plt.savefig("Figures/fig2.png")
plt.show()

## Latitude vs. Cloudiness Plot

In [None]:
# Scatter of the "Cloudiness" column against "Lat" for every row of weather_dataframe.
plt.scatter(weather_dataframe["Lat"],weather_dataframe["Cloudiness"],edgecolors="black",facecolors="skyblue")
plt.title("City Latitude vs. Cloudiness (02/06/21)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is no longer accepted by newer releases.
plt.grid(True,which="major",axis="both",linestyle="-",color="lightgrey")
plt.savefig("Figures/fig3.png")
plt.show()


## Latitude vs. Wind Speed Plot

In [None]:
# Scatter of the "Wind Speed" column against "Lat" for every row of weather_dataframe.
plt.scatter(weather_dataframe["Lat"],weather_dataframe["Wind Speed"],edgecolors="black",facecolors="skyblue")
plt.title("City Latitude vs. Wind Speed (02/06/21)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.ylim(-2,34)
# The on/off flag is passed positionally: the `b=` keyword was renamed to
# `visible=` in Matplotlib 3.5 and is no longer accepted by newer releases.
plt.grid(True,which="major",axis="both",linestyle="-",color="lightgrey")
plt.savefig("Figures/fig4.png")
plt.show()

## Linear Regression

In [None]:
# First, split the weather data into the northern (Lat >= 0) and southern
# (Lat < 0) hemispheres.
#
# The rows come from weather_dataframe, which is the frame that holds the
# "Lat" column and the weather columns used by the regression cells; `cities`
# is iterated as plain city-name strings in the API-call cell and cannot be
# filtered this way.
# reset_index() returns a new frame rather than modifying a slice in place.
nothern = weather_dataframe.loc[weather_dataframe["Lat"] >= 0.0].reset_index()

southern = weather_dataframe.loc[weather_dataframe["Lat"] < 0.0].reset_index()

In [None]:
# define linear regression
def plotLinearRegression(xdata,ydata,xlbl,ylbl,lblpos,ifig):
    """Fit a least-squares line to (xdata, ydata), plot it, and save the figure.

    Prints the coefficient of determination (r-squared), draws a scatter of
    the data with the fitted line in red, annotates the plot with the line
    equation, saves the figure and shows it.

    Parameters
    ----------
    xdata, ydata : array-like of numbers
        Values for the x and y axes; both are passed to ``linregress``.
    xlbl, ylbl : str
        Axis labels.
    lblpos : tuple
        (x, y) position handed to ``plt.annotate`` for the equation text.
    ifig : int
        Figure number used in the output file name ``fig{ifig}.png``.
    """
    x = np.asarray(xdata, dtype=float)
    y = np.asarray(ydata, dtype=float)

    fit = linregress(x, y)
    # linregress reports the correlation coefficient r; it has to be squared
    # to obtain the r-squared value announced in the message.
    print(f"The r-squared is: {fit.rvalue ** 2}")
    regress_values = x * fit.slope + fit.intercept
    line_eq = "y = " + str(round(fit.slope,2)) + "x + " + str(round(fit.intercept,2))

    plt.scatter(x,y)
    plt.plot(x,regress_values,"r-")
    plt.annotate(line_eq,lblpos,fontsize=15,color="red")
    plt.xlabel(xlbl)
    plt.ylabel(ylbl)
    # NOTE(review): other cells save under "../output_data/" or "Figures/" -
    # confirm that "output_data/" is the intended directory.
    plt.savefig(f"output_data/fig{ifig}.png")
    plt.show()

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:

# Regress max temperature on latitude for the northern-hemisphere rows (figure 5).
xlbl, ylbl = "Lat", "Max Temp"
lblpos = (0, 25)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=nothern[xlbl],
    ydata=nothern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=5,
)

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Regress max temperature on latitude for the southern-hemisphere rows (figure 6).
xlbl, ylbl = "Lat", "Max Temp"
lblpos = (-55, 90)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=southern[xlbl],
    ydata=southern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=6,
)

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Regress humidity on latitude for the northern-hemisphere rows (figure 7).
xlbl, ylbl = "Lat", "Humidity"
lblpos = (45, 10)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=nothern[xlbl],
    ydata=nothern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=7,
)

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:

# Regress humidity on latitude for the southern-hemisphere rows (figure 8).
xlbl, ylbl = "Lat", "Humidity"
lblpos = (-55, 15)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=southern[xlbl],
    ydata=southern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=8,
)

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:

# Regress cloudiness on latitude for the northern-hemisphere rows (figure 9).
xlbl, ylbl = "Lat", "Cloudiness"
lblpos = (20, 40)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=nothern[xlbl],
    ydata=nothern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=9,
)

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Regress cloudiness on latitude for the southern-hemisphere rows (figure 10).
xlbl, ylbl = "Lat", "Cloudiness"
lblpos = (-55, 50)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=southern[xlbl],
    ydata=southern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=10,
)

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Regress wind speed on latitude for the northern-hemisphere rows (figure 11).
xlbl, ylbl = "Lat", "Wind Speed"
lblpos = (0, 30)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=nothern[xlbl],
    ydata=nothern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=11,
)

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:

# Regress wind speed on latitude for the southern-hemisphere rows (figure 12).
xlbl, ylbl = "Lat", "Wind Speed"
lblpos = (-25, 33)  # position handed to the annotation of the fitted equation
plotLinearRegression(
    xdata=southern[xlbl],
    ydata=southern[ylbl],
    xlbl=xlbl,
    ylbl=ylbl,
    lblpos=lblpos,
    ifig=12,
)