## WeatherPy
----

#### Observations
* Max temperature is strongly correlated with latitude; most notably, the hottest temperatures are found closest to the equator.
* Most cities had a humidity above 40% with the majority falling between 60% and 100%.
* There is no observable trend between cloudiness and latitude.
* Wind speeds most often stay below 15 mph, but the greatest wind speeds and variation seem to occur beyond a latitude of 60 degrees north.

In [11]:
# Dependencies and Setup
import time

import pandas as pd
from scipy.stats import linregress

# NOTE(review): the plotting cells below also call `plt.*`, but no visible cell
# imports matplotlib -- confirm `import matplotlib.pyplot as plt` is added to
# this setup cell before running the notebook top to bottom.

# File to Load (Remember to Change These)
cities_data = "cities.csv"
cities_df = pd.read_csv(cities_data)

# Every plotting/regression cell below reads the frame under the name
# `weather_data`, which no visible cell defines. Bind it to the same DataFrame
# so a fresh top-to-bottom run does not fail with NameError.
weather_data = cities_df

# Preview the first rows (displayed as the cell's output in a notebook)
cities_df.head()

Unnamed: 0.1,Unnamed: 0,City,Country,Date,Lat,Lng,Humidity,Cloudiness,Wind Speed,Max Temp
0,0,taolanaro,,,,,,,,
1,1,hermanus,ZA,2020-04-11 16:46:54,-34.42,19.23,93.0,100.0,18.01,63.0
2,2,atuona,PF,2020-04-11 16:46:55,-9.8,-139.03,66.0,48.0,5.77,82.89
3,3,margate,GB,2020-04-11 16:46:55,51.38,1.39,87.0,41.0,3.36,57.0
4,4,mataura,NZ,2020-04-11 16:46:55,-46.19,168.86,58.0,93.0,1.99,64.0


## Generate Cities List

In [12]:
# Write the loaded city data out as an HTML table.
# The CSV export below is kept for reference but is currently commented out:
# cities_df.to_csv('../output_data/cities.csv')
cities_df.to_html(buf='../Resources/weather_data.html')

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

#### Latitude vs. Temperature Plot

In [None]:
# Latitude goes on the x-axis and max temperature on the y-axis; both columns
# are coerced to numeric and downcast to a compact float dtype.
x_values, y_values = (
    pd.to_numeric(weather_data[column], downcast="float")
    for column in ('Lat', 'Max Temp')
)

In [None]:
# Scatter max temperature against latitude
plt.scatter(x=x_values, y=y_values, marker='o')

# Title (stamped with the current date), axis labels and grid
plt.title(f'City Latitude vs. Max Temperature {time.strftime("%m/%d/%Y")}')
plt.xlabel("Latitude")
plt.ylabel("Max Temperature (F)")
plt.grid(True)

# Write the figure to disk before displaying it
plt.savefig("../output_data/Lat_vs_MaxTemp.png")

# Render the figure
plt.show()

##### The closer to the equator, the greater the max temperature.

#### Latitude vs. Humidity Plot

In [None]:
# Humidity goes on the y-axis; x_values (latitude) is carried over from the
# earlier latitude/temperature cell.
y_values = pd.to_numeric(
    weather_data['Humidity'],
    downcast="float",
)

# Scatter humidity against latitude
plt.scatter(x=x_values, y=y_values, marker='o')

# Title (stamped with the current date), axis labels and grid
plt.title(f'City Latitude vs Humidity {time.strftime("%m/%d/%Y")}')
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid(True)

# Write the figure to disk before displaying it
plt.savefig("../output_data/Lat_vs_Humid.png")

# Render the figure
plt.show()

##### Most cities have humidity above 40%.    

#### Latitude vs. Cloudiness Plot

In [None]:
# Cloudiness goes on the y-axis; x_values (latitude) is carried over from the
# earlier latitude/temperature cell.
y_values = pd.to_numeric(
    weather_data['Cloudiness'],
    downcast="float",
)

# Scatter cloudiness against latitude
plt.scatter(x=x_values, y=y_values, marker='o')

# Title (stamped with the current date), axis labels and grid
plt.title(f'City Latitude vs Cloudiness {time.strftime("%m/%d/%Y")}')
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid(True)

# Write the figure to disk before displaying it
plt.savefig("../output_data/Lat_vs_Cloud.png")

# Render the figure
plt.show()

##### There does not appear to be a relationship between cloudiness and location.   

#### Latitude vs. Wind Speed Plot

In [None]:
# Wind speed goes on the y-axis; x_values (latitude) is carried over from the
# earlier latitude/temperature cell.
y_values = pd.to_numeric(
    weather_data['Wind Speed'],
    downcast="float",
)

# Scatter wind speed against latitude
plt.scatter(x=x_values, y=y_values, marker='o')

# Title (stamped with the current date), axis labels and grid
plt.title(f'City Latitude vs Wind Speed {time.strftime("%m/%d/%Y")}')
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph)")
plt.grid(True)

# Write the figure to disk before displaying it
plt.savefig("../output_data/Lat_vs_Wind.png")

# Render the figure
plt.show()

##### Wind speed generally falls below 15 mph.  Outliers increase the further you get from the equator. 

## Linear Regression

In [None]:
# OPTIONAL: Create a function to create Linear Regression plots

In [None]:
# Wrap the weather records in a DataFrame for the regression cells below
weather_data_df = pd.DataFrame(data=weather_data)
# Bare expression: shows the frame as the cell's output in a notebook
weather_data_df

In [None]:
# Split the records by hemisphere using the sign of the latitude.
# Both comparisons are strict, so a row with latitude exactly 0 lands in
# neither frame.
north_hem_df = weather_data_df.loc[
    pd.to_numeric(weather_data_df["Lat"]).astype(float).gt(0)
]
south_hem_df = weather_data_df.loc[
    pd.to_numeric(weather_data_df["Lat"]).astype(float).lt(0)
]

####  Northern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Northern-hemisphere latitude (x) and max temperature (y) as numeric series
x_values = pd.to_numeric(north_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(north_hem_df['Max Temp'], downcast="float")

# Least-squares fit of max temperature on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(5,10),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temp (F)')
plt.title('Northern Hemisphere - Max Temp vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/NH_Lat_vs_Temp.png")

plt.show()

##### The max temp fits the regression line, which indicates a fairly strong correlation.

####  Southern Hemisphere - Max Temp vs. Latitude Linear Regression

In [None]:
# Southern-hemisphere latitude (x) and max temperature (y) as numeric series
x_values = pd.to_numeric(south_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(south_hem_df['Max Temp'], downcast="float")

# Least-squares fit of max temperature on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-50,80),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Max Temp (F)')
plt.title('Southern Hemisphere - Max Temp vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/SH_Lat_vs_Temp.png")

plt.show()

##### The max temp fits the regression line, which indicates a fairly strong correlation.

####  Northern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Northern-hemisphere latitude (x) and humidity (y) as numeric series
x_values = pd.to_numeric(north_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(north_hem_df['Humidity'], downcast="float")

# Least-squares fit of humidity on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(45,20),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('Northern Hemisphere - Humidity vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/NH_Lat_vs_Humid.png")

plt.show()

##### The humidity does not fit the regression line, which indicates a poor correlation.

####  Southern Hemisphere - Humidity (%) vs. Latitude Linear Regression

In [None]:
# Southern-hemisphere latitude (x) and humidity (y) as numeric series
x_values = pd.to_numeric(south_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(south_hem_df['Humidity'], downcast="float")

# Least-squares fit of humidity on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-25,40),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Humidity')
plt.title('Southern Hemisphere - Humidity vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/SH_Lat_vs_Humid.png")

plt.show()

##### The humidity does not fit the regression line, which indicates a poor correlation.

####  Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Northern-hemisphere latitude (x) and cloudiness (y) as numeric series
x_values = pd.to_numeric(north_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(north_hem_df['Cloudiness'], downcast="float")

# Least-squares fit of cloudiness on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(45,25),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('Northern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/NH_Lat_vs_Clouds.png")

plt.show()

##### The cloudiness does not fit the regression line, which indicates a poor, almost nonexistent, correlation.

####  Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression

In [None]:
# Southern-hemisphere latitude (x) and cloudiness (y) as numeric series
x_values = pd.to_numeric(south_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(south_hem_df['Cloudiness'], downcast="float")

# Least-squares fit of cloudiness on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-55,22),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Cloudiness (%)')
plt.title('Southern Hemisphere - Cloudiness (%) vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/SH_Lat_vs_Clouds.png")

plt.show()

##### The cloudiness does not fit the regression line, which indicates a poor, almost nonexistent, correlation.

####  Northern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Northern-hemisphere latitude (x) and wind speed (y) as numeric series
x_values = pd.to_numeric(north_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(north_hem_df['Wind Speed'], downcast="float")

# Least-squares fit of wind speed on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(45,30),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed(mph)')
plt.title('Northern Hemisphere - Wind Speed(mph) vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/NH_Lat_vs_Wind.png")

plt.show()

##### The wind speed does not fit the regression line which indicates a poor correlation.

####  Southern Hemisphere - Wind Speed (mph) vs. Latitude Linear Regression

In [None]:
# Southern-hemisphere latitude (x) and wind speed (y) as numeric series
x_values = pd.to_numeric(south_hem_df['Lat'], downcast="float")
y_values = pd.to_numeric(south_hem_df['Wind Speed'], downcast="float")

# Least-squares fit of wind speed on latitude
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
line_eq = f"y = {round(slope,2)}x + {round(intercept,2)}"

print(f"Regression line equation is: {line_eq}")

# Data points, fitted line in red, and the line equation as an on-plot label
plt.scatter(x_values,y_values)
plt.plot(x_values,regress_values,"r-")
plt.annotate(line_eq,(-25,25),fontsize=15,color="red")
plt.xlabel('Latitude')
plt.ylabel('Wind Speed(mph)')
plt.title('Southern Hemisphere - Wind Speed(mph) vs. Latitude Linear Regression')

# rvalue is the correlation coefficient r; square it so the printed number
# actually is the r-squared its label promises.
print(f"The r-squared is: {rvalue**2}")

# Save the figure
plt.savefig("../output_data/SH_Lat_vs_Wind.png")

plt.show()

##### The wind speed does not fit the regression line, which indicates a poor correlation, although the correlation is stronger than in the northern hemisphere.