In [1]:
# Dependencies
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests
from census import Census
import gmaps
from scipy.stats import linregress

# Census & gmaps API Keys
# The keys are imported from a local `config` module instead of being written
# in the notebook. If no `config` module is importable, this cell stops with
# ModuleNotFoundError and nothing below the import runs.
from config import (api_key, gkey)
# Census client built with the Census key, requesting year=2020.
c = Census(api_key, year=2020)

# Configure gmaps
# Hands `gkey` to the gmaps library (presumably a Google Maps key -- confirm in config).
gmaps.configure(api_key=gkey)

ModuleNotFoundError: No module named 'config'

In [None]:
# Load the combined CSV file (path is relative to the notebook's working directory)
combined_df = pd.read_csv("combined.csv")

# Display sample data: only the first rows, not the entire frame
combined_df.head()

In [None]:
# Graph and linear regression on daily_solar_radiation.
# x = residential solar system count, y = daily solar radiation (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["daily_solar_radiation"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 4.15), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Daily Solar Radiation")
plt.title("Number of Residential Solar Systems vs. Daily Solar Radiation", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_daily_solar_radiation.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on electricity_price_residential.
# x = residential solar system count, y = residential electricity price (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["electricity_price_residential"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 13), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Residential Electricity Price ($)")
plt.title("Number of Residential Solar Systems vs. Residential Electricity Price ($)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_electricity_price_residential.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on square_miles.
# x = residential solar system count, y = area in square miles (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["square_miles"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 75000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("State/District Area (Square Miles)")
plt.title("Number of Residential Solar Systems vs. State/District Area (Square Miles)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_square_miles.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on pop_density.
# x = residential solar system count, y = population density (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["pop_density"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 3000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Population Density (People Per Square Mile)")
plt.title("Number of Residential Solar Systems vs. Population Density (People Per Square Mile)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_pop_density.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on population.
# x = residential solar system count, y = population (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["Population"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
# NOTE(review): y = 3000 is the same anchor used in the pop_density cell;
# confirm it was chosen for the population scale and not carried over.
plt.annotate(line_eq, (350000, 3000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Population ")
plt.title("Number of Residential Solar Systems vs. Population", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_population.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on Median Age.
# x = residential solar system count, y = median age (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["Median Age"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 35), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Median Age")
plt.title("Number of Residential Solar Systems vs. Median Age", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_median_age.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on Household Income.
# x = residential solar system count, y = household income (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["Household Income"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 55000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Household Income ($)")
plt.title("Number of Residential Solar Systems vs. Household Income", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_household_income.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on Per Capita Income.
# x = residential solar system count, y = per capita income (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["Per Capita Income"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 52000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Per Capita Income ($)")
plt.title("Number of Residential Solar Systems vs. Per Capita Income ($)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_per_capita_income.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on Poverty Count.
# x = residential solar system count, y = poverty count (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["Poverty Count"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
# NOTE(review): y = 52000 is the same anchor used in the Per Capita Income
# cell; confirm it was chosen for the poverty-count scale and not carried over.
plt.annotate(line_eq, (350000, 52000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Number of People Living in Poverty")
plt.title("Number of Residential Solar Systems vs. Number of People Living in Poverty", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_poverty_count.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on Poverty Rate.
# x = residential solar system count, y = poverty rate (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["Poverty Rate"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 11), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Poverty Rate (%)")
plt.title("Number of Residential Solar Systems vs. Poverty Rate (%)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_poverty_rate.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on Unemployment Rate.
# x = residential solar system count, y = unemployment rate (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["Unemployment Rate"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 2.25), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Unemployment Rate (%)")
plt.title("Number of Residential Solar Systems vs. Unemployment Rate (%)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_unemployment_rate.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on number_of_years_of_education.
# x = residential solar system count, y = years of education (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["number_of_years_of_education"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 13.1), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Number of Years of Education")
plt.title("Number of Residential Solar Systems vs. Number of Years of Education", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_number_of_years_of_education.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on number_of_solar_system_per_household.
# x = residential solar system count, y = solar systems per household (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["number_of_solar_system_per_household"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# Both coefficients are shown to 3 significant figures: rounding to 2 decimals
# turns anything below 0.005 into "0.0", and the annotation anchor below
# (y = .0075) indicates the y values here are of that order. The intercept's
# sign is written explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.3g}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, .0075), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Number of Solar Systems per Household")
plt.title("Number of Residential Solar Systems vs. Solar Systems per Household", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_number_of_solar_systems_per_household.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on housing_unit_median_value.
# x = residential solar system count, y = median housing unit value (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["housing_unit_median_value"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
plt.annotate(line_eq, (350000, 175000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Median Housing Unit Value ($)")
plt.title("Number of Residential Solar Systems vs. Median Housing Unit Value ($)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_housing_unit_median_value.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on total_panel_area_residential.
# x = residential solar system count, y = total residential panel area (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["total_panel_area_residential"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The annotation anchor is given in data coordinates (x, y).
# NOTE(review): y = 52000 is the same anchor used in the Per Capita Income
# cell; confirm it was chosen for the panel-area scale and not carried over.
plt.annotate(line_eq, (350000, 52000), fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Total Residential Solar Panel Area (sq. ft.)")
plt.title("Number of Residential Solar Systems vs. Total Residential Solar Panel Area (sq. ft.)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_total_panel_area_residential.png")

# Show plot
plt.show()

In [None]:
# Graph and linear regression on owned_households_%.
# x = residential solar system count, y = owned-households percentage (columns of combined_df).
x_values = combined_df["solar_system_count_residential"]
y_values = combined_df["owned_households_%"]

# Get linear regression (least-squares fit of y on x)
(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = x_values * slope + intercept
# The slope is shown to 3 significant figures because rounding to 2 decimals
# turns any slope below 0.005 into "0.0"; the intercept's sign is written
# explicitly so a negative intercept is not rendered as "+-".
intercept_sign = "+" if intercept >= 0 else "-"
line_eq = f"y = {slope:.3g}x {intercept_sign} {abs(intercept):.2f}"

# Build linear regression plot
plt.scatter(x_values, y_values, alpha=0.7, edgecolors="k")
plt.plot(x_values, regress_values, "r")
# The equation is anchored in axes-fraction coordinates (centre, near the top).
# The previous data-coordinate anchor of y = 52000 lies far outside a
# percentage axis, and an annotation whose data-coordinate anchor is outside
# the axes is not drawn.
plt.annotate(line_eq, (0.5, 0.9), xycoords="axes fraction", fontsize=10, color="red")
plt.xlabel("Number of Residential Solar Systems")
plt.ylabel("Households Owned (%)")
plt.title("Number of Residential Solar Systems vs. Households Owned (%)", fontsize=10)
plt.grid(True)
# rvalue is the correlation coefficient r; the value printed is its square,
# so it is labelled r-squared.
print(f"The r-squared is: {rvalue**2}")

# Save linear regression plot as a png before showing it
plt.savefig("output_data/RegressionPlot_solar_system_count_residential_vs_owned_households_%.png")

# Show plot
plt.show()