In [1]:
from sklearn.neighbors import NearestNeighbors
from kneed import KneeLocator
import numpy as np
from plotly.subplots import make_subplots
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import DBSCAN
import functions as f
from sklearn.cluster import KMeans
import plotly.express as px
import plotly.graph_objects as go
import scipy.cluster.hierarchy as sch
from sklearn.cluster import AgglomerativeClustering
import geopandas as gpd
from pyproj import CRS
from shapely.geometry import Point, Polygon
import matplotlib.pyplot as plt

In [2]:
# Define features want from dataset
demo_features = ['White','Black or African American','American Indian and Alaska Native', 'Asian'
                ,'Native Hawaiian and Other Pacific Islander','Some other race','Two or more races', 'Total population']

# Read dataset for data and drop where population = 0 and unwanted feature
demo_df = f.demo_data(demo_features,'Raw Data/acs_combined.xlsx')
demo_df.drop(['Name'],axis = 1,inplace = True)
demo_df.drop(list(demo_df[demo_df['Total population']==0].index),inplace = True)

# Feature Engineering
col = demo_df.columns
demo_df['% White'] = demo_df['White']/demo_df['Total population']*100
demo_df['% African American'] = demo_df['Black or African American']/demo_df['Total population']*100
demo_df['% Asian'] = demo_df['Asian']/demo_df['Total population']*100
demo_df['% Other'] = (demo_df['Native Hawaiian and Other Pacific Islander'] + demo_df['American Indian and Alaska Native'] + demo_df['Some other race'])/demo_df['Total population']*100
demo_df['% Mixed'] = demo_df['Two or more races']/demo_df['Total population']*100
demo_df.drop(col,axis =1, inplace = True)


In [3]:
#Read CSV for constructed dataset
cluster = pd.read_csv('Clustering/NTA Code Cluster Match.csv')

In [8]:
# Merge Cluster data and demo data
merge = demo_df.merge(cluster, how = 'outer',left_index = True, right_index = True)
merge.fillna(0,inplace = True)

In [9]:
#Separating the Clsuters form the DataFrame
cluster_1 = merge[merge['Cluster']==1]
cluster_2 = merge[merge['Cluster']==2]
cluster_3 = merge[merge['Cluster']==3]
cluster_4 = merge[merge['Cluster']==4]
cluster_5 = merge[merge['Cluster']==5]

In [10]:
cluster_1.describe()

Unnamed: 0,% White,% African American,% Asian,% Other,% Mixed,Cluster
count,120.0,120.0,120.0,120.0,120.0,120.0
mean,0.0,0.0,0.0,0.0,0.0,1.0
std,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,1.0
25%,0.0,0.0,0.0,0.0,0.0,1.0
50%,0.0,0.0,0.0,0.0,0.0,1.0
75%,0.0,0.0,0.0,0.0,0.0,1.0
max,0.0,0.0,0.0,0.0,0.0,1.0


In [11]:
cluster_2.describe()

Unnamed: 0,% White,% African American,% Asian,% Other,% Mixed,Cluster
count,23.0,23.0,23.0,23.0,23.0,23.0
mean,0.0,0.0,0.0,0.0,0.0,2.0
std,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,2.0
25%,0.0,0.0,0.0,0.0,0.0,2.0
50%,0.0,0.0,0.0,0.0,0.0,2.0
75%,0.0,0.0,0.0,0.0,0.0,2.0
max,0.0,0.0,0.0,0.0,0.0,2.0


In [12]:
cluster_3.describe()

Unnamed: 0,% White,% African American,% Asian,% Other,% Mixed,Cluster
count,6.0,6.0,6.0,6.0,6.0,6.0
mean,0.0,0.0,0.0,0.0,0.0,3.0
std,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,3.0
25%,0.0,0.0,0.0,0.0,0.0,3.0
50%,0.0,0.0,0.0,0.0,0.0,3.0
75%,0.0,0.0,0.0,0.0,0.0,3.0
max,0.0,0.0,0.0,0.0,0.0,3.0


In [13]:
cluster_4.describe()

Unnamed: 0,% White,% African American,% Asian,% Other,% Mixed,Cluster
count,39.0,39.0,39.0,39.0,39.0,39.0
mean,0.0,0.0,0.0,0.0,0.0,4.0
std,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,4.0
25%,0.0,0.0,0.0,0.0,0.0,4.0
50%,0.0,0.0,0.0,0.0,0.0,4.0
75%,0.0,0.0,0.0,0.0,0.0,4.0
max,0.0,0.0,0.0,0.0,0.0,4.0


In [14]:
cluster_5.describe()

Unnamed: 0,% White,% African American,% Asian,% Other,% Mixed,Cluster
count,7.0,7.0,7.0,7.0,7.0,7.0
mean,0.0,0.0,0.0,0.0,0.0,5.0
std,0.0,0.0,0.0,0.0,0.0,0.0
min,0.0,0.0,0.0,0.0,0.0,5.0
25%,0.0,0.0,0.0,0.0,0.0,5.0
50%,0.0,0.0,0.0,0.0,0.0,5.0
75%,0.0,0.0,0.0,0.0,0.0,5.0
max,0.0,0.0,0.0,0.0,0.0,5.0
