In [1]:
import folium
import pandas as pd
import geopandas
import requests

In [77]:
from geopy.geocoders import Nominatim
from shapely.geometry import Point

def get_city_geometry(city, state):
    """
    Get the geometry (latitude and longitude) of a city using its name and state.
    
    Args:
    - city (str): Name of the city
    - state (str): Name of the state
    
    Returns:
    - Point: Geometry of the city as a Point object (latitude, longitude)
    """
    # Concatenate the city name and state
    location_query = f"{city}, {state}"
    
    # Initialize Nominatim geocoder
    geolocator = Nominatim(user_agent="city_geometry_app")
    
    # Try to geocode the location
    try:
        location = geolocator.geocode(location_query)
        if location:
            # Extract latitude and longitude from the geocoded result
            latitude = location.latitude
            longitude = location.longitude
            
            # Create a Point object representing the city's geometry
            city_geometry = Point(longitude, latitude)
            return city_geometry
        else:
            print(f"Location not found for {location_query}")
            return None
    except Exception as e:
        print(f"Error occurred: {e}")
        return None

# Example usage:
city_geometry = get_city_geometry("Huntington", "OH")
print(city_geometry)  # Output: POINT (-74.0060152 40.7127281)


POINT (-84.24755618787373 39.2521457)


In [147]:
data = pd.read_csv('Resources/data/Metro_med_doz_pending_uc_sfrcondo_sm_month.csv')
data.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,2018-03-31,2018-04-30,2018-05-31,2018-06-30,2018-07-31,...,2023-04-30,2023-05-31,2023-06-30,2023-07-31,2023-08-31,2023-09-30,2023-10-31,2023-11-30,2023-12-31,2024-01-31
0,102001,0,United States,country,,31.0,21.0,19.0,18.0,20.0,...,13.0,11.0,10.0,11.0,12.0,13.0,15.0,17.0,22.0,27.0
1,394913,1,"New York, NY",msa,NY,58.0,43.0,38.0,40.0,43.0,...,27.0,23.0,23.0,24.0,26.0,27.0,27.0,28.0,32.0,37.0
2,753899,2,"Los Angeles, CA",msa,CA,17.0,14.0,15.0,15.0,17.0,...,16.0,13.0,13.0,13.0,13.0,14.0,15.0,16.0,18.0,19.0
3,394463,3,"Chicago, IL",msa,IL,32.0,17.0,15.0,16.0,18.0,...,9.0,7.0,6.0,7.0,8.0,9.0,10.0,12.0,16.0,18.0
4,394514,4,"Dallas, TX",msa,TX,27.0,21.0,20.0,20.0,22.0,...,12.0,10.0,10.0,12.0,14.0,17.0,19.0,22.0,27.0,32.0


In [148]:
us_data = pd.DataFrame(data.loc[0][5:]).reset_index(drop=False).rename(columns={'index':'Date', 0: 'DaysOnMarket'})
us_data.to_csv('Resources/data/us_data.csv', index=False)

In [149]:
us_data = pd.read_csv('Resources/data/us_data.csv')
us_data.head()


Unnamed: 0,Date,DaysOnMarket
0,2018-03-31,31.0
1,2018-04-30,21.0
2,2018-05-31,19.0
3,2018-06-30,18.0
4,2018-07-31,20.0


In [226]:
states_data = data.loc[1:].copy()
states_data.loc[states_data['RegionName']=='Washington, DC', 'StateName'] = 'DC'
states_data.loc[:, 'RegionName'] = states_data['RegionName'].apply(lambda x: x.split(',')[0])

us_cities = pd.read_csv('Resources/data/uscities.csv')
us_cities = us_cities.drop_duplicates(['city', 'state_id'])
us_cities = us_cities[['city', 'state_id', 'state_name', 
                       'lat', 'lng', 'population', 
                       'density' ]]

states_data = states_data.merge(us_cities, 
                                left_on=['RegionName', 'StateName'], 
                                right_on=['city', 'state_id'])

states_data['geometry'] = states_data.apply(lambda x: Point(x['lng'], x['lat']), axis=1)


# states_data.to_csv('Resources/data/states_data.csv', index=False)


states_data.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,2018-03-31,2018-04-30,2018-05-31,2018-06-30,2018-07-31,...,2023-12-31,2024-01-31,city,state_id,state_name,lat,lng,population,density,geometry
0,394913,1,New York,msa,NY,58.0,43.0,38.0,40.0,43.0,...,32.0,37.0,New York,NY,New York,40.6943,-73.9249,18908608,11080.3,POINT (-73.9249 40.6943)
1,753899,2,Los Angeles,msa,CA,17.0,14.0,15.0,15.0,17.0,...,18.0,19.0,Los Angeles,CA,California,34.1141,-118.4068,11922389,3184.7,POINT (-118.4068 34.1141)
2,394463,3,Chicago,msa,IL,32.0,17.0,15.0,16.0,18.0,...,16.0,18.0,Chicago,IL,Illinois,41.8375,-87.6866,8497759,4614.5,POINT (-87.6866 41.8375)
3,394514,4,Dallas,msa,TX,27.0,21.0,20.0,20.0,22.0,...,27.0,32.0,Dallas,TX,Texas,32.7935,-96.7667,5830932,1478.7,POINT (-96.7667 32.7935)
4,394692,5,Houston,msa,TX,27.0,19.0,16.0,17.0,19.0,...,30.0,36.0,Houston,TX,Texas,29.786,-95.3885,5970127,1384.0,POINT (-95.3885 29.786)


In [342]:
df = states_data.loc[(states_data['RegionName']=='Cleveland')]
df = pd.DataFrame(df.iloc[:, 5:-8].T).reset_index(drop=False)
# df.columns = ['Date', 'DaysOnMarket']
df

Unnamed: 0,index,33,312
0,2018-03-31,51.0,
1,2018-04-30,40.0,
2,2018-05-31,33.0,
3,2018-06-30,32.0,
4,2018-07-31,34.0,
...,...,...,...
66,2023-09-30,6.0,14.0
67,2023-10-31,7.0,12.0
68,2023-11-30,9.0,17.0
69,2023-12-31,12.0,27.0


In [343]:
a, b = (1, 3)

In [225]:
states_data.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,2018-03-31,2018-04-30,2018-05-31,2018-06-30,2018-07-31,...,2023-12-31,2024-01-31,city,state_id,state_name,lat,lng,population,density,geometry
0,394913,1,New York,msa,NY,58.0,43.0,38.0,40.0,43.0,...,32.0,37.0,New York,NY,New York,40.6943,-73.9249,18908608,11080.3,POINT (-73.9249 40.6943)
1,753899,2,Los Angeles,msa,CA,17.0,14.0,15.0,15.0,17.0,...,18.0,19.0,Los Angeles,CA,California,34.1141,-118.4068,11922389,3184.7,POINT (-118.4068 34.1141)
2,394463,3,Chicago,msa,IL,32.0,17.0,15.0,16.0,18.0,...,16.0,18.0,Chicago,IL,Illinois,41.8375,-87.6866,8497759,4614.5,POINT (-87.6866 41.8375)
3,394514,4,Dallas,msa,TX,27.0,21.0,20.0,20.0,22.0,...,27.0,32.0,Dallas,TX,Texas,32.7935,-96.7667,5830932,1478.7,POINT (-96.7667 32.7935)
4,394692,5,Houston,msa,TX,27.0,19.0,16.0,17.0,19.0,...,30.0,36.0,Houston,TX,Texas,29.786,-95.3885,5970127,1384.0,POINT (-95.3885 29.786)


In [191]:
state_data.head()

Unnamed: 0,State,Unemployment
0,AL,7.1
1,AK,6.8
2,AZ,8.1
3,AR,7.2
4,CA,10.1


In [231]:
states_data.geometry.isna().sum()

0

In [188]:
state_geo = requests.get(
    "https://raw.githubusercontent.com/python-visualization/folium-example-data/main/us_states.json"
).json()
# state_data = pd.read_csv(
#     "https://raw.githubusercontent.com/python-visualization/folium-example-data/main/us_unemployment_oct_2012.csv"
# )


In [236]:
states_data.head()

Unnamed: 0,RegionID,SizeRank,RegionName,RegionType,StateName,2018-03-31,2018-04-30,2018-05-31,2018-06-30,2018-07-31,...,2023-12-31,2024-01-31,city,state_id,state_name,lat,lng,population,density,geometry
0,394913,1,New York,msa,NY,58.0,43.0,38.0,40.0,43.0,...,32.0,37.0,New York,NY,New York,40.6943,-73.9249,18908608,11080.3,POINT (-73.9249 40.6943)
1,753899,2,Los Angeles,msa,CA,17.0,14.0,15.0,15.0,17.0,...,18.0,19.0,Los Angeles,CA,California,34.1141,-118.4068,11922389,3184.7,POINT (-118.4068 34.1141)
2,394463,3,Chicago,msa,IL,32.0,17.0,15.0,16.0,18.0,...,16.0,18.0,Chicago,IL,Illinois,41.8375,-87.6866,8497759,4614.5,POINT (-87.6866 41.8375)
3,394514,4,Dallas,msa,TX,27.0,21.0,20.0,20.0,22.0,...,27.0,32.0,Dallas,TX,Texas,32.7935,-96.7667,5830932,1478.7,POINT (-96.7667 32.7935)
4,394692,5,Houston,msa,TX,27.0,19.0,16.0,17.0,19.0,...,30.0,36.0,Houston,TX,Texas,29.786,-95.3885,5970127,1384.0,POINT (-95.3885 29.786)


In [286]:
cities_geo = states_data[['RegionName', 'StateName', 'population', 'density', 'lng', 'lat', '2024-01-31']]
# Create Point geometries from latitude and longitude
geometry = [Point(xy) for xy in zip(cities_geo['lng'], cities_geo['lat'])]

# Convert DataFrame to GeoDataFrame
gdf = gpd.GeoDataFrame(cities_geo, geometry=geometry)

# Set the CRS for the GeoDataFrame
gdf.crs = 'EPSG:4326'  # Assuming WGS84 coordinate reference system

# Drop the latitude and longitude columns if needed
gdf = gdf.drop(['lat', 'lng'], axis=1)


In [197]:
state_level_data = states_data.groupby(['StateName']) \
           .apply(lambda x: pd.Series({'DaysOnMarket': x['2024-01-31'].mean()}))\
           .reset_index()

state_level_data.head()

Unnamed: 0,StateName,DaysOnMarket
0,AK,18.0
1,AL,32.727273
2,AR,38.727273
3,AZ,46.7
4,CA,30.5


In [346]:
states_data.loc[(states_data['RegionName']=='Cleveland')&\
                                            (states_data['StateName']=='OH')]['state_name'].values[0]

'Ohio'

In [315]:
def get_state_level_data(df, date):
    state_level_data = df.groupby(['StateName']) \
            .apply(lambda x: pd.Series({
                'DaysOnMarket': x[date].mean(),
                'Population': x['population'].mean(),
                'Density': x['density'].mean(),
                }))\
            .reset_index()
    return state_level_data


In [319]:
get_state_level_data(states_data[states_data['state_name']=='Texas'], '2024-01-31')#['DaysOnMarket'].values[0]

Unnamed: 0,StateName,DaysOnMarket,Population,Density
0,TX,46.666667,531408.307692,633.833333


In [219]:
def get_avg_daysOnMarket(df, date_col):
    state_level_data = states_data.groupby(['StateName']) \
           .apply(lambda x: pd.Series({'DaysOnMarket': x[col].mean()}))\
           .reset_index()

    return state_level_data


In [220]:
from pprint import pprint

In [261]:
# import geopandas

# gdf = geopandas.read_file(
#     "https://raw.githubusercontent.com/python-visualization/folium-example-data/main/subway_stations.geojson"
# )

# gdf.head()

Unnamed: 0,name,url,line,objectid,notes,geometry
0,Astor Pl,http://web.mta.info/nyct/service/,4-6-6 Express,1,"4 nights, 6-all times, 6 Express-weekdays AM s...",POINT (-73.99107 40.73005)
1,Canal St,http://web.mta.info/nyct/service/,4-6-6 Express,2,"4 nights, 6-all times, 6 Express-weekdays AM s...",POINT (-74.00019 40.71880)
2,50th St,http://web.mta.info/nyct/service/,1-2,3,"1-all times, 2-nights",POINT (-73.98385 40.76173)
3,Bergen St,http://web.mta.info/nyct/service/,2-3-4,4,"4-nights, 3-all other times, 2-all times",POINT (-73.97500 40.68086)
4,Pennsylvania Ave,http://web.mta.info/nyct/service/,3-4,5,"4-nights, 3-all other times",POINT (-73.89489 40.66471)


In [259]:
gdf.dtypes

RegionName      object
StateName       object
population       int64
density        float64
2024-01-31     float64
geometry      geometry
dtype: object

In [310]:
m = folium.Map(location=[38, -102], zoom_start=4, scrollWheelZoom=False)

choropleth = folium.Choropleth(
    geo_data=state_geo,
    name="choropleth",
    data=state_level_data,
    columns=["StateName", "DaysOnMarket"],
    key_on="feature.id",
    fill_color="PuBuGn",
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name="Day On Market",
)

choropleth.geojson.add_to(m)

for feature in choropleth.geojson.data['features']:
    state_name = feature['id']
    DaysOnMarket = state_level_data.loc[state_level_data['StateName']==state_name, 'DaysOnMarket'].values[0]
    feature['properties']['DaysOnMarket'] = f'State Avg DaysOnMarket: {DaysOnMarket:.2f}'
    

choropleth.geojson.add_child(
    folium.features.GeoJsonTooltip(['name', 'DaysOnMarket'], labels=False)
)

# choropleth.LayerControl().add_to(m)


folium.GeoJson(
    gdf,
    name="Subway Stations",
    marker=folium.Circle(radius=4, fill_color="orange", fill_opacity=0.4, color="black", weight=1),
    tooltip=folium.GeoJsonTooltip(fields=["RegionName", 'population', 'density', '2024-01-31']),
    popup=folium.GeoJsonPopup(fields=["RegionName", 'population', 'density', '2024-01-31']),
    style_function=lambda x: {
        "radius": (x['properties']['2024-01-31'])*400,
    },
    highlight_function=lambda x: {"fillOpacity": 0.8},
    zoom_on_click=False,
).add_to(m)

# Add dark and light mode. 
folium.TileLayer('cartodbdark_matter',name="dark mode",control=True).add_to(m)
folium.TileLayer('cartodbpositron',name="light mode",control=True).add_to(m)

# We add a layer controller. 
folium.LayerControl(collapsed=True).add_to(m)

m

In [284]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# Sample DataFrame with latitude and longitude
data = {
    'City': ['New York', 'Los Angeles', 'Chicago'],
    'Latitude': [40.7128, 34.0522, 41.8781],
    'Longitude': [-74.0060, -118.2437, -87.6298]
}
df = pd.DataFrame(data)

# Create Point geometries from latitude and longitude
geometry = [Point(xy) for xy in zip(df['Longitude'], df['Latitude'])]

# Convert DataFrame to GeoDataFrame
gdf = gpd.GeoDataFrame(df, geometry=geometry)

# Drop the latitude and longitude columns if needed
gdf = gdf.drop(['Latitude', 'Longitude'], axis=1)

# Visualize the GeoDataFrame
print(gdf.head())


          City                     geometry
0     New York   POINT (-74.00600 40.71280)
1  Los Angeles  POINT (-118.24370 34.05220)
2      Chicago   POINT (-87.62980 41.87810)
