In [None]:
!pip install altair vega_datasets




Load the dataset into a Pandas DataFrame.

Create a scatter plot of sale prices versus square footage, color-coded by building type.

Create a histogram of sale dates.

Implement brushing on the scatter plot to highlight data points in the histogram.

In [None]:
import altair as alt
import pandas as pd

# Load the listings into a DataFrame.
data = pd.read_csv('/content/sample_data/NY-House-Dataset.csv')

# Dropdown options for the locality filter. The leading None entry clears the
# selection. Missing localities are dropped before sorting because sorted()
# cannot order NaN (a float) against string labels and would raise TypeError.
locality_options = [None] + sorted(data['LOCALITY'].dropna().unique())
area_selection = alt.binding_select(options=locality_options, name='Area')
selected_area = alt.selection_single(fields=['LOCALITY'], bind=area_selection, name='Selector')

# Scatter plot of price (log scale) against square footage; colour encodes the
# locality and marker size encodes the number of bedrooms. The chart is
# filtered by the dropdown selection defined above.
interactive_chart = alt.Chart(data).mark_circle(opacity=0.8, stroke='black', strokeWidth=1).encode(
    x=alt.X('PROPERTYSQFT:Q', title='Property Square Feet', scale=alt.Scale(zero=False)),
    y=alt.Y('PRICE:Q', title='Price', scale=alt.Scale(type='log')),
    color=alt.Color('LOCALITY:N', legend=alt.Legend(title="Area"), scale=alt.Scale(scheme='category20b')),
    size=alt.Size('BEDS:Q', legend=alt.Legend(title="Number of Bedrooms"), scale=alt.Scale(range=[100, 1000])),
    tooltip=['BROKERTITLE', 'ADDRESS', 'PRICE', 'PROPERTYSQFT', 'BEDS', 'LOCALITY']
).add_selection(
    selected_area
).transform_filter(
    selected_area
).properties(
    width=800,
    height=600,
    title='Interactive Properties Visualization by Square Feet, Price, and Area'
).interactive()

interactive_chart.display()


In [None]:
import altair as alt
import pandas as pd

# Load the dataset and keep only rows that have every field the charts need.
file_path = '/content/sample_data/NY-House-Dataset.csv'
data = pd.read_csv(file_path)
data = data.dropna(subset=['LATITUDE', 'LONGITUDE', 'PRICE', 'PROPERTYSQFT', 'TYPE'])

# Hover selection that picks the nearest point by coordinates.
nearest = alt.selection(type='single', nearest=True, on='mouseover', fields=['LATITUDE', 'LONGITUDE'], empty='none')

# Interval (rectangular drag) selection used for brushing the scatter plot.
brush = alt.selection(type='interval')

# Geographic point layer with hover highlighting.
# NOTE(review): this chart is built but not part of `combined_view` below.
points = alt.Chart(data).mark_circle().encode(
    longitude='LONGITUDE:Q',
    latitude='LATITUDE:Q',
    size=alt.value(100),
    color=alt.condition(nearest, 'TYPE:N', alt.value('gray')),
    tooltip=['ADDRESS', 'PRICE', 'PROPERTYSQFT', 'TYPE']
).add_selection(
    nearest
)

# Scatter plot view: price (log scale) vs. square footage, coloured by type.
# Points outside the brush are faded.
scatter_view = alt.Chart(data).mark_circle().encode(
    x=alt.X('PROPERTYSQFT:Q', scale=alt.Scale(zero=False), title='Property Square Footage'),
    y=alt.Y('PRICE:Q', scale=alt.Scale(zero=False, type='log'), title='Sale Price'),
    color=alt.Color('TYPE:N'),
    opacity=alt.condition(brush, alt.value(0.7), alt.value(0.1)),
    tooltip=['ADDRESS', 'PRICE', 'PROPERTYSQFT', 'TYPE']
).properties(
    width=600,
    height=400,
    title='Sale Price vs. Square Footage'
).add_selection(
    brush
)

# Bar chart view: number of properties per building type.
# The brush is applied as a filter *before* aggregation so the bars count only
# the brushed points. Using the brush in an opacity condition does not work
# here: the aggregated rows no longer carry PROPERTYSQFT/PRICE, so they can
# never fall inside the brushed interval and every bar would simply fade.
bar_view = alt.Chart(data).mark_bar().encode(
    x=alt.X('count():Q', title='Number of Properties'),
    y=alt.Y('TYPE:N', sort='-x', title='Building Type'),
    color=alt.Color('TYPE:N'),
    opacity=alt.value(0.7)
).transform_filter(
    brush
).properties(
    width=600,
    height=400,
    title='Distribution of Properties by Building Type'
)

# Stack the two linked views vertically.
combined_view = alt.vconcat(scatter_view, bar_view)

# Display the combined chart (last expression of the cell).
combined_view


In [5]:
import altair as alt
import pandas as pd

# Read the listings from disk
file_path = '/content/sample_data/NY-House-Dataset.csv'
data = pd.read_csv(file_path)

# Mean PRICE for every (TYPE, LOCALITY) pair, flattened back into columns
heatmap_data = (
    data
    .groupby(['TYPE', 'LOCALITY'])['PRICE']
    .mean()
    .reset_index()
)
color_scale = alt.Scale(scheme='viridis')

# Pieces of the encoding, named for readability
price_legend = alt.Legend(title="Average Price", format='$,.2f')
heatmap_tooltip = [
    alt.Tooltip('TYPE:N', title='Type'),
    alt.Tooltip('LOCALITY:N', title='Locality'),
    alt.Tooltip('PRICE:Q', title='Average Price', format='$,.2f'),
]

# One rectangle per (type, locality) cell, coloured by the average price
heatmap = (
    alt.Chart(heatmap_data)
    .mark_rect()
    .encode(
        x=alt.X('TYPE:O', title='Property Type'),
        y=alt.Y('LOCALITY:O', title='Locality'),
        color=alt.Color('PRICE:Q', scale=color_scale, legend=price_legend),
        tooltip=heatmap_tooltip,
    )
    .interactive()
)

# Apply the title and dimensions, then render
final_chart = heatmap.properties(
    title="Heatmap of Average Price by Type and Locality",
    width=600,
    height=400,
)

final_chart.display()


In [None]:
import altair as alt
import pandas as pd

# Load the listings into a DataFrame.
data = pd.read_csv('/content/sample_data/NY-House-Dataset.csv')

# Clip the slider range to the 1st-99th price percentiles. The values are cast
# to plain Python floats so they embed cleanly in the slider binding and in
# the filter expression below (numpy scalars may not render as valid literals).
price_min = float(data['PRICE'].quantile(0.01))
price_max = float(data['PRICE'].quantile(0.99))

# Dropdown filter on property type. The leading None entry clears the filter.
# Missing types are dropped before sorting because sorted() cannot order NaN
# (a float) against string labels.
type_options = [None] + sorted(data['TYPE'].dropna().unique())
type_selection = alt.selection_single(
    fields=['TYPE'],
    bind=alt.binding_select(options=type_options, name='Property Type:'),
    name='Type'
)

# Slider that sets the lower price bound; it starts at the 1st percentile.
price_slider = alt.binding_range(min=price_min, max=price_max, step=10000, name='Price Range:')
price_selection = alt.selection_single(fields=['PRICE'], bind=price_slider, init={'PRICE': price_min})

# Base map layer. The URL points at a GeoJSON FeatureCollection (not TopoJSON),
# so it is loaded as JSON with the `features` array as the row source instead
# of going through alt.topo_feature.
us_states = alt.Data(
    url='https://raw.githubusercontent.com/PublicaMundi/MappingAPI/master/data/geojson/us-states.json',
    format=alt.DataFormat(property='features', type='json')
)
background = alt.Chart(us_states).mark_geoshape(
    fill='lightgray',
    stroke='white'
).transform_filter(
    (alt.datum.id == 36)  # New York's ID in the provided file
).properties(
    width=800,
    height=500
)

# Plot the properties, filtered by the selected type and by price: at least
# the slider value and at most the 99th percentile.
properties = alt.Chart(data).mark_circle(size=60).encode(
    latitude='LATITUDE',
    longitude='LONGITUDE',
    color=alt.Color('TYPE:N', legend=alt.Legend(title="Property Type")),
    tooltip=['BROKERTITLE', 'PRICE', 'BEDS', 'BATH', 'PROPERTYSQFT', 'ADDRESS']
).add_selection(
    type_selection,
    price_selection
).transform_filter(
    type_selection
).transform_filter(
    (alt.datum.PRICE >= price_selection.PRICE) & (alt.datum.PRICE <= price_max)
)

# Layer the property points on top of the map.
alt.layer(background, properties).display()
