In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
from IPython.display import display
from sklearn.cluster import KMeans
from IPython.display import clear_output

# Load the concatenated dataset and derive a single 'date' column from the
# separate 'year', 'month' and 'day' columns.
df = pd.read_csv('concatenated_data.csv').assign(
    date=lambda readings: pd.to_datetime(readings[['year', 'month', 'day']])
)

# Numeric station id (the value compared against df['station']) -> display name.
# Ids are assigned consecutively starting at 1, in the order listed here.
station_names = dict(enumerate(
    [
        'Aotizhongxin',
        'Changping',
        'Dingling',
        'Dongsi',
        'Guanyuan',
        'Gucheng',
        'Huairou',
        'Nongzhanguan',
        'Shunyi',
        'Tiantan',
        'Wanliu',
        'Wanshouxigong',
    ],
    start=1,
))

# Input widgets: which station to show, which attributes to cluster on,
# and how many clusters to fit.

# One (label, value) pair per station: the name is displayed, the id is the value.
station_dropdown = widgets.Dropdown(
    description='Station:',
    options=[(name, station_id) for station_id, name in station_names.items()],
    value=1,
)

# Columns that may be fed to the clustering step; the first three start selected.
attribute_multiselect = widgets.SelectMultiple(
    description='Attributes:',
    options=(
        'PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3',
        'TEMP', 'PRES', 'DEWP', 'RAIN', 'WSPM',
    ),
    value=('PM2.5', 'PM10', 'SO2'),
)

# Despite the variable name this is a slider, ranging over 2..10 clusters.
num_clusters_spinner = widgets.IntSlider(
    description='Clusters:',
    min=2,
    max=10,
    step=1,
    value=3,
)

from ipywidgets import SelectionRangeSlider, HBox, VBox, Label, Layout

# Every calendar day between the first and last date in the data. Computed
# once and reused for both the slider options and the initial selection.
available_dates = pd.date_range(df['date'].min(), df['date'].max(), freq='D')

date_range_slider = widgets.SelectionRangeSlider(
    options=available_dates,
    index=(0, len(available_dates) - 1),  # start with the full range selected
    description='Date Range:',
    continuous_update=False,
    layout={'width': '600px'},
    readout=False  # the selection is shown in a separate Label instead
)
# NOTE(review): 'handle_width' and 'readout_width' do not look like traits of
# this slider's style object, so these assignments may have no visible effect
# -- confirm against the ipywidgets styling documentation.
date_range_slider.style.handle_width = '50px'
date_range_slider.style.readout_width = '500px'

# Label that spells out the slider's current (start, end) selection.
selected_info = Label(
    value='Selected Range: {} - {}'.format(*date_range_slider.value),
    layout=Layout(width='500px', margin='0 0 0 0px'),
)

# update the selected range when the value of the SelectionRangeSlider changes
def update_info(change):
    """Refresh the ``selected_info`` label from the slider's current value.

    Args:
        change: Change notification passed by the widget observer. It is not
            inspected; the range is read directly from ``date_range_slider``.
    """
    range_start, range_end = date_range_slider.value
    selected_info.value = f'Selected Range: {range_start} - {range_end}'
# Re-render the label whenever the slider's value changes.
date_range_slider.observe(update_info, names='value')

# Slider and its label sit side by side ...
hbox = HBox(children=[date_range_slider, selected_info])

# ... inside a VBox, so further rows can be stacked underneath later.
vbox = VBox(children=[hbox])

# Dropdown choosing which attribute is drawn on the y-axis of the plot.
# The choices are taken from the attribute multi-select so the two lists
# cannot drift apart.
yaxis_attribute_dropdown = widgets.Dropdown(
    options=attribute_multiselect.options,
    value='PM2.5',
    description='Y-axis:'
)

# Modify the create_time_series_plot function to include the y_axis parameter
def create_time_series_plot(station, attributes, num_clusters, date_range, y_axis):
    """Cluster one station's readings with KMeans and scatter-plot them over time.

    Rows of the global ``df`` are filtered to ``station`` and ``date_range``,
    clustered on the ``attributes`` columns, and drawn as ``y_axis`` against
    date with one scatter series per cluster.

    Args:
        station: Value matched against ``df['station']``.
        attributes: Iterable of column names used as clustering features.
        num_clusters: Number of KMeans clusters to fit.
        date_range: ``(start, end)`` pair; both ends are inclusive.
        y_axis: Column name plotted on the y-axis.

    Returns:
        None. If no attribute is selected, or fewer complete rows than
        ``num_clusters`` remain, a message is printed and nothing is plotted.
    """
    clear_output(wait=True)
    start_date, end_date = date_range

    cluster_columns = list(attributes)
    if not cluster_columns:
        # Nothing to cluster on (e.g. the multi-select was cleared).
        print('Select at least one attribute to cluster on.')
        return

    in_selection = (df['station'] == station) & df['date'].between(start_date, end_date)
    # KMeans rejects missing values, so keep only rows where every clustering
    # attribute is present.
    filtered_df = df[in_selection].dropna(subset=cluster_columns).copy()

    if len(filtered_df) < num_clusters:
        # KMeans needs at least as many samples as clusters.
        print(
            f'Only {len(filtered_df)} complete rows in the selection; '
            f'need at least {num_clusters} to form {num_clusters} clusters.'
        )
        return

    # Perform clustering. n_init is set explicitly so the result does not
    # depend on the installed scikit-learn version's default.
    kmeans = KMeans(n_clusters=num_clusters, n_init=10, random_state=0).fit(
        filtered_df[cluster_columns]
    )
    filtered_df['cluster'] = kmeans.labels_

    fig, ax = plt.subplots(figsize=(12, 6))

    # One scatter series per cluster so each gets its own colour and legend entry.
    for cluster in range(num_clusters):
        cluster_df = filtered_df[filtered_df['cluster'] == cluster]
        ax.scatter(cluster_df['date'], cluster_df[y_axis], s=3, label=f'Cluster {cluster}')

    ax.set(xlabel='Date', ylabel=y_axis)
    ax.legend()
    plt.show()

# Bind each plot argument to the widget supplying it; the plot is redrawn
# into `output` whenever one of these widgets changes.
output = widgets.interactive_output(
    create_time_series_plot,
    dict(
        station=station_dropdown,
        attributes=attribute_multiselect,
        num_clusters=num_clusters_spinner,
        date_range=date_range_slider,
        y_axis=yaxis_attribute_dropdown,
    ),
)

# Show the controls top to bottom, followed by the plot area.
display(*[
    station_dropdown,
    attribute_multiselect,
    num_clusters_spinner,
    vbox,
    yaxis_attribute_dropdown,
    output,
])


Dropdown(description='Station:', options=(('Aotizhongxin', 1), ('Changping', 2), ('Dingling', 3), ('Dongsi', 4…

SelectMultiple(description='Attributes:', index=(0, 1, 2), options=('PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3',…

IntSlider(value=3, description='Clusters:', max=10, min=2)

VBox(children=(HBox(children=(SelectionRangeSlider(continuous_update=False, description='Date Range:', index=(…

Dropdown(description='Y-axis:', options=('PM2.5', 'PM10', 'SO2', 'NO2', 'CO', 'O3', 'TEMP', 'PRES', 'DEWP', 'R…

Output()

Drop the `wd` column because it is categorical.