In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import json

In [4]:
"""

This code creates an interactive animated map visualization comparing actual vs. predicted hotspot data
for a given period of time. Plotly's animation support is used to create the visualization.

The following tasks are done by the below script:

1. Open a CSV file with the hotspot data and perform preprocessing.
2. Apply a date range filter to the data.
3. Convert the Landkreis (county) IDs to strings, left-padding them with zeros to 5 characters (e.g. 1001 -> "01001").
4. Load a GeoJSON file for mapping the county boundaries of Germany.
5. Convert the predicted values into a binary variable using a 0.5 threshold.
6. Establish a unique color scheme for the map display.
7. Make two map layers and animation frames for every date within the given range:
    a. Actual hotspot variable.
    b. Predicted 7-day lagged hotspot variable.
8. Set up subplots for the expected and actual data in the Plotly graphic.
9. Adjust map parameters and incorporate the generated frames into the diagram.
10. To control the animation, include play/pause buttons and a timeslider.
11. Adjust the title and other layout parameters for improved display.
12. Save the final visualization as an HTML file at the chosen output path.

Setting parameters
- start_date: The first date (inclusive) of the range used to filter the data.
- end_date: The last date (inclusive) of the range used to filter the data.
- data_file: The location of the hotspot data-containing CSV file.
- geojson_file: The path to the county boundary mapping GeoJSON file.
- output_html: The path to the HTML file that will be saved as the output.

Returns: - An animated map that compares actual and predicted hotspots in an interactive HTML file.


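Note: - The script relies on the input CSV containing four columns: "Date", "Landkreis_id", "Hotspot" and "Predicted Value".
        The "Predicted Value Binary" column is derived by the script from "Predicted Value" (1 if >= 0.5, else 0).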


"""


# Load the hotspot data. The columns read below are "Date", "Landkreis_id",
# "Hotspot" (used later for the map) and "Predicted Value".
data = pd.read_csv('path/to/your/inputfile.csv')

# Dates are parsed with an explicit day-month-year format, then the rows are
# put in chronological order.
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y')
data.sort_values(by='Date', inplace=True)

# Inclusive date window for the animation.
start_date = '2020-01-01'
end_date = '2020-12-31'
# Take an explicit copy of the filtered rows: the column assignments below
# would otherwise write into a slice of the original frame and trigger
# pandas' SettingWithCopyWarning.
in_window = (data['Date'] >= start_date) & (data['Date'] <= end_date)
data = data[in_window].copy()

# County IDs become zero-padded 5-character strings (e.g. 1001 -> "01001").
data['Landkreis_id'] = data['Landkreis_id'].apply(lambda x: str(x).zfill(5))
# Binarize the prediction at 0.5. The vectorized comparison yields the same
# 0/1 values as a per-row lambda (missing values compare False and become 0).
data['Predicted Value Binary'] = (data['Predicted Value'] >= 0.5).astype(int)

# Load the county (Landkreis) boundaries. GeoJSON is UTF-8 by specification, so
# the encoding is passed explicitly: relying on the platform default (e.g.
# cp1252 on Windows) can garble or fail on non-ASCII characters.
# NOTE(review): this is a machine-specific absolute path; adjust it before
# running the script elsewhere.
with open('C:\\Users\\Suji ramesh\\Desktop\\Thesis\\data\\lk_germany_reduced.geojson', 'r', encoding='utf-8') as f:
    counties_geojson = json.load(f)
# Two-stop colorscale: one fixed color for z=0 and another for z=1.
custom_colorscale = [[0, "rgb(102, 194, 165)"], [1, "rgb(252, 141, 98)"]]
# One 'YYYY-MM-DD' string per calendar day from start_date to end_date.
dates = pd.date_range(start=start_date, end=end_date).strftime('%Y-%m-%d').tolist()
def _binary_choropleth(rows, value_column):
    """Return a county choropleth trace colored by the 0/1 column *value_column* of *rows*."""
    return go.Choroplethmapbox(
        geojson=counties_geojson,
        locations=rows['Landkreis_id'],
        z=rows[value_column],
        featureidkey="properties.RS",
        colorscale=custom_colorscale,
        zmin=0,
        zmax=1,
        showscale=False,  # no colorbar for binary values
    )


# Build one animation frame per day that has data. Each frame carries two
# traces: the actual hotspot map first, the predicted hotspot map second.
frames = []
for date in dates:
    filtered_data = data[data['Date'] == pd.to_datetime(date)]
    if filtered_data.empty:
        # Days without any rows get no frame at all.
        continue
    day_traces = [
        _binary_choropleth(filtered_data, 'Hotspot'),
        _binary_choropleth(filtered_data, 'Predicted Value Binary'),
    ]
    # The frame is named after its date string so slider steps can target it.
    frames.append(go.Frame(data=day_traces, name=date))

# Two side-by-side map panels: actual hotspots on the left, predictions on the right.
fig = make_subplots(rows=1, cols=2, subplot_titles=("Hotspot", "Predicted Hotspot for next week"),
                    specs=[[{"type": "choroplethmapbox"}, {"type": "choroplethmapbox"}]])

# Apply the same camera and base-map settings to every mapbox subplot.
fig.update_mapboxes(
    bearing=0,
    pitch=0,
    zoom=5,
    center={"lat": 51.1657, "lon": 10.4515},
    style="carto-positron"
)

# Without at least one frame there is nothing to seed the figure with;
# fail with a clear message instead of a bare IndexError on frames[0].
if not frames:
    raise ValueError(
        "No data rows fall within the selected date range; "
        "there are no frames to animate."
    )

# Seed the figure with the first frame's traces, one per panel.
fig.add_traces(frames[0].data, rows=[1, 1], cols=[1, 2])

fig.frames = frames

# Slider labels, including the day of the week. They are derived from each
# frame's own name (its 'YYYY-MM-DD' date) rather than from the full calendar
# list: days without data produce no frame, so indexing the calendar by frame
# position would attach the wrong date to every step after the first gap.
formatted_dates = [pd.to_datetime(frame.name).strftime('%Y-%m-%d (%a)') for frame in frames]
# One slider step per frame; selecting a step jumps straight to that frame.
slider_steps = [
    dict(
        label=label,
        method='animate',
        args=[[frame.name],
              dict(mode='immediate', frame=dict(duration=300), transition=dict(duration=0))],
    )
    for frame, label in zip(frames, formatted_dates)
]

# Date slider: one step per frame, with the selected date shown as "Date: ...".
date_slider = dict(
    steps=slider_steps,
    currentvalue=dict(visible=True, prefix='Date: ', xanchor='right'),
    transition=dict(duration=300),
    font=dict(size=12),
    active=0,
    x=0.1,
    len=0.8,
    xanchor='left',
    yanchor='top',
    pad=dict(b=10, t=50),
)

# "Play" animates through the frames, continuing from the one currently shown.
play_button = {
    'label': "Play",
    'method': "animate",
    'args': [None, {'frame': {'duration': 300, 'redraw': True}, 'fromcurrent': True}],
}

# "Pause" uses Plotly's [None] frame list with an immediate, zero-duration
# transition to halt the running animation.
pause_button = {
    'label': "Pause",
    'method': "animate",
    'args': [[None], {'frame': {'duration': 0, 'redraw': False}, 'mode': "immediate"}],
}

# Horizontal button group placed below the maps.
animation_buttons = {
    'type': "buttons",
    'direction': "left",
    'x': 0.1,
    'y': -0.1,
    'xanchor': 'left',
    'yanchor': 'top',
    'buttons': [play_button, pause_button],
}

fig.update_layout(
    sliders=[date_slider],
    title='Hotspot vs Predicted Hotspot for next week',
    updatemenus=[animation_buttons],
)


# Remove the outer margins, keeping 50px at the top for the title.
figure_margin = {'l': 0, 'r': 0, 't': 50, 'b': 0}

# NOTE(review): the `mapbox` layout key addresses the first map panel only;
# the same values were already applied to all panels via update_mapboxes.
first_map_settings = {
    'bearing': 0,
    'pitch': 0,
    'zoom': 5,
    'center': {"lat": 51.1657, "lon": 10.4515},
    'style': "carto-positron",
}

fig.update_layout(margin=figure_margin, mapbox=first_map_settings)

# Save the interactive figure as a standalone HTML page. The file gets an
# .html extension (not .csv) so that it opens in a browser.
fig.write_html("path/to/your/ouputfile/toSave.html")