In [88]:
import pandas as pd
import matplotlib.pyplot as plt
import plotly.express as px
import calendar

## Reading the joined climate data

In [79]:
# Load the joined climate data. The first CSV column becomes the index and
# "local_time" is parsed to datetimes so that the .dt accessor can be used below.
# low_memory=False lets pandas infer each column's dtype from the whole file
# instead of chunk by chunk, which avoids the "mixed types" DtypeWarning.
df = pd.read_csv(
    "../data/preproccesed/Climate_Data_joined.csv",
    index_col=0,
    parse_dates=["local_time"],
    low_memory=False,
)
# Calendar month number (1-12) of every recording.
df["month"] = df["local_time"].dt.month


Columns (12) have mixed types. Specify dtype option on import or set low_memory=False.



## Creating a time series of recordings per month for each sensor
The tricky part was filling in the missing months: some sensors did not record data in every month, so the code below reindexes the counts onto a full month × site grid and fills the gaps with 0 ;)

In [121]:
# Number of recordings per (site, month). A month in which a sensor recorded
# nothing has no row at this point.
counts = df.groupby(["site_id", "month"]).size().to_frame("count")

# Full month x site grid, so that the missing combinations can be added as
# explicit rows with a count of 0.
sites = counts.index.get_level_values("site_id").unique().to_numpy()
dates = list(range(1, 13))
idx = pd.MultiIndex.from_product((dates, sites), names=["month", "site_id"])

month_df = (
    counts.swaplevel()  # (site_id, month) -> (month, site_id), matching idx
    .reindex(idx, fill_value=0)
    .sort_values(["site_id", "month"])
    .reset_index()
)

# Month number -> English month name (1 -> "January", ...).
month_df["month_name"] = pd.to_datetime(month_df["month"], format="%m").dt.month_name()

# Export for the web front end; the row index is written as the first column.
month_df.to_csv("../web/data/monthly_sensor_data.csv")
# The grid has one row per site and month, i.e. len(sites) * 12 rows.

In [122]:
# Small scratch inputs for trying out the month-number -> month-name conversion.
df_test = pd.DataFrame({"month": [1, 2, 3]})
subset_month = month_df.head(5)
# Conversion attempts, kept for reference:
# pd.to_datetime(subset_month.month, format="%m").dt.month_name()
# pd.to_datetime(month_df.month, format="%m").dt.month_name()

# Show the completed per-site, per-month table.
month_df

Unnamed: 0,month,site_id,count,month_name
0,1,1001,47466,January
1,2,1001,44208,February
2,3,1001,48312,March
3,4,1001,44328,April
4,5,1001,39090,May
...,...,...,...,...
187,8,1016,0,August
188,9,1016,8236,September
189,10,1016,23782,October
190,11,1016,23314,November


## Getting the unique site_ids
I wanted to be sure that no site_id is associated with more than one latitude/longitude pair, so the grouping below uses the coordinates together with the site_id.

In [81]:
# One row per distinct (latitude, longitude, site_id) combination, together
# with the number of recordings that carry that combination.
col_names = ["latitude_sensor", "longitude_sensor", "site_id"]
df_locations = (
    df.groupby(col_names)
    .size()
    .rename("count")
    .reset_index()
)
# Export for the web front end; the row index is written as the first column.
df_locations.to_csv("../web/data/locations.csv")
df_locations.head()

Unnamed: 0,latitude_sensor,longitude_sensor,site_id,count
0,-37.8225,144.952222,1010,240167
1,-37.822486,144.952065,1006,31392
2,-37.82246,144.951835,1007,272184
3,-37.822222,144.952222,1011,190870
4,-37.8175,144.967222,1014,168268


## Test of geoplot

In [82]:
# Hover box configuration: hide the raw coordinates, show the recording count
# and the site id.
# NOTE(review): ":100.0f" requests a field width of 100 with zero decimals;
# confirm that ":.0f" was not what was intended.
hover_fields = {
    "latitude_sensor": False,
    "longitude_sensor": False,
    "count": ":100.0f",
    "site_id": True,
}

# One marker per sensor location, with the marker size scaled by the number of
# recordings at that location.
fig = px.scatter_mapbox(
    df_locations,
    lat="latitude_sensor",
    lon="longitude_sensor",
    size="count",
    size_max=20,
    color_discrete_sequence=["darkgreen"],
    hover_data=hover_fields,
    zoom=13,
)

# OpenStreetMap tiles, with the figure margins removed on all four sides.
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)

fig.show()