In [30]:
import pandas as pd

# columns: Measurement date,Dose strength,Rain

lodz_df = pd.read_csv("data/lodz.csv")
gdynia_df = pd.read_csv("data/gdynia.csv")
krakow_df = pd.read_csv("data/krakow.csv")
warszawa_df = pd.read_csv("data/warszawa.csv")
# lublin_df = pd.read_csv("data/lublin.csv")

In [31]:
def split_df_into_12h(df):
    df["Measurement date"] = pd.to_datetime(df["Measurement date"])
    dfs = [group for _, group in df.groupby(pd.Grouper(key="Measurement date", freq="12h"))]
    return dfs

In [32]:
rain_threshold = 1

def combine_dataframes(df_list, column):
    # Initialize an empty DataFrame with the required columns
    combined_df = pd.DataFrame(columns=["window_id", "start_time", "start_time_value", "end_time", "end_time_value", "max_time", "max_value", "integral"])

    for df in df_list:
        window_id = df["window_id"].values[0]

        # Get the start time, end time, and max rain value
        start_time = df["Measurement date"].min()
        start_time_value = df[df["Measurement date"] == start_time][column].values[0]

        end_time = df["Measurement date"].max()
        end_time_value = df[df["Measurement date"] == end_time][column].values[0]

        max_rain_value = df[column].max()
        max_rain_time = df[df[column] == max_rain_value]["Measurement date"].values[0]

        integral = df[column].sum()

        # Append the information to the combined DataFrame
        new_row = pd.DataFrame({
            "window_id": [window_id],
            "start_time": [start_time],
            "start_time_value": [start_time_value],
            "end_time": [end_time],
            "end_time_value": [end_time_value],
            "max_time": [max_rain_time],
            "max_value": [max_rain_value],
            "integral": [integral]
        })
        combined_df = pd.concat([combined_df, new_row], ignore_index=True)

    return combined_df

def get_rain_intervals(intervals):

    result_rain_intervals = []
    for index in range(len(intervals)):
        interval = intervals[index]
        if interval["Rain"].max() <= rain_threshold:
            continue

        rain_intervals = []
        rain_interval = []
        for _, row in interval.iterrows():
            if row["Rain"] >= rain_threshold:
                rain_interval.append(row)
            else:
                if rain_interval:

                    interval_df = pd.DataFrame(rain_interval)
                    interval_df["window_id"] = index
                    rain_intervals.append(interval_df)
                    rain_interval = []
        if rain_intervals:
            max_rain_interval = max(
                rain_intervals, key=lambda x: max(row["Rain"] for _, row in x.iterrows())
            )
            result_rain_intervals.append(max_rain_interval)

    return combine_dataframes(result_rain_intervals, "Rain")


def get_dose_strength_intervals(intervals):
    result_dose_strength_intervals = []
    for index in range(len(intervals)):
        interval = intervals[index]
        average_dose_strength = interval["Dose strength"].mean()
        if interval["Dose strength"].max() < average_dose_strength * 1.10:
            continue

        dose_strength_intervals = []
        dose_strength_interval = []
        for _, row in interval.iterrows():
            if row["Dose strength"] >= average_dose_strength:
                dose_strength_interval.append(row)
            else:
                if dose_strength_interval:
                    interval_df = pd.DataFrame(dose_strength_interval)
                    interval_df["window_id"] = index
                    dose_strength_intervals.append(interval_df)
                    dose_strength_interval = []
        if dose_strength_intervals:
            max_dose_strength_interval = max(
                dose_strength_intervals, key=lambda x: max(row["Dose strength"] for _, row in x.iterrows())
            )
            result_dose_strength_intervals.append(max_dose_strength_interval)


    return combine_dataframes(result_dose_strength_intervals, "Dose strength")


def get_combined_intervals(df):
    intervals_12h = split_df_into_12h(df)
    rain_intervals = get_rain_intervals(intervals_12h)
    dose_strength_intervals = get_dose_strength_intervals(intervals_12h)

    max_index = max(rain_intervals["window_id"].max(), dose_strength_intervals["window_id"].max())

    combined_intervals = pd.DataFrame(
        columns=[
            "window_id",
            "start_time",
            "start_value_rain",
            "start_value_dose_strength",
            "end_time",
            "end_value_rain",
            "end_value_dose_strength",
            "max_time_rain",
            "max_value_rain",
            "max_time_dose_strength",
            "max_value_dose_strength",
            "integral_rain",
            "integral_dose_strength",
        ]
    )

    for index in range(max_index + 1):
        rain_interval = rain_intervals[rain_intervals["window_id"] == index]
        dose_strength_interval = dose_strength_intervals[dose_strength_intervals["window_id"] == index]

        if rain_interval.empty or dose_strength_interval.empty:
            continue

        start_time = min(rain_interval["start_time"].values[0], dose_strength_interval["start_time"].values[0])
        start_value_rain = rain_interval["start_time_value"].values[0]
        start_value_dose_strength = dose_strength_interval["start_time_value"].values[0]

        end_time = max(rain_interval["end_time"].values[0], dose_strength_interval["end_time"].values[0])
        end_value_rain = rain_interval["end_time_value"].values[0]
        end_value_dose_strength = dose_strength_interval["end_time_value"].values[0]

        max_time_rain = rain_interval["max_time"].values[0]
        max_value_rain = rain_interval["max_value"].values[0]
        max_time_dose_strength = dose_strength_interval["max_time"].values[0]
        max_value_dose_strength = dose_strength_interval["max_value"].values[0]

        integral_rain = rain_interval["integral"].values[0]
        integral_dose_strength = dose_strength_interval["integral"].values[0]

        new_row = pd.DataFrame(
            {
                "window_id": [index],
                "start_time": [start_time],
                "start_value_rain": [start_value_rain],
                "start_value_dose_strength": [start_value_dose_strength],
                "end_time": [end_time],
                "end_value_rain": [end_value_rain],
                "end_value_dose_strength": [end_value_dose_strength],
                "max_time_rain": [max_time_rain],
                "max_value_rain": [max_value_rain],
                "max_time_dose_strength": [max_time_dose_strength],
                "max_value_dose_strength": [max_value_dose_strength],
                "integral_rain": [integral_rain],
                "integral_dose_strength": [integral_dose_strength],
            }
        )

        combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)

    return combined_intervals


In [33]:
lodz_combined_intervals = get_combined_intervals(lodz_df)
gdynia_combined_intervals = get_combined_intervals(gdynia_df)
krakow_combined_intervals = get_combined_intervals(krakow_df)
warszawa_combined_intervals = get_combined_intervals(warszawa_df)

lodz_combined_intervals.to_csv("out/lodz_combined_intervals.csv", index=False)
gdynia_combined_intervals.to_csv("out/gdynia_combined_intervals.csv", index=False)
krakow_combined_intervals.to_csv("out/krakow_combined_intervals.csv", index=False)
warszawa_combined_intervals.to_csv("out/warszawa_combined_intervals.csv", index=False)


display(lodz_combined_intervals)
display(gdynia_combined_intervals)
display(krakow_combined_intervals)
display(warszawa_combined_intervals)

  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)


Unnamed: 0,window_id,start_time,start_value_rain,start_value_dose_strength,end_time,end_value_rain,end_value_dose_strength,max_time_rain,max_value_rain,max_time_dose_strength,max_value_dose_strength,integral_rain,integral_dose_strength
0,655,2014-11-24 17:00:00,1,0.0951,2014-11-24 22:00:00,4,0.0905,2014-11-24 19:00:00,4,2014-11-24 20:00:00,0.104,7,0.4852
1,906,2015-03-30 01:00:00,1,0.0978,2015-03-30 05:00:00,1,0.1020,2015-03-30 03:00:00,5,2015-03-30 04:00:00,0.114,8,0.4218
2,965,2015-04-28 15:00:00,2,0.0984,2015-04-28 17:00:00,1,0.0967,2015-04-28 15:00:00,2,2015-04-28 16:00:00,0.107,3,0.3021
3,980,2015-05-06 04:00:00,2,0.1060,2015-05-06 08:00:00,2,0.0973,2015-05-06 04:00:00,2,2015-05-06 05:00:00,0.106,8,0.4103
4,1021,2015-05-26 12:00:00,4,0.1080,2015-05-26 17:00:00,1,0.0935,2015-05-26 12:00:00,4,2015-05-26 12:00:00,0.108,5,0.5865
...,...,...,...,...,...,...,...,...,...,...,...,...,...
152,6990,2023-07-28 04:00:00,2,0.1180,2023-07-28 06:00:00,1,0.1060,2023-07-28 04:00:00,2,2023-07-28 05:00:00,0.118,3,0.2240
153,6993,2023-07-29 19:00:00,5,0.1160,2023-07-29 22:00:00,8,0.0957,2023-07-29 20:00:00,8,2023-07-29 20:00:00,0.116,13,0.3177
154,7033,2023-08-18 12:00:00,13,0.1180,2023-08-18 15:00:00,1,0.0997,2023-08-18 12:00:00,13,2023-08-18 13:00:00,0.121,15,0.4517
155,7038,2023-08-21 05:00:00,7,0.1010,2023-08-21 07:00:00,7,0.0994,2023-08-21 05:00:00,7,2023-08-21 06:00:00,0.117,14,0.3174


Unnamed: 0,window_id,start_time,start_value_rain,start_value_dose_strength,end_time,end_value_rain,end_value_dose_strength,max_time_rain,max_value_rain,max_time_dose_strength,max_value_dose_strength,integral_rain,integral_dose_strength
0,8,2014-01-05 06:00:00,1.3,0.115,2014-01-05 10:00:00,1.0,0.118,2014-01-05 08:00:00,1.8,2014-01-05 08:00:00,0.128,4.1,0.611
1,17,2014-01-09 19:00:00,2.2,0.113,2014-01-09 22:00:00,2.2,0.122,2014-01-09 21:00:00,2.2,2014-01-09 22:00:00,0.122,2.2,0.46
2,118,2014-03-01 05:00:00,3.1,0.129,2014-03-01 08:00:00,3.7,0.121,2014-03-01 06:00:00,4.4,2014-03-01 06:00:00,0.129,11.2,0.377
3,416,2014-07-28 00:00:00,27.7,0.12,2014-07-28 03:00:00,27.7,0.115,2014-07-28 00:00:00,27.7,2014-07-28 01:00:00,0.124,27.7,0.48
4,501,2014-09-08 12:00:00,5.3,0.112,2014-09-08 15:00:00,11.7,0.114,2014-09-08 13:00:00,11.7,2014-09-08 13:00:00,0.133,17.0,0.489
5,1141,2015-07-25 13:00:00,1.8,0.112,2015-07-25 16:00:00,3.5,0.112,2015-07-25 14:00:00,3.5,2015-07-25 14:00:00,0.121,5.3,0.461
6,1167,2015-08-07 16:00:00,7.3,0.123,2015-08-07 18:00:00,7.3,0.109,2015-08-07 16:00:00,7.3,2015-08-07 16:00:00,0.123,7.3,0.352
7,1203,2015-08-25 13:00:00,2.7,0.113,2015-08-25 17:00:00,2.7,0.108,2015-08-25 13:00:00,2.7,2015-08-25 14:00:00,0.118,2.7,0.556
8,1218,2015-09-02 00:00:00,2.1,0.13,2015-09-02 02:00:00,2.1,0.114,2015-09-02 00:00:00,2.1,2015-09-02 00:00:00,0.13,2.1,0.371
9,1244,2015-09-15 00:00:00,2.8,0.117,2015-09-15 04:00:00,2.8,0.111,2015-09-15 01:00:00,2.8,2015-09-15 02:00:00,0.121,2.8,0.581


Unnamed: 0,window_id,start_time,start_value_rain,start_value_dose_strength,end_time,end_value_rain,end_value_dose_strength,max_time_rain,max_value_rain,max_time_dose_strength,max_value_dose_strength,integral_rain,integral_dose_strength
0,981,2015-05-06 12:00:00,1,0.141,2015-05-06 15:00:00,17,0.120,2015-05-06 13:00:00,17,2015-05-06 13:00:00,0.141,18,0.395
1,1008,2015-05-20 00:00:00,4,0.138,2015-05-20 02:00:00,3,0.128,2015-05-20 00:00:00,4,2015-05-20 00:00:00,0.138,8,0.394
2,1009,2015-05-20 16:00:00,5,0.124,2015-05-20 19:00:00,1,0.125,2015-05-20 16:00:00,5,2015-05-20 17:00:00,0.135,9,0.511
3,1010,2015-05-21 00:00:00,4,0.130,2015-05-21 02:00:00,1,0.117,2015-05-21 00:00:00,4,2015-05-21 00:00:00,0.130,5,0.375
4,1060,2015-06-15 01:00:00,1,0.121,2015-06-15 07:00:00,2,0.123,2015-06-15 05:00:00,4,2015-06-15 06:00:00,0.136,14,0.761
...,...,...,...,...,...,...,...,...,...,...,...,...,...
93,5599,2021-08-31 12:00:00,5,0.138,2021-08-31 14:00:00,5,0.125,2021-08-31 12:00:00,5,2021-08-31 12:00:00,0.138,5,0.400
94,5725,2021-11-02 15:00:00,2,0.143,2021-11-02 19:00:00,1,0.148,2021-11-02 16:00:00,3,2021-11-02 18:00:00,0.159,12,0.600
95,5938,2022-02-17 06:00:00,2,0.125,2022-02-17 09:00:00,7,0.128,2022-02-17 08:00:00,7,2022-02-17 08:00:00,0.136,9,0.512
96,6093,2022-05-05 19:00:00,3,0.135,2022-05-05 21:00:00,2,0.131,2022-05-05 19:00:00,3,2022-05-05 20:00:00,0.143,5,0.409


Unnamed: 0,window_id,start_time,start_value_rain,start_value_dose_strength,end_time,end_value_rain,end_value_dose_strength,max_time_rain,max_value_rain,max_time_dose_strength,max_value_dose_strength,integral_rain,integral_dose_strength
0,28,2014-01-15 00:00:00,1.1,0.1130,2014-01-15 01:00:00,1.1,0.1090,2014-01-15 00:00:00,1.1,2014-01-15 00:00:00,0.1130,1.1,0.2220
1,166,2014-03-25 00:00:00,2.0,0.1080,2014-03-25 02:00:00,1.1,0.0959,2014-03-25 00:00:00,2.0,2014-03-25 00:00:00,0.1080,3.1,0.3069
2,219,2014-04-20 15:00:00,3.5,0.1010,2014-04-20 17:00:00,1.7,0.0976,2014-04-20 15:00:00,3.5,2014-04-20 16:00:00,0.1060,5.2,0.3046
3,225,2014-04-23 12:00:00,1.0,0.0890,2014-04-23 14:00:00,3.0,0.0963,2014-04-23 13:00:00,3.0,2014-04-23 14:00:00,0.0963,4.0,0.2755
4,232,2014-04-27 00:00:00,1.5,0.1070,2014-04-27 02:00:00,1.5,0.0967,2014-04-27 01:00:00,1.5,2014-04-27 00:00:00,0.1070,1.5,0.3087
...,...,...,...,...,...,...,...,...,...,...,...,...,...
163,6185,2022-06-20 12:00:00,8.4,0.0975,2022-06-20 16:00:00,1.8,0.0998,2022-06-20 12:00:00,8.4,2022-06-20 13:00:00,0.1170,13.1,0.5293
164,6201,2022-06-28 14:00:00,1.9,0.1110,2022-06-28 18:00:00,1.9,0.1070,2022-06-28 15:00:00,5.2,2022-06-28 16:00:00,0.1180,10.0,0.4490
165,6202,2022-06-29 00:00:00,10.1,0.1180,2022-06-29 02:00:00,10.1,0.0963,2022-06-29 00:00:00,10.1,2022-06-29 00:00:00,0.1180,10.1,0.3193
166,6208,2022-07-02 00:00:00,3.8,0.1150,2022-07-02 02:00:00,3.8,0.1020,2022-07-02 00:00:00,3.8,2022-07-02 00:00:00,0.1150,3.8,0.3320


In [34]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def plot_rain(df, intervals, start_date=None, end_date=None, filename='plot', x_ticks='monthly'):
    # Filter the DataFrame based on the start and end dates
    if start_date and end_date:
        mask = (df['Measurement date'] >= start_date) & (df['Measurement date'] <= end_date)
        filtered_df = df.loc[mask]

        intervals_mask = (intervals['start_time'] >= start_date) & (intervals['end_time'] <= end_date)
        filtered_intervals = intervals.loc[intervals_mask]
    else:
        filtered_df = df
        filtered_intervals = intervals

    # Plot the rain values
    fig, ax = plt.subplots(figsize=(150,10))
    ax.plot(filtered_df['Measurement date'], filtered_df['Rain'], linewidth=0.5, markersize=12, label='Rain value')
    ax.scatter(filtered_intervals['max_rain_time'], filtered_intervals['max_rain_value'], color='red', label='Max rain value')

    # Set the frequency of the x-axis ticks
    if x_ticks == 'monthly':
        ax.xaxis.set_major_locator(mdates.MonthLocator())
    elif x_ticks == 'weekly':
        ax.xaxis.set_major_locator(mdates.WeekdayLocator())
    elif x_ticks == 'daily':
        ax.xaxis.set_major_locator(mdates.DayLocator())

    ax.axhline(y=rain_threshold, color='g', linestyle='--', label='Rain threshold')

    ax.set_xlabel('Date')
    ax.set_ylabel('Rain Value')
    ax.set_title('Rain Values Over Time')

    # display intervals as shaded areas
    for _, interval in filtered_intervals.iterrows():
        ax.axvspan(interval['start_time'], interval['end_time'], color='red', alpha=0.5)

    fig.autofmt_xdate()  # Rotate and align the x labels
    plt.savefig(filename + '.png')
    plt.show()


# for year in range(2014, 2023):
#     plot_rain(lodz_df, lodz_rain_intervals, start_date=f"{year}-01-01", end_date=f"{year}-12-31", x_ticks='monthly', filename=f'lodz_plot_{year}')

plot_rain(lodz_df, lodz_rain_intervals, start_date="2016-05-01", end_date="2016-05-30", x_ticks='daily', filename=f'lodz_plot')

display(lodz_rain_intervals)

NameError: name 'lodz_rain_intervals' is not defined