In [1]:
import pandas as pd

# columns: Measurement date,Dose strength,Rain

lodz_df = pd.read_csv("data/lodz.csv")
gdynia_df = pd.read_csv("data/gdynia.csv")
# krakow_df = pd.read_csv("data/krakow.csv")
# warszawa_df = pd.read_csv("data/warszawa.csv")
# lublin_df = pd.read_csv("data/lublin.csv")

In [2]:
def split_df_into_12h(df):
    df["Measurement date"] = pd.to_datetime(df["Measurement date"])
    dfs = [group for _, group in df.groupby(pd.Grouper(key="Measurement date", freq="12h"))]
    return dfs

In [3]:
rain_threshold = 2

def combine_dataframes(df_list, column):
    # Initialize an empty DataFrame with the required columns
    combined_df = pd.DataFrame(columns=["window_id", "start_time", "start_time_value", "end_time", "end_time_value", "max_time", "max_value", "integral"])

    for df in df_list:
        window_id = df["window_id"].values[0]

        # Get the start time, end time, and max rain value
        start_time = df["Measurement date"].min()
        start_time_value = df[df["Measurement date"] == start_time][column].values[0]

        end_time = df["Measurement date"].max()
        end_time_value = df[df["Measurement date"] == end_time][column].values[0]

        max_rain_value = df[column].max()
        max_rain_time = df[df[column] == max_rain_value]["Measurement date"].values[0]

        integral = df[column].sum()

        # Append the information to the combined DataFrame
        new_row = pd.DataFrame({
            "window_id": [window_id],
            "start_time": [start_time],
            "start_time_value": [start_time_value],
            "end_time": [end_time],
            "end_time_value": [end_time_value],
            "max_time": [max_rain_time],
            "max_value": [max_rain_value],
            "integral": [integral]
        })
        combined_df = pd.concat([combined_df, new_row], ignore_index=True)

    return combined_df

def get_rain_intervals(intervals):

    result_rain_intervals = []
    for index in range(len(intervals)):
        interval = intervals[index]
        if interval["Rain"].max() <= rain_threshold:
            continue

        rain_intervals = []
        rain_interval = []
        for _, row in interval.iterrows():
            if row["Rain"] >= rain_threshold:
                rain_interval.append(row)
            else:
                if rain_interval:

                    interval_df = pd.DataFrame(rain_interval)
                    interval_df["window_id"] = index
                    rain_intervals.append(interval_df)
                    rain_interval = []
        if rain_intervals:
            max_rain_interval = max(
                rain_intervals, key=lambda x: max(row["Rain"] for _, row in x.iterrows())
            )
            result_rain_intervals.append(max_rain_interval)

    return combine_dataframes(result_rain_intervals, "Rain")


def get_dose_strength_intervals(intervals):
    result_dose_strength_intervals = []
    for index in range(len(intervals)):
        interval = intervals[index]
        average_dose_strength = interval["Dose strength"].mean()
        if interval["Dose strength"].max() < average_dose_strength * 1.15:
            continue

        dose_strength_intervals = []
        dose_strength_interval = []
        for _, row in interval.iterrows():
            if row["Dose strength"] >= average_dose_strength * 1.05:
                dose_strength_interval.append(row)
            else:
                if dose_strength_interval:
                    interval_df = pd.DataFrame(dose_strength_interval)
                    interval_df["window_id"] = index
                    dose_strength_intervals.append(interval_df)
                    dose_strength_interval = []
        if dose_strength_intervals:
            max_dose_strength_interval = max(
                dose_strength_intervals, key=lambda x: max(row["Dose strength"] for _, row in x.iterrows())
            )
            result_dose_strength_intervals.append(max_dose_strength_interval)


    return combine_dataframes(result_dose_strength_intervals, "Dose strength")


def get_combined_intervals(df):
    intervals_12h = split_df_into_12h(df)
    rain_intervals = get_rain_intervals(intervals_12h)
    dose_strength_intervals = get_dose_strength_intervals(intervals_12h)

    max_index = max(rain_intervals["window_id"].max(), dose_strength_intervals["window_id"].max())

    combined_intervals = pd.DataFrame(
        columns=[
            "window_id",
            "start_time",
            "start_value_rain",
            "start_value_dose_strength",
            "end_time",
            "end_value_rain",
            "end_value_dose_strength",
            "max_time_rain",
            "max_value_rain",
            "max_time_dose_strength",
            "max_value_dose_strength",
            "integral_rain",
            "integral_dose_strength",
        ]
    )

    for index in range(max_index + 1):
        rain_interval = rain_intervals[rain_intervals["window_id"] == index]
        dose_strength_interval = dose_strength_intervals[dose_strength_intervals["window_id"] == index]

        if rain_interval.empty or dose_strength_interval.empty:
            continue

        start_time = min(rain_interval["start_time"].values[0], dose_strength_interval["start_time"].values[0])
        start_value_rain = rain_interval["start_time_value"].values[0]
        start_value_dose_strength = dose_strength_interval["start_time_value"].values[0]

        end_time = max(rain_interval["end_time"].values[0], dose_strength_interval["end_time"].values[0])
        end_value_rain = rain_interval["end_time_value"].values[0]
        end_value_dose_strength = dose_strength_interval["end_time_value"].values[0]

        max_time_rain = rain_interval["max_time"].values[0]
        max_value_rain = rain_interval["max_value"].values[0]
        max_time_dose_strength = dose_strength_interval["max_time"].values[0]
        max_value_dose_strength = dose_strength_interval["max_value"].values[0]

        integral_rain = rain_interval["integral"].values[0]
        integral_dose_strength = dose_strength_interval["integral"].values[0]

        new_row = pd.DataFrame(
            {
                "window_id": [index],
                "start_time": [start_time],
                "start_value_rain": [start_value_rain],
                "start_value_dose_strength": [start_value_dose_strength],
                "end_time": [end_time],
                "end_value_rain": [end_value_rain],
                "end_value_dose_strength": [end_value_dose_strength],
                "max_time_rain": [max_time_rain],
                "max_value_rain": [max_value_rain],
                "max_time_dose_strength": [max_time_dose_strength],
                "max_value_dose_strength": [max_value_dose_strength],
                "integral_rain": [integral_rain],
                "integral_dose_strength": [integral_dose_strength],
            }
        )

        combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)

    return combined_intervals


In [4]:
lodz_combined_intervals = get_combined_intervals(lodz_df)
gdynia_combined_intervals = get_combined_intervals(gdynia_df)


display(lodz_combined_intervals)
display(gdynia_combined_intervals)

  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_df = pd.concat([combined_df, new_row], ignore_index=True)
  combined_intervals = pd.concat([combined_intervals, new_row], ignore_index=True)


Unnamed: 0,window_id,start_time,start_value_rain,start_value_dose_strength,end_time,end_value_rain,end_value_dose_strength,max_time_rain,max_value_rain,max_time_dose_strength,max_value_dose_strength,integral_rain,integral_dose_strength
0,906,2015-03-30 03:00:00,5,0.108,2015-03-30 05:00:00,5,0.102,2015-03-30 03:00:00,5,2015-03-30 04:00:00,0.114,5,0.324
1,1021,2015-05-26 12:00:00,4,0.108,2015-05-26 13:00:00,4,0.101,2015-05-26 12:00:00,4,2015-05-26 12:00:00,0.108,4,0.209
2,1055,2015-06-12 16:00:00,5,0.118,2015-06-12 19:00:00,2,0.104,2015-06-12 17:00:00,10,2015-06-12 17:00:00,0.118,17,0.333
3,1848,2016-07-13 01:00:00,2,0.11,2016-07-13 04:00:00,2,0.114,2016-07-13 02:00:00,4,2016-07-13 04:00:00,0.114,12,0.329
4,1851,2016-07-14 12:00:00,7,0.106,2016-07-14 13:00:00,7,0.0969,2016-07-14 12:00:00,7,2016-07-14 12:00:00,0.106,7,0.2029
5,1873,2016-07-25 12:00:00,23,0.105,2016-07-25 16:00:00,2,0.12,2016-07-25 12:00:00,23,2016-07-25 13:00:00,0.147,40,0.372
6,2501,2017-06-04 16:00:00,6,0.112,2017-06-04 17:00:00,6,0.105,2017-06-04 16:00:00,6,2017-06-04 16:00:00,0.112,6,0.217
7,2548,2017-06-28 00:00:00,3,0.121,2017-06-28 04:00:00,9,0.134,2017-06-28 02:00:00,15,2017-06-28 03:00:00,0.138,36,0.649
8,2593,2017-07-20 13:00:00,7,0.113,2017-07-20 14:00:00,7,0.105,2017-07-20 13:00:00,7,2017-07-20 13:00:00,0.113,7,0.218
9,2594,2017-07-21 06:00:00,3,0.107,2017-07-21 08:00:00,3,0.113,2017-07-21 06:00:00,3,2017-07-21 07:00:00,0.115,6,0.335


Unnamed: 0,window_id,start_time,start_value_rain,start_value_dose_strength,end_time,end_value_rain,end_value_dose_strength,max_time_rain,max_value_rain,max_time_dose_strength,max_value_dose_strength,integral_rain,integral_dose_strength
0,501,2014-09-08 12:00:00,5.3,0.133,2014-09-08 14:00:00,11.7,0.13,2014-09-08 13:00:00,11.7,2014-09-08 13:00:00,0.133,17.0,0.263
1,1218,2015-09-02 00:00:00,2.1,0.13,2015-09-02 01:00:00,2.1,0.127,2015-09-02 00:00:00,2.1,2015-09-02 00:00:00,0.13,2.1,0.257
2,1796,2016-06-17 07:00:00,4.2,0.125,2016-06-17 10:00:00,7.8,0.125,2016-06-17 10:00:00,7.8,2016-06-17 07:00:00,0.125,12.0,0.125
3,2604,2017-07-26 04:00:00,9.7,0.137,2017-07-26 06:00:00,7.5,0.141,2017-07-26 05:00:00,16.2,2017-07-26 06:00:00,0.141,33.4,0.278
4,2677,2017-08-31 17:00:00,9.9,0.127,2017-08-31 19:00:00,6.9,0.121,2017-08-31 17:00:00,9.9,2017-08-31 18:00:00,0.127,16.8,0.248
5,3392,2018-08-24 04:00:00,7.7,0.14,2018-08-24 06:00:00,15.4,0.138,2018-08-24 05:00:00,15.4,2018-08-24 05:00:00,0.14,23.1,0.278
6,3969,2019-06-08 12:00:00,11.2,0.124,2019-06-08 14:00:00,2.8,0.116,2019-06-08 12:00:00,11.2,2019-06-08 13:00:00,0.124,14.0,0.24
7,3993,2019-06-20 13:00:00,13.7,0.123,2019-06-20 14:00:00,13.7,0.128,2019-06-20 13:00:00,13.7,2019-06-20 14:00:00,0.128,13.7,0.251
8,4873,2020-09-02 12:00:00,3.5,0.129,2020-09-02 12:00:00,3.5,0.129,2020-09-02 12:00:00,3.5,2020-09-02 12:00:00,0.129,3.5,0.129
9,5492,2021-07-09 02:00:00,4.5,0.119,2021-07-09 04:00:00,5.9,0.12,2021-07-09 03:00:00,5.9,2021-07-09 03:00:00,0.133,10.4,0.372


In [5]:
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

def plot_rain(df, intervals, start_date=None, end_date=None, filename='plot', x_ticks='monthly'):
    # Filter the DataFrame based on the start and end dates
    if start_date and end_date:
        mask = (df['Measurement date'] >= start_date) & (df['Measurement date'] <= end_date)
        filtered_df = df.loc[mask]

        intervals_mask = (intervals['start_time'] >= start_date) & (intervals['end_time'] <= end_date)
        filtered_intervals = intervals.loc[intervals_mask]
    else:
        filtered_df = df
        filtered_intervals = intervals

    # Plot the rain values
    fig, ax = plt.subplots(figsize=(150,10))
    ax.plot(filtered_df['Measurement date'], filtered_df['Rain'], linewidth=0.5, markersize=12, label='Rain value')
    ax.scatter(filtered_intervals['max_rain_time'], filtered_intervals['max_rain_value'], color='red', label='Max rain value')

    # Set the frequency of the x-axis ticks
    if x_ticks == 'monthly':
        ax.xaxis.set_major_locator(mdates.MonthLocator())
    elif x_ticks == 'weekly':
        ax.xaxis.set_major_locator(mdates.WeekdayLocator())
    elif x_ticks == 'daily':
        ax.xaxis.set_major_locator(mdates.DayLocator())

    ax.axhline(y=rain_threshold, color='g', linestyle='--', label='Rain threshold')

    ax.set_xlabel('Date')
    ax.set_ylabel('Rain Value')
    ax.set_title('Rain Values Over Time')

    # display intervals as shaded areas
    for _, interval in filtered_intervals.iterrows():
        ax.axvspan(interval['start_time'], interval['end_time'], color='red', alpha=0.5)

    fig.autofmt_xdate()  # Rotate and align the x labels
    plt.savefig(filename + '.png')
    plt.show()


# for year in range(2014, 2023):
#     plot_rain(lodz_df, lodz_rain_intervals, start_date=f"{year}-01-01", end_date=f"{year}-12-31", x_ticks='monthly', filename=f'lodz_plot_{year}')

plot_rain(lodz_df, lodz_rain_intervals, start_date="2016-05-01", end_date="2016-05-30", x_ticks='daily', filename=f'lodz_plot')

display(lodz_rain_intervals)

NameError: name 'lodz_rain_intervals' is not defined