# Problem Analysis

This notebook analyses the underlying problem for the analysis chapter of the thesis.

In [2]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import utilities.latex_figures as latex_figs
import utilities.plotting as plots
import utilities.preprocess_raw_data as prep_data

In [3]:
# Location of the cached base dataset and of the previously saved raw merge.
BASE_DATASET_PATH = "./data/my_datasets/base_dataset.parquet"
RAW_MERGED_PATH = "./data/my_datasets/raw_merged.parquet"

try:
    # Fast path: the base dataset has already been written to disk.
    df = pd.read_parquet(BASE_DATASET_PATH)
except FileNotFoundError:
    # Cache miss: let the preprocessing module build the dataset, first from
    # the saved raw merge and, if that file is missing as well, without it.
    # NOTE(review): what `load_base_dataset()` does without a path is not
    # visible here - confirm in utilities.preprocess_raw_data.
    try:
        df = prep_data.load_base_dataset(saved_raw_merged_df_path=RAW_MERGED_PATH)
    except FileNotFoundError:
        df = prep_data.load_base_dataset()
    # Persist the result so the next run takes the fast path.
    df.to_parquet(BASE_DATASET_PATH)

In [4]:
# Number of distinct devices and distinct cycles in the base dataset.
for label, column in (("Device_uuids", "device_uuid"), ("Cycle_ids", "cycle_id")):
    print(label, df[column].nunique())

Device_uuids 83
Cycle_ids 182


In [5]:
# Overview of the base dataset: column names, non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29344 entries, 0 to 29343
Data columns (total 10 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   status_time            29344 non-null  datetime64[ns]
 1   device_uuid            29344 non-null  object        
 2   battery_level_percent  29344 non-null  float64       
 3   radio_level_percent    29344 non-null  float64       
 4   air_temperature        29344 non-null  float64       
 5   battery_type_id        29344 non-null  float64       
 6   fw_version             29344 non-null  object        
 7   device_model_code      29344 non-null  object        
 8   battery_diff           29344 non-null  float64       
 9   cycle_id               29344 non-null  float64       
dtypes: datetime64[ns](1), float64(6), object(3)
memory usage: 2.2+ MB


In [6]:
# Display labels for the battery types. The keys are the string form of the
# float ids ("1.0", "2.0") because the column is cast to str before replacing.
BATTERY_TYPE_LABELS = {
    "1.0": "Photo Lithium (<-10 °C)",
    "2.0": "Alkali Mangan (>-10 °C)",
}

battery_type_as_text = df["battery_type_id"].astype(str)
df["battery_type_id"] = battery_type_as_text.replace(BATTERY_TYPE_LABELS)

In [7]:
title = "Histogramm der Batterieladungen."

# Histogram over all reported battery levels; the x-axis is pinned to 0-100.
fig = px.histogram(
    df,
    x="battery_level_percent",
    range_x=[0, 100],
    title=title,
    width=1000,
    height=600,
)
fig.update_layout(
    xaxis_title="Batterieladung in %",
    yaxis_title="Häufigkeit",
)
fig.show()

# Export the figure as PNG and write the matching LaTeX snippet.
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_battery_level_percent.png",
    fig,
    label="analysis_battery_level_percent",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_battery_level_percent.txt")

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_battery_level_percent.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_battery_level_percent.txt
Done!


In [8]:
# Mean `battery_diff` of each cycle (one value per `cycle_id`).
df.groupby("cycle_id")["battery_diff"].mean()

cycle_id
0.0     -0.224284
1.0     -0.864912
2.0     -1.288333
3.0     -1.389423
5.0     -0.383333
           ...   
239.0   -0.788462
240.0   -0.433333
241.0   -1.412121
242.0   -1.327957
243.0    0.953125
Name: battery_diff, Length: 182, dtype: float64

In [9]:
title = "Histogramm der Batterie Entladungen."

# One mean `battery_diff` value per cycle, held as an explicit one-column
# frame so that the column name passed as `x` below is resolvable.
mean_diff_per_cycle = df.groupby("cycle_id")["battery_diff"].mean().to_frame()

fig = px.histogram(
    mean_diff_per_cycle,
    x="battery_diff",
    nbins=100,
    title=title,
    width=1000,
    height=600,
)
fig.update_layout(
    xaxis_title="Batterie Entladung in % pro Tag",
    yaxis_title="Häufigkeit",
)
fig.show()

# Export the figure as PNG and write the matching LaTeX snippet.
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_battery_diff.png",
    fig,
    label="analysis_battery_diff",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_battery_diff.txt")

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_battery_diff.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_battery_diff.txt
Done!


In [10]:
title = "Histogramm der WLAN-Empfangsstärke."

# Histogram over all reported radio levels.
fig = px.histogram(
    df,
    x="radio_level_percent",
    title=title,
    width=1000,
    height=600,
    nbins=100,
)
fig.update_layout(
    xaxis_title="WLAN-Empfangsstärke in %",
    yaxis_title="Häufigkeit",
)
fig.show()

# Export the figure as PNG and write the matching LaTeX snippet.
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_radio_level_percent.png",
    fig,
    label="analysis_radio_level_percent",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_radio_level_percent.txt")

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_radio_level_percent.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_radio_level_percent.txt
Done!


In [11]:
title = "Histogramm der Lufttemperatur, farblich unterteilt in Batterietypen."

# Drop outliers: only rows with an air temperature below 40 are plotted.
df_without_outliers = df[df["air_temperature"] < 40]

# Overlaid histograms, one colour per battery type.
fig = px.histogram(
    df_without_outliers,
    x="air_temperature",
    color="battery_type_id",
    labels={"battery_type_id": "Batterietyp"},
    barmode="overlay",
    nbins=100,
    title=title,
    width=1000,
    height=600,
)
fig.update_layout(
    xaxis_title="Lufttemperatur in °C",
    yaxis_title="Häufigkeit",
)
fig.show()

# Export the figure as PNG and write the matching LaTeX snippet.
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_air_temperature.png",
    fig,
    label="analysis_air_temperature",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_air_temperature.txt")

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_air_temperature.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_air_temperature.txt
Done!


In [12]:
title = "Vergleich der Häufigkeit der Batterietypen"

# Relative frequency of each battery type over all rows of the dataset.
fig = go.Figure(
    data=go.Histogram(
        x=df["battery_type_id"],
        nbinsx=5,
        histnorm="probability",
        marker={"color": px.colors.qualitative.Plotly},
    )
)
fig.update_layout(
    title=title,
    xaxis_title="Batterietyp",
    yaxis_title="Relative Häufigkeit",
    width=600,
    height=600,
)
fig.show()

# Export the figure as PNG and write the LaTeX snippet (width argument 0.5).
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_battery_type.png",
    fig,
    label="analysis_battery_type",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_battery_type.txt", width=0.5)

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_battery_type.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_battery_type.txt
Done!


In [13]:
title = "Vergleich der Häufigkeiten der Modelltypen"

# Relative frequency of each device model code over all rows of the dataset.
fig = go.Figure(
    data=go.Histogram(
        x=df["device_model_code"].astype(str),
        nbinsx=5,
        histnorm="probability",
        marker={"color": px.colors.qualitative.Plotly},
    )
)
fig.update_layout(
    title=title,
    xaxis_title="Gerätemodell",
    yaxis_title="Relative Häufigkeit",
    width=600,
    height=600,
)
fig.show()

# Export the figure as PNG and write the LaTeX snippet (width argument 0.5).
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_model_code.png",
    fig,
    label="analysis_model_code",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_model_code.txt", width=0.5)

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_model_code.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_model_code.txt
Done!


In [14]:
title = "Vergleich der Häufigkeit der Firmwareversionen"

# Relative frequency of each firmware version over all rows of the dataset.
fig = go.Figure(
    data=go.Histogram(
        x=df["fw_version"].astype(str),
        nbinsx=5,
        histnorm="probability",
        marker={"color": px.colors.qualitative.Plotly},
    )
)
fig.update_layout(
    title=title,
    xaxis_title="Firmware Version",
    yaxis_title="Relative Häufigkeit",
    width=600,
    height=600,
)
fig.show()

# Export the figure as PNG and write the LaTeX snippet (width argument 0.5).
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_fw_version.png",
    fig,
    label="analysis_fw_version",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_fw_version.txt", width=0.5)

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_fw_version.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_fw_version.txt
Done!


In [15]:
title = "Histogramm der Restladung beim Austausch der Batterie."

# Lowest battery level observed within each cycle, and the mean of those minima.
batt_min = df.groupby("cycle_id")["battery_level_percent"].min()
mean = batt_min.mean()

fig = px.histogram(
    x=batt_min,
    nbins=50,
    title=title,
    width=1000,
    height=600,
)
fig.update_layout(
    xaxis_title="Restladung bei Austausch",
    yaxis_title="Häufigkeit",
)

# Mark the mean of the per-cycle minima with a dashed red line.
fig.add_vline(
    x=mean,
    line_color="red",
    line_dash="dash",
    annotation_position="top right",
    annotation_text=f"Durchschnitt {mean:.1f}",
)

fig.show()

# Export the figure as PNG and write the matching LaTeX snippet.
latex_fig = latex_figs.LatexFigure(
    "resources/figures/analysis/analysis_displacement.png",
    fig,
    label="analysis_displacement",
    caption=title,
)
latex_fig.save_figure()
latex_fig.write_latex_code_to_file("analysis_displacement.txt")

Saving subfigure to "/home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/resources/figures/analysis/analysis_displacement.png"...
Done!
Writing latex code to /home/nkuechen/Documents/Thesis/latex/Bachelor Thesis/analysis_displacement.txt
Done!


In [28]:
surv_df = pd.read_parquet("./data/my_datasets/survival_dataset.parquet")

# `event` is used as a boolean mask: rows where it is True are counted as
# uncensored, rows where it is False as censored. Summing the mask counts the
# True entries directly, so the frame does not have to be filtered twice.
# (The former bare `value_counts()` call was dropped: its result was discarded
# because it was not the last expression of the cell.)
is_event = surv_df["event"]
uncensored = is_event.sum()
censored = (~is_event).sum()
print(uncensored)
print(censored)
# Share of censored rows among all rows, in percent.
print(f"Anteil: {censored/(censored+uncensored)*100:.1f} %")

50
101
Anteil: 66.9 %


In [27]:
surv_df_t30 = pd.read_parquet("./data/my_datasets/survival_dataset_t30.parquet")

# `event` is used as a boolean mask: rows where it is True are counted as
# uncensored, rows where it is False as censored. Summing the mask counts the
# True entries directly, so the frame does not have to be filtered twice.
# (The former bare `value_counts()` call was dropped: its result was discarded
# because it was not the last expression of the cell.)
is_event_t30 = surv_df_t30["event"]
uncensored = is_event_t30.sum()
censored = (~is_event_t30).sum()
print(uncensored)
print(censored)
# Share of censored rows among all rows, in percent.
print(f"Anteil: {censored/(censored+uncensored)*100:.1f} %")

66
85
Anteil: 56.3 %
