In [53]:
import pandas as pd
import os
import plotly.express as px

In [54]:
# Define the dataset directory
DATASET_DIR = "cleaned_dataset"  # Update this to the path where the dataset is located

In [55]:
# Load metadata.csv
metadata_path = os.path.join(DATASET_DIR, "metadata.csv")
metadata = pd.read_csv(metadata_path, sep=',')  # Ensure correct separator

In [56]:
# Debugging: Print column names and sample data
print("Column names in the dataset:")
print(metadata.columns)
print("\nSample data:")
print(metadata.head())

Column names in the dataset:
Index(['type', 'start_time', 'ambient_temperature', 'battery_id', 'test_id',
       'uid', 'filename', 'Capacity', 'Re', 'Rct'],
      dtype='object')

Sample data:
        type                                         start_time  \
0  discharge  [2010.       7.      21.      15.       0.    ...   
1  impedance  [2010.       7.      21.      16.      53.    ...   
2     charge  [2010.       7.      21.      17.      25.    ...   
3  impedance                    [2010    7   21   20   31    5]   
4  discharge  [2.0100e+03 7.0000e+00 2.1000e+01 2.1000e+01 2...   

   ambient_temperature battery_id  test_id  uid   filename  \
0                    4      B0047        0    1  00001.csv   
1                   24      B0047        1    2  00002.csv   
2                    4      B0047        2    3  00003.csv   
3                   24      B0047        3    4  00004.csv   
4                    4      B0047        4    5  00005.csv   

             Capacity         

In [57]:
# Ensure Re and Rct columns are numeric
metadata["Re"] = pd.to_numeric(metadata["Re"], errors="coerce")  # Convert to numeric, coercing errors to NaN
metadata["Rct"] = pd.to_numeric(metadata["Rct"], errors="coerce")  # Convert to numeric, coercing errors to NaN

In [58]:
# Check for missing values in Re and Rct
print("\nNull value counts in Re and Rct columns before filling:")
print(metadata[["Re", "Rct"]].isnull().sum())


Null value counts in Re and Rct columns before filling:
Re     5618
Rct    5618
dtype: int64


In [59]:
# Plot histograms for Re and Rct columns before filling missing values
fig_re_hist = go.Figure()
fig_re_hist.add_trace(go.Histogram(x=metadata["Re"].dropna(), nbinsx=50, name="Re"))
fig_re_hist.update_layout(
    title="Distribution of Electrolyte Resistance (Re) Before Filling Missing Values",
    xaxis_title="Electrolyte Resistance (Ohms)",
    yaxis_title="Frequency",
    template="plotly_dark"
)
fig_re_hist.show()

In [60]:
fig_rct_hist = go.Figure()
fig_rct_hist.add_trace(go.Histogram(x=metadata["Rct"].dropna(), nbinsx=50, name="Rct"))
fig_rct_hist.update_layout(
    title="Distribution of Charge Transfer Resistance (Rct) Before Filling Missing Values",
    xaxis_title="Charge Transfer Resistance (Ohms)",
    yaxis_title="Frequency",
    template="plotly_dark"
)
fig_rct_hist.show()

In [61]:
# Fill missing values using the median
metadata["Re"].fillna(metadata["Re"].median(), inplace=True)
metadata["Rct"].fillna(metadata["Rct"].median(), inplace=True)


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.




A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.





In [62]:
# Verify that missing values have been filled
print("\nNull value counts in Re and Rct columns after filling:")
print(metadata[["Re", "Rct"]].isnull().sum())


Null value counts in Re and Rct columns after filling:
Re     0
Rct    0
dtype: int64


In [63]:
# Check the distribution of the 'type' column
print("\nValue counts in the 'type' column:")
print(metadata["type"].value_counts())


Value counts in the 'type' column:
type
charge       2815
discharge    2794
impedance    1956
Name: count, dtype: int64


In [64]:
# Ensure 'type' column has consistent values
metadata["type"] = metadata["type"].str.lower().str.strip()

In [65]:
# Add a cycle number for plotting (assuming `uid` represents the cycle number)
metadata["cycle_number"] = metadata["uid"]

In [66]:
# Filter data for charge/discharge operations
charge_discharge_data = metadata[metadata["type"].isin(["charge", "discharge"])]

In [67]:
# Ensure data is sorted by cycle number for better visualization
charge_discharge_data = charge_discharge_data.sort_values(by="cycle_number")

In [68]:
#Inspect data ranges for charge and discharge
print("\nCharge Data Statistics:")
print(charge_discharge_data[charge_discharge_data["type"] == "charge"][["Re", "Rct"]].describe())

print("\nDischarge Data Statistics:")
print(charge_discharge_data[charge_discharge_data["type"] == "discharge"][["Re", "Rct"]].describe())


Charge Data Statistics:
                 Re           Rct
count  2.815000e+03  2.815000e+03
mean   7.255344e-02  1.014191e-01
std    2.776051e-17  2.776051e-17
min    7.255344e-02  1.014191e-01
25%    7.255344e-02  1.014191e-01
50%    7.255344e-02  1.014191e-01
75%    7.255344e-02  1.014191e-01
max    7.255344e-02  1.014191e-01

Discharge Data Statistics:
                 Re           Rct
count  2.794000e+03  2.794000e+03
mean   7.255344e-02  1.014191e-01
std    1.388027e-17  1.388027e-17
min    7.255344e-02  1.014191e-01
25%    7.255344e-02  1.014191e-01
50%    7.255344e-02  1.014191e-01
75%    7.255344e-02  1.014191e-01
max    7.255344e-02  1.014191e-01


In [69]:
# Slightly offset discharge values for better visualization
offset = 0.01  # Small offset for separation in visualization
charge_discharge_data.loc[charge_discharge_data["type"] == "discharge", "Re"] += offset
charge_discharge_data.loc[charge_discharge_data["type"] == "discharge", "Rct"] += offset

In [70]:
# Plot Re over charge/discharge cycles with custom colors and opacity
fig_re = px.line(
    charge_discharge_data,
    x="cycle_number",
    y="Re",
    color="type",
    title="Electrolyte Resistance (Re) Over Charge/Discharge Cycles",
    labels={"Re": "Electrolyte Resistance (Ohms)", "cycle_number": "Cycle Number"},
    color_discrete_map={"charge": "blue", "discharge": "red"},  # Custom colors
)
fig_re.update_traces(opacity=0.8)  # Add transparency
fig_re.update_layout(template="plotly_dark")
fig_re.show()

In [71]:
# Plot Rct over charge/discharge cycles with custom colors and opacity
fig_rct = px.line(
    charge_discharge_data,
    x="cycle_number",
    y="Rct",
    color="type",
    title="Charge Transfer Resistance (Rct) Over Charge/Discharge Cycles",
    labels={"Rct": "Charge Transfer Resistance (Ohms)", "cycle_number": "Cycle Number"},
    color_discrete_map={"charge": "blue", "discharge": "red"},  # Custom colors
)
fig_rct.update_traces(opacity=0.8)  # Add transparency
fig_rct.update_layout(template="plotly_dark")
fig_rct.show()

In [72]:
# Filter data for impedance operations
impedance_data = metadata[metadata["type"] == "impedance"]

In [73]:
# Ensure data is sorted by cycle number for better visualization
impedance_data = impedance_data.sort_values(by="cycle_number")

In [74]:
# Plot Re over impedance cycles with custom colors and opacity
fig_impedance_re = px.line(
    impedance_data,
    x="cycle_number",
    y="Re",
    color="type",
    title="Electrolyte Resistance (Re) Over Impedance Cycles",
    labels={"Re": "Electrolyte Resistance (Ohms)", "cycle_number": "Cycle Number"},
    color_discrete_map={"impedance": "green"},  # Custom color for impedance
)
fig_impedance_re.update_traces(opacity=0.8)  # Add transparency
fig_impedance_re.update_layout(template="plotly_dark")
fig_impedance_re.show()

In [75]:
# Plot Rct over impedance cycles with custom colors and opacity
fig_impedance_rct = px.line(
    impedance_data,
    x="cycle_number",
    y="Rct",
    color="type",
    title="Charge Transfer Resistance (Rct) Over Impedance Cycles",
    labels={"Rct": "Charge Transfer Resistance (Ohms)", "cycle_number": "Cycle Number"},
    color_discrete_map={"impedance": "green"},  # Custom color for impedance
)
fig_impedance_rct.update_traces(opacity=0.8)  # Add transparency
fig_impedance_rct.update_layout(template="plotly_dark")
fig_impedance_rct.show()