In [None]:
import os
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Path to the folder containing CSV files.
# Built with os.path.join so it works on every operating system; the previous
# backslash literal contained the invalid escape sequences "\c" and "\d" and
# was only usable as a Windows path.
folder_path = os.path.join('archive', 'cleaned_dataset', 'data')

# List all CSV files in the folder.
# os.listdir returns entries in arbitrary order, so the names are sorted to make
# every later step (previewing the first files, concatenation order) reproducible.
csv_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.csv'))
print(csv_files)

In [None]:
import pandas as pd

# Preview the head and the column index of the first few CSV files
preview_count = 5  # raise this value to inspect more files
for csv_name in csv_files[:preview_count]:
    preview_frame = pd.read_csv(os.path.join(folder_path, csv_name))
    print(f"Contents of {csv_name}:")
    print(preview_frame.head())     # first rows of the file
    print(preview_frame.columns)    # column names of the file
    print("\n")

In [None]:
# Columns a file must provide to be used in the analysis.
# Defined once, before the loop, because the list does not depend on the file
# being checked.
required_columns = ['Voltage_measured', 'Current_measured', 'Temperature_measured',
                    'Current_charge', 'Voltage_charge', 'Time']

# List to hold relevant files
relevant_files = []

# Check each file for required columns
for filename in csv_files:
    file_path = os.path.join(folder_path, filename)
    # nrows=0 parses only the header row; the column names are all that is
    # needed here, so the data rows are not loaded.
    header = pd.read_csv(file_path, nrows=0)

    # Keep the file only if every required column is present
    if all(col in header.columns for col in required_columns):
        relevant_files.append(filename)

print("Relevant CSV files:")
print(relevant_files)

In [None]:
# Load every relevant CSV file and stack them into a single DataFrame.
# ignore_index=True gives the combined frame one continuous 0..n-1 row index.
dataframes = [
    pd.read_csv(os.path.join(folder_path, filename))
    for filename in relevant_files
]
combined_data = pd.concat(dataframes, ignore_index=True)

# Mark zero currents as missing so the division below cannot divide by zero.
# The result is assigned back to the column: calling replace(..., inplace=True)
# on a selected column is chained assignment, which does not update the parent
# frame under pandas Copy-on-Write. np.nan is used as the marker so the column
# stays a float column.
combined_data['Current_measured'] = combined_data['Current_measured'].replace(0, np.nan)

# Impedance here is the ratio of measured voltage to measured current;
# rows whose current was zero end up with a missing impedance.
combined_data['Impedance'] = combined_data['Voltage_measured'] / combined_data['Current_measured']

# Preview the impedance next to the two columns it was derived from
impedance_preview_columns = ['Voltage_measured', 'Current_measured', 'Impedance']
print(combined_data[impedance_preview_columns].head())

# Preview all columns of the combined DataFrame
print(combined_data.head())

# Count the missing values in every column
missing_per_column = combined_data.isnull().sum()
print(missing_per_column)

# Option 1: discard every row that still has a missing value
combined_data.dropna(inplace=True)
# Row-to-row differences used for the incremental capacity (dQ/dV).
# NOTE(review): 'dQ' is the difference of consecutive Current_charge readings,
# i.e. a change in current rather than an integrated charge - confirm this is
# the intended definition.
# NOTE(review): the frame is a concatenation of several files, so diff() also
# spans the boundary between the last row of one file and the first row of the
# next - confirm whether per-file differences are wanted.
combined_data['dQ'] = combined_data['Current_charge'].diff()  # Change in charge

# Zero voltage steps are marked as missing so the division below cannot divide
# by zero. The result is assigned directly to the column instead of calling
# replace(..., inplace=True) on a selected column, which is chained assignment
# and does not update the parent frame under pandas Copy-on-Write. np.nan keeps
# the column a float column.
combined_data['dV'] = combined_data['Voltage_measured'].diff().replace(0, np.nan)  # Change in voltage

# Calculate incremental capacity (dQ/dV)
combined_data['dQ/dV'] = combined_data['dQ'] / combined_data['dV']

# Rows without a usable dQ/dV value are left out of the plotting data
incremental_capacity_data = combined_data.dropna(subset=['dQ/dV'])

# Option 2: Fill the remaining missing values with the column mean.
# numeric_only=True restricts the mean to numeric columns, so a text column in
# the input files cannot make the aggregation raise.
combined_data.fillna(combined_data.mean(numeric_only=True), inplace=True)

# Integrate dQ/dV over the measured voltage with the trapezoidal rule.
# np.trapezoid is the NumPy >= 2.0 name of this function; older NumPy versions
# only provide np.trapz, so that name is used as the fallback.
# The inputs are converted to plain float arrays so the integration does not
# depend on the dtype the columns happen to have.
integrate = getattr(np, 'trapezoid', None) or np.trapz
total_capacity = integrate(
    incremental_capacity_data['dQ/dV'].to_numpy(dtype=float),
    incremental_capacity_data['Voltage_measured'].to_numpy(dtype=float),
)

# Add the total capacity to the dataset.
# NOTE(review): total_capacity is a single scalar, so this column holds the same
# value in every row - confirm this is the intended modelling target.
combined_data['Total_Capacity'] = total_capacity

# Convert 'Time' to datetime; unit='s' interprets the numeric values as seconds
# counted from the Unix epoch. Adjust as necessary.
combined_data['Time'] = pd.to_datetime(combined_data['Time'], unit='s')

In [None]:
# Summary statistics of the combined DataFrame
summary_statistics = combined_data.describe()
print(summary_statistics)

In [None]:

# Distribution of the measured voltage: 30-bin histogram with a KDE overlay
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(combined_data['Voltage_measured'], bins=30, kde=True, ax=ax)
ax.set_title('Distribution of Voltage Measured')
ax.set_xlabel('Voltage (V)')
ax.set_ylabel('Frequency')
plt.show()

In [None]:
# Correlation matrix.
# Only numeric columns are correlated: 'Time' has been converted to datetime by
# this point, and DataFrame.corr() no longer silently skips non-numeric columns
# in pandas >= 2.0, so passing the whole frame would raise.
correlation_matrix = combined_data.select_dtypes(include='number').corr()

# Annotated heatmap of the pairwise correlations
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt=".2f", cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

In [None]:
# 3D scatter of the impedance column: temperature on x, impedance on y and the
# DataFrame row index on z
temperature_values = combined_data['Temperature_measured']
impedance_values = combined_data['Impedance']
row_positions = combined_data.index

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(temperature_values, impedance_values, row_positions, c='b', marker='o')

ax.set_xlabel('Temperature (°C)')
ax.set_ylabel('Impedance (Ohms)')
ax.set_zlabel('Aging Cycles')
ax.set_title('3D Plot of Impedance Changes')
plt.show()

This is the end of the task for the 3D plot.

This is part (b) of the assignment.

In [None]:
# 3D scatter of the incremental capacity: voltage on x, dQ/dV on y and the
# DataFrame row index on z
voltage_values = incremental_capacity_data['Voltage_measured']
dq_dv_values = incremental_capacity_data['dQ/dV']
row_positions = incremental_capacity_data.index

fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(voltage_values, dq_dv_values, row_positions, c='r', marker='o')

ax.set_xlabel('Voltage')
ax.set_ylabel('Incremental Capacity')
ax.set_zlabel('Index (Aging Cycles)')
ax.set_title('3D Plot of Incremental Capacity Changes')
plt.show()

Building a machine learning model that predicts the current capacity of the battery from its EIS signature.

In [24]:
# Feature matrix: the measurement columns the model is trained on (example set)
feature_columns = ['Voltage_measured', 'Current_measured', 'Temperature_measured', 'Impedance']
X = combined_data[feature_columns]

# Target vector (replace with the actual target variable)
y = combined_data['Total_Capacity']

In [25]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest settings: 100 trees, fixed seed for repeatable results
forest_settings = {'n_estimators': 100, 'random_state': 42}
model = RandomForestRegressor(**forest_settings)

# Fit the forest on the training split
model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict the held-out rows
y_pred = model.predict(X_test)

# Score the predictions against the held-out targets
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

In [None]:
# Pair every feature name with the importance the fitted model assigned to it
feature_names = X.columns
importances = model.feature_importances_

# One row per feature, most important first
feature_importance_df = (
    pd.DataFrame({'Feature': feature_names, 'Importance': importances})
    .sort_values(by='Importance', ascending=False)
)

# Horizontal bar chart of the importances
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=feature_importance_df, ax=ax)
ax.set_title('Feature Importance')
plt.show()

In [32]:
def predict_current_capacity(voltage, current, temperature, impedance, estimator=None):
    """Predict the capacity for a single set of measurements.

    Args:
        voltage: Value for the 'Voltage_measured' feature.
        current: Value for the 'Current_measured' feature.
        temperature: Value for the 'Temperature_measured' feature.
        impedance: Value for the 'Impedance' feature.
        estimator: Optional fitted object exposing ``predict``. When omitted,
            the notebook-level ``model`` is used, which keeps existing
            four-argument calls working unchanged.

    Returns:
        The first (and only) element of the estimator's prediction.
    """
    if estimator is None:
        # Fall back to the model trained earlier in the notebook
        estimator = model

    # Single-row DataFrame whose column names and order match the training features
    input_data = pd.DataFrame({
        'Voltage_measured': [voltage],
        'Current_measured': [current],
        'Temperature_measured': [temperature],
        'Impedance': [impedance]
    })
    predicted_capacity = estimator.predict(input_data)
    return predicted_capacity[0]

In [None]:
# Read the four model inputs from the user.
# Each input() call carries a prompt so it is clear which value is expected;
# without prompts the user faces four unlabeled input boxes.
voltage_input = float(input("Enter measured voltage: "))
current_input = float(input("Enter measured current: "))
temperature_input = float(input("Enter measured temperature: "))
impedance_input = float(input("Enter impedance: "))

# Run the trained model on the entered values
predicted_capacity = predict_current_capacity(voltage_input, current_input, temperature_input, impedance_input)

# Display the result
print(f"Predicted Current Capacity: {predicted_capacity:.2f} Ah")
