In [1]:
import pandas as pd
import os
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Chemin vers les données
data_path = 'C:\\Users\\pc\\Desktop\\data'

# Fonction pour charger un fichier CSV
def load_csv(file_path):
    return pd.read_csv(file_path)

# Initialiser le modèle
model = SGDRegressor(max_iter=1000, tol=1e-3)

# Obtenir la liste des fichiers CSV dans les trois batchs
batch1_files = [os.path.join(data_path, 'batch 1', f) for f in os.listdir(os.path.join(data_path, 'batch 1')) if f.endswith('.csv')]
batch2_files = [os.path.join(data_path, 'batch 2', f) for f in os.listdir(os.path.join(data_path, 'batch 2')) if f.endswith('.csv')]
batch3_files = [os.path.join(data_path, 'batch 3', f) for f in os.listdir(os.path.join(data_path, 'batch 3')) if f.endswith('.csv')]

# Diviser les fichiers en ensembles d'entraînement et de test
train_files = batch1_files[:8] + batch2_files[:3] + batch3_files[:3]
test_files = batch1_files[8:] + batch2_files[3:] + batch3_files[3:]

# Entraîner le modèle sur les fichiers d'entraînement
for file in train_files:
    data = load_csv(file)
    
    # Préparation des données
    features = data[['Data_Point', 'Test_Time', 'Step_Time', 'Step_Index', 'Cycle_Index', 
                     'Current', 'Voltage', 'Charge_Capacity', 'Charge_Energy', 'Discharge_Energy', 
                     'dV/dt', 'Internal_Resistance', 'Temperature']]
    target = data['Discharge_Capacity']
    
    # Entraînement du modèle par morceaux
    model.partial_fit(features, target)

ValueError: Input X contains NaN.
SGDRegressor does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:
# Tester le modèle sur les fichiers de test
all_y_test = []
all_y_pred = []

for file in test_files:
    data = load_csv(file)
    
    # Préparation des données
    features = data[['Data_Point', 'Test_Time', 'Step_Time', 'Step_Index', 'Cycle_Index', 
                     'Current', 'Voltage', 'Charge_Capacity', 'Charge_Energy', 'Discharge_Energy', 
                     'dV/dt', 'Internal_Resistance', 'Temperature']]
    target = data['Discharge_Capacity']
    
    # Prédictions
    y_pred = model.predict(features)
    
    all_y_test.extend(target)
    all_y_pred.extend(y_pred)

In [None]:
# Évaluation du modèle
mse = mean_squared_error(all_y_test, all_y_pred)
r2 = r2_score(all_y_test, all_y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

In [None]:
# Affichage des résultats
plt.figure(figsize=(10, 6))
plt.scatter(all_y_test, all_y_pred, alpha=0.3)
plt.plot([min(all_y_test), max(all_y_test)], [min(all_y_test), max(all_y_test)], '--r', linewidth=2)
plt.xlabel('Valeurs Réelles')
plt.ylabel('Prédictions')
plt.title('Prédictions vs Valeurs Réelles')
plt.show()