In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

# Path to the data directory (hard-coded, machine-specific absolute Windows path;
# adjust it when running on another machine).
data_path = 'C:\\Users\\pc\\Desktop\\data'

def load_batch_in_chunks(batch_path, chunk_size=10000):
    """Load every CSV file of a batch folder into a single DataFrame.

    Each file is read in pieces of ``chunk_size`` rows and all pieces are
    concatenated, with the index renumbered from 0.

    Args:
        batch_path: Directory that directly contains the ``.csv`` files.
        chunk_size: Number of rows read per chunk by ``pd.read_csv``.

    Returns:
        A ``pandas.DataFrame`` holding the rows of all CSV files, taken in
        sorted file-path order.

    Raises:
        ValueError: If ``batch_path`` contains no file ending in ``.csv``.
    """
    # os.listdir gives no ordering guarantee; sorting keeps the row order
    # (and anything seeded downstream, e.g. a random split) reproducible.
    csv_files = sorted(
        os.path.join(batch_path, name)
        for name in os.listdir(batch_path)
        if name.endswith('.csv')
    )
    if not csv_files:
        # pd.concat would fail anyway with an opaque "No objects to
        # concatenate"; raise the same exception type with a useful message.
        raise ValueError(f'No CSV files found in {batch_path!r}')

    chunks = [
        chunk
        for csv_file in csv_files
        for chunk in pd.read_csv(csv_file, chunksize=chunk_size)
    ]
    return pd.concat(chunks, ignore_index=True)

# Load the data of the three batch folders, each read chunk by chunk
batch1_dir, batch2_dir, batch3_dir = (
    os.path.join(data_path, folder_name)
    for folder_name in ('batch 1', 'batch 2', 'batch 3')
)
batch1_data = load_batch_in_chunks(batch1_dir)
batch2_data = load_batch_in_chunks(batch2_dir)
batch3_data = load_batch_in_chunks(batch3_dir)

# Stack the three batches into one DataFrame with a fresh 0..n-1 index
all_data = pd.concat(
    [batch1_data, batch2_data, batch3_data],
    ignore_index=True,
)

# Data preparation: input columns and the column to predict
feature_columns = [
    'Data_Point',
    'Test_Time',
    'Step_Time',
    'Step_Index',
    'Cycle_Index',
    'Current',
    'Voltage',
    'Charge_Capacity',
    'Charge_Energy',
    'Discharge_Energy',
    'dV/dt',
    'Internal_Resistance',
    'Temperature',
]
features = all_data[feature_columns]
target = all_data['Discharge_Capacity']

# Train/test split: 20% of the rows are held out, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=42,
)

# Model training: random forest of 100 trees, fixed seed
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predictions on the held-out rows
y_pred = model.predict(X_test)

# Model evaluation
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse}')
print(f'R^2 Score: {r2}')

# Display the results: predicted vs. actual values, plus the y = x diagonal
# (red dashed line) spanning the range of the actual test values
diag_low, diag_high = y_test.min(), y_test.max()
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_pred, alpha=0.3)
plt.plot([diag_low, diag_high], [diag_low, diag_high], '--r', linewidth=2)
plt.xlabel('Valeurs Réelles')
plt.ylabel('Prédictions')
plt.title('Prédictions vs Valeurs Réelles')
plt.show()
