# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

# End-to-end baseline: load the store and training tables, join them, drop
# incomplete rows, one-hot encode, fit a random forest on an 80/20 split and
# report the hold-out RMSE for the 'Sales' target.

# Load the data
store = pd.read_csv('store.csv')
train = pd.read_csv('train.csv')

# Quick inspection.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly; wrapping it in print() would emit a stray "None" line.
train.info()
print(train.describe())
store.info()
print(store.describe())

# Merge the two datasets on 'Store'; the left join keeps every row of `train`
# even when no matching store record exists.
data = pd.merge(train, store, on='Store', how='left')

# Cleaning: drop rows with missing values.
# NOTE(review): dropna() removes a row as soon as ANY column is NaN, including
# columns that came from store.csv through the merge. Confirm this does not
# discard a large share of the training rows.
data_clean = data.dropna()

# Feature / target selection.
# errors='ignore' skips any listed column that is absent instead of raising.
# NOTE(review): 'Customers' is presumably excluded because it would not be
# known at prediction time -- confirm.
X = data_clean.drop(['Sales', 'Date', 'Customers'], axis=1, errors='ignore')
y = data_clean['Sales']

# One-hot encode the categorical (object/category) columns; numeric columns
# are passed through unchanged.
X = pd.get_dummies(X)

# Train/test split: 20% held out, fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model: random forest regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Prediction and evaluation.
# The `squared=False` keyword of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6, so the RMSE is computed as the square
# root of the (default) MSE, which works on every scikit-learn version.
y_pred = model.predict(X_test)
rmse = mean_squared_error(y_test, y_pred) ** 0.5
print(f'RMSE: {rmse:.2f}')