In [33]:
#pip install -r requirements.txt
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
import chardet

# Sniff the file's encoding from its raw bytes with chardet.
with open('sales_data2.csv', 'rb') as f:
    result = chardet.detect(f.read())
detected_encoding = result['encoding']
print(f"Detected encoding: {detected_encoding}")

# Re-encode the CSV as UTF-8. NOTE: this overwrites 'sales_data2.csv' in place;
# the read finishes before the write starts because it is evaluated first.
pd.read_csv('sales_data2.csv', encoding=detected_encoding).to_csv(
    'sales_data2.csv', encoding='utf-8', index=False
)

# Load the now-UTF-8 file as the working frame.
df = pd.read_csv('sales_data2.csv', encoding='utf-8')

df.head()

In [None]:
# Parse the order timestamp, then split it into calendar/time components.
df['Order Date'] = pd.to_datetime(df['Order Date'])
order_dt = df['Order Date'].dt
df['MONTH'] = order_dt.month
df['DAY'] = order_dt.day
df['YEAR'] = order_dt.year
df['TIME'] = order_dt.time

# Build 1-based integer IDs for categories and products
# (pd.factorize codes start at 0, hence the +1).
for id_column, source_column in (('CATEGORY_ID', 'catégorie'), ('PRODUCT_ID', 'Product')):
    codes, _uniques = pd.factorize(df[source_column])
    df[id_column] = codes + 1

# Remove the Product_ean column when the file provides one.
if 'Product_ean' in df.columns:
    df = df.drop(columns=['Product_ean'])

df


In [None]:
# Neural network with TensorFlow/Keras: data preparation.
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
# LabelEncoder and train_test_split are used below; importing them here keeps
# the cell runnable on a fresh kernel (Restart & Run All).
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Work on a copy so the transformations below do not modify `df`.
df_nn = df.copy()

# Drop the original categorical columns, since ID columns already exist for them.
df_nn = df_nn.drop(['catégorie', 'Product'], axis=1)

# Label-encode every remaining object-dtype column (values are cast to str
# first). The fitted encoder for each column is kept in `label_encoders_nn`.
categorical_cols_nn = df_nn.select_dtypes(include=['object']).columns
label_encoders_nn = {}
for col in categorical_cols_nn:
    le = LabelEncoder()
    df_nn[col] = le.fit_transform(df_nn[col].astype(str))
    label_encoders_nn[col] = le

# Separate features and target: 'turnover' is the target; 'margin' and the raw
# 'Order Date' column are excluded from the features.
X_nn = df_nn.drop(['turnover', 'margin', 'Order Date'], axis=1)
y_nn = df_nn['turnover']

# Train/test split (80/20, fixed random_state for a repeatable split).
X_train_nn, X_test_nn, y_train_nn, y_test_nn = train_test_split(X_nn, y_nn, test_size=0.2, random_state=42)

# Standardise the features. The scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_nn)
X_test_scaled = scaler.transform(X_test_nn)

print(f"Nombre de features: {X_train_scaled.shape[1]}")
print(f"Train samples: {X_train_scaled.shape[0]}")
print(f"Test samples: {X_test_scaled.shape[0]}")


In [None]:
# Build the neural network model.

# Seed Python's `random`, NumPy and the Keras backend so weight initialisation,
# dropout masks and batch shuffling are repeatable across Restart & Run All.
# 42 matches the random_state used for the train/test split.
# NOTE: this reseeds the global NumPy / `random` generators for later cells too.
keras.utils.set_random_seed(42)

# Fully connected regressor: 128 -> 64 -> 32 -> 16 -> 1, with dropout after
# the first three hidden layers.
model_nn = keras.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.3),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dropout(0.1),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)  # Output layer for regression (single linear unit)
])

# Compile the model: Adam optimiser, MSE loss, MAE and MSE tracked as metrics.
model_nn.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss='mse',
    metrics=['mae', 'mse']
)

# Show the model architecture.
model_nn.summary()


In [None]:
# Train the model.

# Stop once val_loss has not improved for 15 epochs and roll back to the
# best weights seen so far.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True
)

# Halve the learning rate when val_loss plateaus. The patience must be shorter
# than the early-stopping patience (15): with the previous value of 50, early
# stopping would in practice end training before this callback could fire.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001
)

# 20% of the training data is held out by Keras as the validation set that
# both callbacks monitor.
history = model_nn.fit(
    X_train_scaled, y_train_nn,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    callbacks=[early_stopping, reduce_lr],
    verbose=1
)


In [None]:
# Visualise the training process: loss on the left panel, MAE on the right.
fig_history, history_axes = plt.subplots(1, 2, figsize=(14, 5))

# One row per panel:
# (train key, validation key, train label, validation label, y-label, title)
history_panels = (
    ('loss', 'val_loss', 'Train Loss', 'Validation Loss',
     'Loss (MSE)', 'Model Loss During Training'),
    ('mae', 'val_mae', 'Train MAE', 'Validation MAE',
     'Mean Absolute Error', 'Model MAE During Training'),
)

for ax, (train_key, val_key, train_label, val_label, y_label, panel_title) in zip(history_axes, history_panels):
    ax.plot(history.history[train_key], label=train_label)
    ax.plot(history.history[val_key], label=val_label)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.show()


In [None]:
# Evaluate the model on the held-out test set.
# Import only mean_squared_error: `r2_score` is used below as the name of the
# Random Forest R² *value*, so sklearn's r2_score function must not shadow it.
from sklearn.metrics import mean_squared_error

y_pred_nn = model_nn.predict(X_test_scaled).flatten()

# Compute the metrics (R² and MAE are computed directly with NumPy).
mse_nn = mean_squared_error(y_test_nn, y_pred_nn)
rmse_nn = np.sqrt(mse_nn)
r2_nn = 1 - (np.sum((y_test_nn - y_pred_nn)**2) / np.sum((y_test_nn - y_test_nn.mean())**2))
mae_nn = np.mean(np.abs(y_test_nn - y_pred_nn))

print("=" * 50)
print("XARXA NEURONAL - Resultats")
print("=" * 50)
print(f'MSE:  {mse_nn:.2f}')
print(f'RMSE: {rmse_nn:.2f}')
print(f'MAE:  {mae_nn:.2f}')
print(f'R² Score: {r2_nn:.4f}')
print("=" * 50)

# Compare with Random Forest.
# `mse`, `rmse` and `r2_score` are expected to be numeric results left by a
# Random Forest cell. They are looked up defensively so this cell still runs
# on a fresh kernel where that cell is missing or was not executed.
rf_metrics = {name: globals().get(name) for name in ('mse', 'rmse', 'r2_score')}
rf_available = all(
    isinstance(value, (int, float, np.integer, np.floating))
    for value in rf_metrics.values()
)

if rf_available:
    print("\nCOMPARACIÓ AMB RANDOM FOREST")
    print("=" * 50)
    print(f'Random Forest MSE:  {mse:.2f}')
    print(f'Random Forest RMSE: {rmse:.2f}')
    print(f'Random Forest R²:   {r2_score:.4f}')
    print("=" * 50)
    # Relative change with respect to the Random Forest values, in percent.
    print(f'\nMillora en RMSE: {((rmse - rmse_nn) / rmse * 100):.2f}%')
    print(f'Millora en R²: {((r2_nn - r2_score) / r2_score * 100):.2f}%')
else:
    print("\nResultats de Random Forest no disponibles (mse, rmse, r2_score); s'omet la comparació.")


In [None]:
# Visualise the neural network results in three side-by-side panels.
fig, axes = plt.subplots(1, 3, figsize=(18, 5))
ax_fit, ax_resid, ax_errors = axes

# Residuals (actual minus predicted) are shared by panels 2 and 3.
residuals_nn = y_test_nn - y_pred_nn

# Panel 1: predicted vs actual, with the y = x reference line in dashed red.
turnover_range = [y_test_nn.min(), y_test_nn.max()]
ax_fit.scatter(y_test_nn, y_pred_nn, alpha=0.5, color='blue')
ax_fit.plot(turnover_range, turnover_range, 'r--', lw=2)
ax_fit.set(
    xlabel='Actual Turnover',
    ylabel='Predicted Turnover',
    title=f'Neural Network: Predicted vs Actual\nR² = {r2_nn:.4f}',
)

# Panel 2: residuals against predictions, with a zero reference line.
ax_resid.scatter(y_pred_nn, residuals_nn, alpha=0.5, color='green')
ax_resid.axhline(y=0, color='r', linestyle='--', lw=2)
ax_resid.set(
    xlabel='Predicted Turnover',
    ylabel='Residuals',
    title=f'Residual Plot\nRMSE = {rmse_nn:.2f}',
)

# Panel 3: histogram of the residuals, with a zero reference line.
ax_errors.hist(residuals_nn, bins=50, edgecolor='black', alpha=0.7, color='purple')
ax_errors.axvline(x=0, color='r', linestyle='--', lw=2)
ax_errors.set(
    xlabel='Residuals',
    ylabel='Frequency',
    title=f'Distribution of Prediction Errors\nMAE = {mae_nn:.2f}',
)

# Same light grid on every panel.
for panel in axes:
    panel.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()
