In [1]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.arima.model import ARIMA

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [2]:
database_path = "./data/R&D_automotive.xls"

In [3]:
data = pd.read_excel(database_path)

In [4]:
df_cleaned = data.dropna()

In [5]:
df_cleaned = df_cleaned.drop(columns=['Employees'])

In [6]:
df_cleaned = df_cleaned.reset_index(drop=True)

In [7]:
features = data[['Company', 'Op.profits (€million)', 'Net sales (€million)', 'R&D  (€million)', 'Capex (€million)']]

In [8]:
all_data_for_optimization = features[features['R&D  (€million)'] > 1000]

In [9]:
# Split the data into features and target
X = all_data_for_optimization[['R&D  (€million)', 'Capex (€million)']]
y = all_data_for_optimization['Op.profits (€million)']

In [10]:
X

Unnamed: 0,R&D (€million),Capex (€million)
0,7203.000000,8087.000000
1,9515.000000,10493.000000
2,11743.000000,11385.000000
3,13120.000000,12012.000000
4,13612.000000,13213.000000
...,...,...
275,1828.179753,3519.200206
276,2035.773396,3803.744743
277,2323.170004,4077.426555
278,2319.749927,4785.803757


In [11]:
y

0      10930.000000
1       8333.000000
2      11500.000000
3      12139.000000
4      -1228.000000
           ...     
275     3563.240390
276     1896.053480
277     2772.031700
278     1793.648196
279     4969.664897
Name: Op.profits (€million), Length: 180, dtype: float64

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [13]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [14]:
# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
X_test

array([[ 1.6844529 ,  0.16022965],
       [ 1.38931862,  3.12501942],
       [-0.96463056, -0.64118686],
       [-0.25955793,  0.68833375],
       [-0.87147036, -0.66830418],
       [ 0.70943417, -0.15884261],
       [ 0.09222625, -0.00501493],
       [ 0.57299029,  0.54709929],
       [-0.6771595 , -0.65257097],
       [-0.92407999, -0.43238395],
       [-0.06655623, -0.50585621],
       [-0.76900361, -0.61813239],
       [ 0.8226108 ,  2.25680997],
       [-0.84245336, -0.47484664],
       [ 0.02956475, -0.12054885],
       [ 1.02605586, -0.03656162],
       [ 0.80558752, -0.46756531],
       [-1.00759445, -0.68688812],
       [-0.74947808, -0.49353798],
       [ 0.11571594, -0.20262421],
       [-0.77966247, -0.50825977],
       [-0.75184667, -0.40572782],
       [-0.81550099, -0.53792946],
       [ 0.81665165,  0.07310069],
       [ 3.5561996 ,  0.67939806],
       [-0.89349918, -0.38929931],
       [ 0.75394357, -0.03827458],
       [ 1.49925908,  0.21055413],
       [-0.97553969,

In [16]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [17]:
# Build the neural network model
model = Sequential([
    Dense(64, input_dim=2, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='linear')
])

# Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

In [18]:

# Train the model
model.fit(X_train, y_train, epochs=100, batch_size=10, validation_split=0.2)

Epoch 1/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 37ms/step - loss: 56286880.0000 - mae: 5574.6548 - val_loss: 32177214.0000 - val_mae: 4420.2383
Epoch 2/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 59036728.0000 - mae: 5657.4380 - val_loss: 32176810.0000 - val_mae: 4420.1943
Epoch 3/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 48964556.0000 - mae: 5088.3242 - val_loss: 32176376.0000 - val_mae: 4420.1484
Epoch 4/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 54633440.0000 - mae: 5209.8745 - val_loss: 32175888.0000 - val_mae: 4420.0952
Epoch 5/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 44179244.0000 - mae: 4925.4380 - val_loss: 32175340.0000 - val_mae: 4420.0366
Epoch 6/100
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 48632336.0000 - mae: 5131.4629 - val_loss: 32174722

<keras.src.callbacks.history.History at 0x212da4cdca0>

In [19]:
# Evaluate the model
loss, mae = model.evaluate(X_test, y_test)
print(f"Mean Absolute Error: {mae}")

[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 64967508.0000 - mae: 5618.0791 
Mean Absolute Error: 5740.6064453125


In [20]:
print(f"Root Mean Squared Error: {np.sqrt(mae)}")

Root Mean Squared Error: 75.76678985751278


In [21]:
from scipy.optimize import differential_evolution

# Define the profit function using the trained model
def profit_function(params):
    R_and_D, Capex = params
    return -model.predict(np.array([[R_and_D, Capex]]))[0]  # Negate for maximization

# Bounds based on your R&D and Capex ranges
bounds = [(X['R&D  (€million)'].min(), X['R&D  (€million)'].max()), (X['Capex (€million)'].min(), X['Capex (€million)'].max())]

# Perform differential evolution to find the optimal values
result = differential_evolution(profit_function, bounds, strategy='best1bin', maxiter=1000, popsize=15, tol=0.01, mutation=(0.5, 1), recombination=0.7, seed=42)
optimal_params = result.x
optimal_profit = -result.fun

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23

In [22]:
R_and_D_range = np.linspace(X['R&D  (€million)'].min(), X['R&D  (€million)'].max(), 50)
Capex_range = np.linspace(X['Capex (€million)'].min(), X['Capex (€million)'].max(), 50)
R_and_D_grid, Capex_grid = np.meshgrid(R_and_D_range, Capex_range)

# Vectorized prediction across the grid
profits_grid = model.predict(np.c_[R_and_D_grid.ravel(), Capex_grid.ravel()])
profits_grid = profits_grid.reshape(R_and_D_grid.shape)

[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 705us/step


In [23]:
import plotly.graph_objects as go

# Assuming R_and_D_grid, Capex_grid, and profits_grid are numpy arrays
# and optimal_params is a list or array containing the optimal R&D and Capex values

fig = go.Figure()

# Surface plot
fig.add_trace(go.Surface(z=profits_grid, x=R_and_D_grid, y=Capex_grid, colorscale='Viridis', opacity=0.8))

# Optimal point
fig.add_trace(go.Scatter3d(x=[optimal_params[0]], y=[optimal_params[1]], z=[-profit_function(optimal_params)],
                           mode='markers', marker=dict(size=10, color='red'), name='Optimal Point'))

# Adding labels and title
fig.update_layout(scene=dict(
                    xaxis_title='R&D (€million)',
                    yaxis_title='Capex (€million)',
                    zaxis_title='Operating Profit (€million)'),
                  title='Operating Profits vs. R&D and Capex Spending',
                  coloraxis_colorbar=dict(title='Operating Profit (€million)'))

fig.show()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
