In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np

In [2]:
df = pd.read_csv('Advertising.csv')
print(df.describe())
print(df.corr())

       Unnamed: 0          TV       Radio   Newspaper       Sales
count  200.000000  200.000000  200.000000  200.000000  200.000000
mean   100.500000  147.042500   23.264000   30.554000   14.022500
std     57.879185   85.854236   14.846809   21.778621    5.217457
min      1.000000    0.700000    0.000000    0.300000    1.600000
25%     50.750000   74.375000    9.975000   12.750000   10.375000
50%    100.500000  149.750000   22.900000   25.750000   12.900000
75%    150.250000  218.825000   36.525000   45.100000   17.400000
max    200.000000  296.400000   49.600000  114.000000   27.000000
            Unnamed: 0        TV     Radio  Newspaper     Sales
Unnamed: 0    1.000000  0.017715 -0.110680  -0.154944 -0.051616
TV            0.017715  1.000000  0.054809   0.056648  0.782224
Radio        -0.110680  0.054809  1.000000   0.354104  0.576223
Newspaper    -0.154944  0.056648  0.354104   1.000000  0.228299
Sales        -0.051616  0.782224  0.576223   0.228299  1.000000


Modelo de regresión lineal

In [3]:
X = df[['TV', 'Radio', 'Newspaper']]
y = df['Sales']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

modelo = LinearRegression()
modelo.fit(X_train, y_train)

y_pred = modelo.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print(f'Error cuadrático medio: {mse}')

print('Coeficientes del modelo:', modelo.coef_)
print('Intercepto:', modelo.intercept_)

Error cuadrático medio: 3.1740973539761046
Coeficientes del modelo: [0.04472952 0.18919505 0.00276111]
Intercepto: 2.979067338122629


Distribuciones de probabilidad

In [4]:
def obtener_parametros_distribucion(variable):
    min_val = variable.min()
    max_val = variable.max()
    mode_val = variable.median()

    # Asegurarse de que el orden de los parámetros es correcto
    if mode_val < min_val:
        mode_val = min_val
    elif mode_val > max_val:
        mode_val = max_val

    return min_val, mode_val, max_val

tv_params = obtener_parametros_distribucion(df['TV'])
radio_params = obtener_parametros_distribucion(df['Radio'])
newspaper_params = obtener_parametros_distribucion(df['Newspaper'])

print(tv_params, radio_params, newspaper_params)

(0.7, 149.75, 296.4) (0.0, 22.9, 49.6) (0.3, 25.75, 114.0)


Simulación de Montecarlo

In [5]:
def simulacion_montecarlo(tv_params, radio_params, newspaper_params, intentos=1000):
    resultados = []
    for _ in range(intentos):
        tv = np.random.triangular(*tv_params)
        radio = np.random.triangular(*radio_params)
        newspaper = np.random.triangular(*newspaper_params)
        entrada = pd.DataFrame([[tv, radio, newspaper]], columns=['TV', 'Radio', 'Newspaper'])
        ventas = modelo.predict(entrada)[0]
        resultados.append((tv, radio, newspaper, ventas))
    return resultados

resultados = simulacion_montecarlo(tv_params, radio_params, newspaper_params)

resultados_df = pd.DataFrame(resultados, columns=['TV', 'Radio', 'Newspaper', 'Sales'])
print(resultados_df.head())

           TV      Radio  Newspaper      Sales
0  223.991140  12.924973  15.499384  15.486220
1   97.344767  30.382263  40.372119  13.192898
2  179.244508  37.989523  25.489987  18.254398
3   74.487860  20.191237  69.193228  10.322006
4  165.353989  34.522589  37.897250  17.011413


Valor del presupuesto

In [7]:
promedios = resultados_df.mean()

total_promedios = promedios['TV'] + promedios['Radio'] + promedios['Newspaper']
tv_percent = promedios['TV'] / total_promedios
radio_percent = promedios['Radio'] / total_promedios
newspaper_percent = promedios['Newspaper'] / total_promedios

print(f'Presupuesto normalizado (TV): {tv_percent:.2%}')
print(f'Presupuesto normalizado (Radio): {radio_percent:.2%}')
print(f'Presupuesto normalizado (Newspaper): {newspaper_percent:.2%}')

Presupuesto normalizado (TV): 67.64%
Presupuesto normalizado (Radio): 10.97%
Presupuesto normalizado (Newspaper): 21.39%
