# Un modelo de mezcla de marketing mejorado en Python

![banner](https://raw.githubusercontent.com/MayumyCH/marketing_mix_modeling_project_python/main/resources/banner.jpeg)

_Este proyecto propone el análisis de la data proporcionada por Kaggle para el área de Marketing; se busca encontrar **"Qué inversión publicitaria impulsa** realmente **las ventas"**._

In [52]:
import pandas as pd
import numpy as np
import datetime as dt

from sklearn import set_config
set_config(display='diagram')

In [53]:
# IMPORTAR los datos desde Computador
url_data = "dataset/datasetMedium.csv"

# Importar los datos desde un link
#url_data = "https://raw.githubusercontent.com/MayumyCH/marketing_mix_modeling_project_python/main/dataset/data_marketing.csv"

data_Marketing = pd.read_csv(url_data, delimiter=',',parse_dates = ['Date'],index_col = 'Date')

data_Marketing.head()
# data_Marketing.sample(5)

Unnamed: 0_level_0,TV,Radio,Banners,Sales
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-01-07,13528.1,0.0,0.0,9779.8
2018-01-14,0.0,5349.65,2218.93,13245.19
2018-01-21,0.0,4235.86,2046.96,12022.66
2018-01-28,0.0,3562.21,0.0,8846.95
2018-02-04,0.0,0.0,2187.29,9797.07


In [54]:
# MODELO
target = 'Sales'
predictoras = ['TV','Radio','Banners']
X = data_Marketing[predictoras]
y = data_Marketing[target]

## DIVISION DE LA DATA

In [55]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(X, y,test_size=0.3, random_state=888)
len(x_train)

140

## 🛠 Feature Engineering

In [56]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, check_array

from scipy.signal import convolve2d

In [57]:
# ExponentialSaturation es un transformer
class ExponentialSaturation (BaseEstimator, TransformerMixin):
    def __init__ (self, a = 1.):
        self.a = a
        
    def fit(self, X, y = None):
        X = check_array(X) 
        self._check_n_features(X, reset = True) # de BaseEstimator
        return self
    
    def transform (self, X):
        check_is_fitted (self)
        X = check_array (X)
        self._check_n_features (X, reset = False) # de BaseEstimator (para qué es el reset?)
        return 1 - np.exp (-self.a * X)

In [58]:
featureTransformer = ['TV','Radio','Banners']
exSat = ExponentialSaturation()
# exSat.fit(x_train[featureTransformer])
# exSat.transform(x_test[featureTransformer])

In [59]:
df = pd.DataFrame()
nombres = ['A', 'B', 'C','D', 'E', 'F','G','H']
edades = [100, 50, 30,120,9001,5,0,80]

df['Nombre'] = nombres
df['Spends'] = edades
df

exSat.fit(df[['Spends']])
resultttt = exSat.transform(df[['Spends']])
resultttt

array([[1.        ],
       [1.        ],
       [1.        ],
       [1.        ],
       [1.        ],
       [0.99326205],
       [0.        ],
       [1.        ]])

In [60]:
class ExponentialCarryover (BaseEstimator, TransformerMixin): 
    def __init__(self, strength = 0.5, length = 1):
        self.strength = strength
        self.length = length
        
    def fit(self, X, y = None):
        X = check_array(X)
        self._check_n_features(X, reset=True)
        self.sliding_window_ = (
            self.strength ** np.arange(self.length + 1)
        ).reshape(-1, 1)
        return self

    def transform(self, X: np.ndarray):
        check_is_fitted(self)
        X = check_array(X)
        self._check_n_features(X, reset=False)
        convolution = convolve2d(X, self.sliding_window_)
        if self.length > 0:
            convolution = convolution[: -self.length]
        return convolution

In [61]:
featureTransformer = ['TV','Radio','Banners']
exCarry = ExponentialCarryover()
# exCarry.fit(x_train[featureTransformer])
# exCarry.transform(x_test[featureTransformer])

# PIPELINES

## ColumnTransformer

In [62]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

In [63]:
# PIPELINE CON LAS 2 TRANSFORMACIONES 
# Que son necesarias para mis features numericos de interes

transformer_pipeline = Pipeline(
    [
        ('carryover', ExponentialCarryover()),
        ('saturation', ExponentialSaturation())
    ]
)

In [64]:
# adstock = ColumnTransformer(
#     [
#         ('adstock_ct',
#         adstock_pipe,
#         ['TV','Radio','Social Media'])
# ])

In [65]:
featureTransformer = [
    'TV',
    'Radio',
    'Banners'
]

# transformer_num == adstock
transformer_num= ColumnTransformer(
    [
        ("transformer_pipeline", transformer_pipeline, featureTransformer),
    ],
    remainder="passthrough" # Las demas variables no sean transformadas
)

display(transformer_num)

In [66]:
X_transformed = transformer_num.fit_transform(x_train, y_train)
X_transformed

array([[1., 0., 1.],
       [1., 0., 1.],
       [0., 0., 1.],
       [1., 0., 1.],
       [1., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 0., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [1., 0., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 1., 0.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 0., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [0., 1., 1.],
       [1., 0., 1.],
       [1., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [1., 1., 1.],
       [1., 1., 1.],
       [0., 1., 1.],
       [1., 0., 1.],
       [1., 0

In [67]:
X_transformed.shape

(140, 3)

In [68]:
num_cols = featureTransformer

num_pipeline = Pipeline(
    [
        ("transformer_num", transformer_num), #Transformacion de los datos numericos
    ]
)
display(num_pipeline)

# Como no tengo pipeline categorico
full_pipeline = num_pipeline

In [69]:
model = Pipeline(
    [
        ("transformer_num", transformer_num), #Transformacion de los datos numericos
        ("regression", LinearRegression())
    ]
)

In [43]:
# X_num = num_pipeline.fit_transform(x_train[num_cols], y_train)
# print(X_num)

In [71]:
model.fit(x_train, y_train)

In [74]:
from sklearn.model_selection import cross_val_score, TimeSeriesSplit
print(cross_val_score(model, x_train, y_train, cv=TimeSeriesSplit()).mean())

0.1871970334354187


In [None]:
## TUNEANDO LOS VALORES

In [75]:
from optuna.integration import OptunaSearchCV
from optuna.distributions import UniformDistribution,IntUniformDistribution

tuned_model = OptunaSearchCV(
    estimator=model,
    param_distributions={
    'adstock__tv_pipe__carryover__strength': UniformDistribution(0, 1),
    'adstock__tv_pipe__carryover__length': IntUniformDistribution(0, 6),
    'adstock__tv_pipe__saturation__a': UniformDistribution(0,0.01),
    'adstock__radio_pipe__carryover__strength': UniformDistribution(0, 1),
    'adstock__radio_pipe__carryover__length': IntUniformDistribution(0, 6),
    'adstock__radio_pipe__saturation__a': UniformDistribution(0,0.01),
    'adstock__banners_pipe__carryover__strength': UniformDistribution(0, 1),
    'adstock__banners_pipe__carryover__length': IntUniformDistribution(0, 6),
    'adstock__banners_pipe__saturation__a': UniformDistribution(0, 0.01),
    },
    n_trials=1000,

    cv=TimeSeriesSplit(),
    random_state=88
)

ModuleNotFoundError: No module named 'optuna'

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder


In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import FeatureUnion
from sklearn.linear_model import LinearRegression

In [None]:
# Para las columnas "TV, Radio y Banners"
adstock_pipe = Pipeline([
    ('carryover', ExponentialCarryover()),
    ('saturation', ExponentialSaturation())
])



In [None]:
class ExponentialSaturation:
    def __init__(self, a=1.):
        self.a = a
    def transform(self, X):
        return 1 - np.exp(-self.a*X)


In [None]:
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.utils.validation import check_is_fitted, check_array

class ExponentialSaturation(BaseEstimator, TransformerMixin):
    def __init__(self, a=1.):
        self.a = a
    def fit(self, X, y=None):
        X = check_array(X)
        self._check_n_features(X, reset=True) # from BaseEstimator
        
        return self
    
def transform(self, X):
    check_is_fitted(self)
    X = check_array(X)
    self._check_n_features(X, reset=False) # from BaseEstimator 
    return 1 - np.exp (-self.a * X)


