# Applying other models to the dataset

In [22]:
import os

import pandas as pd
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

# Location of the preprocessed dataset. The default is the original absolute
# path, so behaviour is unchanged on the author's machine; set the
# DF_PROCESSED_PATH environment variable to run the notebook elsewhere.
DATA_PATH = os.environ.get(
    'DF_PROCESSED_PATH',
    '/home/pikeblessed/proyecto_phnan/deploy-project-datascience/data/df_processed.csv',
)
df = pd.read_csv(DATA_PATH)

In [23]:
# Feature matrix: every column except the target ('reach') and the two
# columns excluded from modelling ('date' and 'engagement').
X = df.drop(columns=['reach', 'date', 'engagement'])
# Regression target.
y = df['reach']

# Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

### Decision Tree

In [24]:
# Fit a depth-2 regression tree on the training split and predict the test split.
# random_state is pinned because scikit-learn randomly permutes the features at
# each split, so equally good splits could otherwise be chosen differently from
# one run to the next.
tree = DecisionTreeRegressor(max_depth=2, random_state=42).fit(X_train, y_train)
y_pred = tree.predict(X_test)

In [25]:
# Evaluating the decision tree model: R2 on both splits, then MAE, MAPE and
# RMSE on the test split.
# Predictions are recomputed from `tree` here because the shared `y_pred`
# variable is overwritten by every model cell; reading it could report another
# model's errors if cells are run out of order.
tree_pred = tree.predict(X_test)

print('R2 score test: ', tree.score(X_test, y_test))
print('R2 score train: ', tree.score(X_train, y_train))
print('-'*10)
print('MAE: ', mean_absolute_error(y_test, tree_pred))
print('-'*10)
print('MAPE: ', mean_absolute_percentage_error(y_test, tree_pred))
print('-'*10)
# RMSE is the square root of the MSE. The `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6, so it is avoided here.
print('RMSE: ', mean_squared_error(y_test, tree_pred) ** 0.5)

R2 score test:  0.8305746459149218
R2 score train:  0.8634207351733671
----------
MAE:  833.7031803614498
----------
MAPE:  0.34550486092520794
----------
RMSE:  1196.1980368977665


### Random Forest

In [26]:
# Fit a random forest with default hyperparameters on the training split and
# predict the test split.
# random_state is pinned so the fitted forest, and therefore the metrics, are
# repeatable across kernel restarts. Metrics saved in the notebook before the
# seed was added may differ slightly from a fresh run.
rf = RandomForestRegressor(random_state=42).fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [27]:
# Evaluating the random forest model: R2 on both splits, then MAE, MAPE and
# RMSE on the test split.
# Predictions are recomputed from `rf` here because the shared `y_pred`
# variable is overwritten by every model cell; reading it could report another
# model's errors if cells are run out of order.
rf_pred = rf.predict(X_test)

print('R2 score test: ', rf.score(X_test, y_test))
print('R2 score train: ', rf.score(X_train, y_train))
print('-'*10)
print('MAE: ', mean_absolute_error(y_test, rf_pred))
print('-'*10)
print('MAPE: ', mean_absolute_percentage_error(y_test, rf_pred))
print('-'*10)
# RMSE is the square root of the MSE. The `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6, so it is avoided here.
print('RMSE: ', mean_squared_error(y_test, rf_pred) ** 0.5)

R2 score test:  0.858186844281221
R2 score train:  0.9806199561483395
----------
MAE:  713.0902777777778
----------
MAPE:  0.22081419081858925
----------
RMSE:  1094.3900731154238


### Gradient Boosting Regressor

In [28]:
# Fit a gradient boosting regressor with default hyperparameters on the
# training split and predict the test split.
# random_state is pinned so the fitted ensemble, and therefore the metrics, are
# repeatable across kernel restarts. Metrics saved in the notebook before the
# seed was added may differ slightly from a fresh run.
gbr = GradientBoostingRegressor(random_state=42).fit(X_train, y_train)
y_pred = gbr.predict(X_test)

In [29]:
# Evaluating the gradient boosting regressor: R2 on both splits, then MAE,
# MAPE and RMSE on the test split.
# Predictions are recomputed from `gbr` here because the shared `y_pred`
# variable is overwritten by every model cell; reading it could report another
# model's errors if cells are run out of order.
gbr_pred = gbr.predict(X_test)

print('R2 score test: ', gbr.score(X_test, y_test))
print('R2 score train: ', gbr.score(X_train, y_train))
print('-'*10)
print('MAE: ', mean_absolute_error(y_test, gbr_pred))
print('-'*10)
print('MAPE: ', mean_absolute_percentage_error(y_test, gbr_pred))
print('-'*10)
# RMSE is the square root of the MSE. The `squared=False` shortcut was
# deprecated in scikit-learn 1.4 and removed in 1.6, so it is avoided here.
print('RMSE: ', mean_squared_error(y_test, gbr_pred) ** 0.5)

R2 score test:  0.8564117960310353
R2 score train:  0.9914761101079358
----------
MAE:  699.2588510376538
----------
MAPE:  0.22630267204738966
----------
RMSE:  1101.2179098892188
