# Import Libraries

In [1]:
import pandas as pd
import matplotlib as pt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
%matplotlib inline
pt.style.use('ggplot')

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor

# Import Dataset

In [2]:
# Load the training data from the CSV file in the working directory.
prestamosMas180dias = pd.read_csv(filepath_or_buffer="BacPrestamos+180Dias.csv")

In [3]:
# Preview the first five rows of the raw frame.
prestamosMas180dias.head(5)

Unnamed: 0,RowId,Año,Mes,Actividad,+180
0,2148,2016,11,Consumo,39149490.0
1,2158,2016,12,Inmuebles,282297900.0
2,2166,2016,12,Consumo,22865870.0
3,2176,2017,1,Inmuebles,282591400.0
4,2184,2017,1,Consumo,23633780.0


# Data Cleansing

In [4]:
# Encode the loan category as an integer: 1 = Inmuebles, 2 = Consumo.
# The result is assigned back to the column instead of calling
# `df['Actividad'].replace(..., inplace=True)`: that chained in-place call
# operates on a temporary Series and has no effect under pandas Copy-on-Write.
codigos_actividad = {'Inmuebles': 1, 'Consumo': 2}
prestamosMas180dias['Actividad'] = (
    prestamosMas180dias['Actividad']
    .replace(codigos_actividad)
    .infer_objects()  # keep the integer dtype the old replace() downcast produced
)
# Remove the RowId column. errors='ignore' keeps the cell re-runnable after
# the column has already been dropped.
prestamosMas180dias = prestamosMas180dias.drop(columns=['RowId'], errors='ignore')

In [5]:
# List the column names that remain after cleansing (no reordering is done here).
list(prestamosMas180dias.columns)

['Año', 'Mes', 'Actividad', '+180']

In [6]:
# Inspect the last five rows to confirm the cleansing was applied to the whole frame.
prestamosMas180dias.tail(5)

Unnamed: 0,Año,Mes,Actividad,+180
311,2016,9,1,74274639.77
312,2016,9,2,20738071.2
313,2016,10,1,74417389.34
314,2016,10,2,21424390.82
315,2016,11,1,74195787.63


# Definir Conjuntos de Datos 

In [7]:
# Features: the first three columns of the cleansed frame.
x = prestamosMas180dias.iloc[:, :3]
# Target: the fourth column.
y = prestamosMas180dias.iloc[:, 3]

In [8]:
# Preview the first five rows of the feature matrix.
x.head(5)

Unnamed: 0,Año,Mes,Actividad
0,2016,11,2
1,2016,12,1
2,2016,12,2
3,2017,1,1
4,2017,1,2


In [9]:
# Preview the first five values of the target series.
y.head(5)

0    3.914949e+07
1    2.822979e+08
2    2.286587e+07
3    2.825914e+08
4    2.363378e+07
Name: +180, dtype: float64

In [10]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, random_state=0, test_size=0.3
)
# Show the shapes of the four resulting pieces.
(x_train.shape, x_test.shape, y_train.shape, y_test.shape)

((221, 3), (95, 3), (221,), (95,))

# Modelos de Regresión

Usando Linear Regression

In [11]:
# Fit a linear regression on the training split and predict the held-out split.
lreg = LinearRegression()
lreg.fit(x_train, y_train)
lpred = lreg.predict(x_test)

# Compute each metric once and reuse it in the report below.
lreg_mse = mean_squared_error(y_test, lpred)
lreg_r2 = r2_score(y_test, lpred)
lreg_mae = mean_absolute_error(y_test, lpred)

# Fitted coefficients of the model.
print('Coefficients: \n', lreg.coef_)
# Mean squared error, formatted to two decimals.
print('Mean squared error: %.2f' % lreg_mse)
# Coefficient of determination: 1 is perfect prediction.
print('Coefficient of determination: %.2f' % lreg_r2)
print('-------------------------------------------------------------------------')
print('Mean Absolute Error:', lreg_mae)
print('Mean Squared Error:', lreg_mse)
print('Root Mean Squared Error:', np.sqrt(lreg_mse))

Coefficients: 
 [ 1.64460801e+07  3.05623651e+06 -1.46950600e+08]
Mean squared error: 24958378428581284.00
Coefficient of determination: 0.22
-------------------------------------------------------------------------
Mean Absolute Error: 99182660.05283625
Mean Squared Error: 2.4958378428581284e+16
Root Mean Squared Error: 157982209.21540907


Usando Random Forest Regressor

In [12]:
# Fit a random forest on the training split and predict the held-out split.
# random_state is fixed (same seed as the train/test split) so the metrics
# below and the final predictions are reproducible across runs; without it
# every execution builds a different forest.
RFreg = RandomForestRegressor(random_state=0)
RFreg = RFreg.fit(x_train, y_train)
RFpred = RFreg.predict(x_test)

# Compute the squared error once; the RMSE is derived from it.
rf_mse = mean_squared_error(y_test, RFpred)
print('-------------------------------------------------------------------------')
print('Mean Absolute Error:', mean_absolute_error(y_test, RFpred))
print('Mean Squared Error:', rf_mse)
print('Root Mean Squared Error:', np.sqrt(rf_mse))

-------------------------------------------------------------------------
Mean Absolute Error: 56246518.66994523
Mean Squared Error: 9196451476108854.0
Root Mean Squared Error: 95898130.72270416


Importar dataset de Predicción

In [13]:
# Load the rows to be predicted from the CSV file in the working directory.
dspred = pd.read_csv(filepath_or_buffer="BacPrestamos+180DiasPred.csv")

In [14]:
# Encode the loan category as an integer: 1 = Inmuebles, 2 = Consumo.
# The result is assigned back to the column instead of calling
# `df['Actividad'].replace(..., inplace=True)`: that chained in-place call
# operates on a temporary Series and has no effect under pandas Copy-on-Write.
codigos_actividad = {'Inmuebles': 1, 'Consumo': 2}
dspred['Actividad'] = (
    dspred['Actividad']
    .replace(codigos_actividad)
    .infer_objects()  # keep the integer dtype the old replace() downcast produced
)
# Remove the 'Mas 180' and 'RowId' columns so only the model features remain.
# errors='ignore' keeps the cell re-runnable after the columns are already gone.
dspred = dspred.drop(columns=['Mas 180', 'RowId'], errors='ignore')

In [15]:
# Preview the first five rows of the prediction inputs.
dspred.head(5)

Unnamed: 0,Año,Mes,Actividad
0,2020,1,1
1,2020,1,2
2,2020,2,1
3,2020,2,2
4,2020,3,1


In [16]:
# Predict with exactly the columns the forest was trained on, in training order.
# Selecting by x_train.columns keeps this cell re-runnable after the '+180'
# prediction column has been appended to dspred, and guards against a
# different column order in the prediction file.
FinalPred = RFreg.predict(dspred[x_train.columns])

In [17]:
# Display the array returned by RFreg.predict for the prediction dataset.
FinalPred

array([2.71721021e+08, 6.39746536e+07, 2.30912752e+08, 8.44574141e+07,
       2.33726698e+08, 6.75612799e+07, 2.72676805e+08, 5.07517804e+07,
       2.59332233e+08, 4.67090481e+07, 1.93308146e+08, 4.79168641e+07,
       1.64206950e+08, 5.70203321e+07, 1.08738447e+08, 5.36897360e+07,
       9.42482475e+07, 5.07465073e+07, 1.21537383e+08, 5.52575488e+07,
       1.29361261e+08, 7.53656623e+07, 1.31285805e+08, 5.98864313e+07])

In [18]:
# Attach the model output to the prediction frame as the '+180' column.
dspred['+180'] = FinalPred

In [19]:
# Write the predictions to a separate results file. The original call wrote to
# "BacPrestamos+180DiasPred.csv", the same file this notebook reads as input;
# overwriting it removes the 'Mas 180' and 'RowId' columns and adds an index
# column, so the next Restart & Run All fails in the cleansing cell.
# index=False keeps the positional index out of the file.
dspred.to_csv("BacPrestamos+180DiasPredResultado.csv", index=False)

In [20]:
# Preview the first five rows, now including the '+180' prediction column.
dspred.head(5)

Unnamed: 0,Año,Mes,Actividad,+180
0,2020,1,1,271721000.0
1,2020,1,2,63974650.0
2,2020,2,1,230912800.0
3,2020,2,2,84457410.0
4,2020,3,1,233726700.0
