# Import Libraries

In [1]:
import pandas as pd
import matplotlib as pt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
%matplotlib inline
pt.style.use('ggplot')

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor

# Import Dataset

In [2]:
# Load the training data set from a CSV file located relative to the working directory.
prestamos180dias = pd.read_csv("BacPrestamos180Dias.csv") 

In [3]:
# Preview the first rows of the raw data to check the columns that were loaded.
prestamos180dias.head()

Unnamed: 0,RowId,Año,Mes,Actividad,91-180
0,2148,2016,11,Consumo,6027686000.0
1,2158,2016,12,Inmuebles,1154612000.0
2,2166,2016,12,Consumo,6318230000.0
3,2176,2017,1,Inmuebles,1539614000.0
4,2184,2017,1,Consumo,6242209000.0


# Data Cleansing

In [4]:
# Encode the activity category as an integer: 1 = Inmuebles, 2 = Consumo.
# The result is assigned back to the column instead of calling
# `replace(..., inplace=True)` on the selected column: that chained in-place
# form is deprecated and does not modify the frame under pandas Copy-on-Write.
# `infer_objects()` keeps the encoded column numeric on pandas versions where
# `replace` no longer downcasts object columns by itself.
prestamos180dias['Actividad'] = (
    prestamos180dias['Actividad']
    .replace({'Inmuebles': 1, 'Consumo': 2})
    .infer_objects()
)
# Drop the RowId identifier column; it is not a model feature.
# errors='ignore' keeps this cell safe to re-run after the column is already gone.
prestamos180dias = prestamos180dias.drop(columns=['RowId'], errors='ignore')

In [5]:
# List the column names after cleansing (inspection only; nothing is reordered here).
prestamos180dias.columns.tolist()

['Año', 'Mes', 'Actividad', '91-180']

In [6]:
# Preview the last rows to confirm the cleansing applied to the whole frame.
prestamos180dias.tail()

Unnamed: 0,Año,Mes,Actividad,91-180
311,2016,9,1,994899100.0
312,2016,9,2,6160183000.0
313,2016,10,1,1254010000.0
314,2016,10,2,6497689000.0
315,2016,11,1,1036636000.0


# Definir Conjuntos de Datos 

In [7]:
# Select the features and the target by column name rather than by position,
# so a change in the CSV column order cannot silently swap a feature or leak
# the target into the feature matrix.
FEATURE_COLUMNS = ['Año', 'Mes', 'Actividad']
TARGET_COLUMN = '91-180'

# x: model inputs (year, month, encoded activity); y: value to predict.
x = prestamos180dias[FEATURE_COLUMNS]
y = prestamos180dias[TARGET_COLUMN]

In [8]:
# Preview the feature matrix.
x.head()

Unnamed: 0,Año,Mes,Actividad
0,2016,11,2
1,2016,12,1
2,2016,12,2
3,2017,1,1
4,2017,1,2


In [9]:
# Preview the target series.
y.head()

0    6.027686e+09
1    1.154612e+09
2    6.318230e+09
3    1.539614e+09
4    6.242209e+09
Name: 91-180, dtype: float64

In [10]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)
# Display the shape of each partition as a sanity check.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((221, 3), (95, 3), (221,), (95,))

# Modelos de Regresión

Usando Linear Regression

In [11]:
# Fit a linear regression on the training split and predict the held-out split.
lreg = LinearRegression()
lreg.fit(x_train, y_train)
lpred = lreg.predict(x_test)

# Compute each metric once and reuse it in the report below.
lin_mse = mean_squared_error(y_test, lpred)
lin_r2 = r2_score(y_test, lpred)
lin_mae = mean_absolute_error(y_test, lpred)

# Fitted coefficients, one per feature column of x_train.
print('Coefficients: \n', lreg.coef_)
# Mean squared error on the held-out split.
print('Mean squared error: %.2f' % lin_mse)
# Coefficient of determination: 1 is perfect prediction.
print('Coefficient of determination: %.2f' % lin_r2)
print('-------------------------------------------------------------------------')
print('Mean Absolute Error:', lin_mae)
print('Mean Squared Error:', lin_mse)
print('Root Mean Squared Error:', np.sqrt(lin_mse))

Coefficients: 
 [8.22861853e+08 1.19515100e+08 5.07742252e+09]
Mean squared error: 20992656553126928384.00
Coefficient of determination: 0.49
-------------------------------------------------------------------------
Mean Absolute Error: 2994686849.844932
Mean Squared Error: 2.099265655312693e+19
Root Mean Squared Error: 4581774389.156119


Usando Random Forest Regressor

In [12]:
# Fit a random forest on the training split and evaluate it on the held-out split.
# random_state is fixed (same seed as the train/test split) so the metrics here
# and the predictions exported later are reproducible from run to run; without
# it every execution trains a different forest.
RFreg = RandomForestRegressor(random_state=0)
RFreg.fit(x_train, y_train)
RFpred = RFreg.predict(x_test)

# Compute the squared error once; the RMSE is derived from it.
rf_mse = mean_squared_error(y_test, RFpred)

print('-------------------------------------------------------------------------')
print('Mean Absolute Error:', mean_absolute_error(y_test, RFpred))
print('Mean Squared Error:', rf_mse)
print('Root Mean Squared Error:', np.sqrt(rf_mse))

-------------------------------------------------------------------------
Mean Absolute Error: 386695071.2206032
Mean Squared Error: 4.130164768080974e+17
Root Mean Squared Error: 642663579.8052489


Importar dataset de Predicción

In [13]:
# Load the rows to predict from a CSV file located relative to the working directory.
# NOTE(review): a later cell writes its results back to this same file name.
dspred = pd.read_csv("BacPrestamos180DiasPred.csv") 

In [14]:
# Encode the activity category exactly as in the training data:
# 1 = Inmuebles, 2 = Consumo. Values that are already numeric are left as-is.
# The result is assigned back instead of using the chained
# `replace(..., inplace=True)` form, which is deprecated and does not modify
# the frame under pandas Copy-on-Write.
dspred['Actividad'] = (
    dspred['Actividad']
    .replace({'Inmuebles': 1, 'Consumo': 2})
    .infer_objects()
)
# Keep only the model's input features, in the order used for training.
# Selecting by name (rather than dropping 'RowId' and '91-180') still works
# when the file was already rewritten by the export cell of this notebook,
# which removes 'RowId' and can add an unnamed index column.
# .copy() gives an independent frame so a column can be added to it later
# without a SettingWithCopyWarning.
dspred = dspred[['Año', 'Mes', 'Actividad']].copy()

In [15]:
# Preview the prediction inputs after cleansing.
dspred.head()

Unnamed: 0,Año,Mes,Actividad
0,2020,1,1
1,2020,1,2
2,2020,2,1
3,2020,2,2
4,2020,3,1


In [16]:
# Predict with the random forest using only the three training features,
# selected by name. This keeps the cell re-runnable after the '91-180'
# prediction column has been added to dspred further down.
FinalPred = RFreg.predict(dspred[['Año', 'Mes', 'Actividad']])

In [17]:
# Display the array of predicted values, one per row of dspred.
FinalPred

array([1.32577199e+09, 2.32509916e+10, 1.96550755e+09, 2.30226532e+10,
       1.80601523e+09, 2.30133966e+10, 2.11187084e+09, 2.34116272e+10,
       2.26744509e+09, 2.39722901e+10, 2.32788901e+09, 2.54804436e+10,
       2.15382275e+09, 2.62772526e+10, 2.54211840e+09, 2.74360687e+10,
       2.82514071e+09, 2.79200815e+10, 1.95779847e+09, 2.76465230e+10,
       2.53832759e+09, 2.73278597e+10, 2.21014375e+09, 2.72447591e+10])

In [18]:
# Attach the predictions to the prediction frame as the '91-180' column.
dspred['91-180']=FinalPred

In [19]:
# Save the inputs together with their predictions.
# index=False: the default writes the row index as an extra unnamed column,
# which comes back as a spurious 'Unnamed: 0' column when this file is read
# again (this notebook loads the same file name as its prediction input).
dspred.to_csv("BacPrestamos180DiasPred.csv", index=False)

In [20]:
# Preview the prediction frame with the new '91-180' column.
dspred.head()

Unnamed: 0,Año,Mes,Actividad,91-180
0,2020,1,1,1325772000.0
1,2020,1,2,23250990000.0
2,2020,2,1,1965508000.0
3,2020,2,2,23022650000.0
4,2020,3,1,1806015000.0
