# Import Libraries

In [1]:
import pandas as pd
import matplotlib as pt
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
# Build a cubehelix palette as a colormap object (as_cmap=True).
# NOTE(review): `cmap` is not referenced again in the visible cells — confirm it is still needed.
cmap = sns.cubehelix_palette(dark=.3, light=.8, as_cmap=True)
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline
# `pt` is the top-level matplotlib package (imported as `pt`), not pyplot.
pt.style.use('ggplot')

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import RandomForestRegressor

# Import Dataset

In [2]:
# Load the training table from a CSV path relative to the working directory.
prestamos60dias = pd.read_csv("BacPrestamos60Dias.csv") 

In [3]:
# Preview the first rows to check which columns were loaded.
prestamos60dias.head()

Unnamed: 0,RowId,Año,Mes,Actividad,31-60
0,2148,2016,11,Consumo,10777420000.0
1,2158,2016,12,Inmuebles,2485078000.0
2,2166,2016,12,Consumo,9398391000.0
3,2176,2017,1,Inmuebles,1649180000.0
4,2184,2017,1,Consumo,10193020000.0


# Data Cleansing

In [4]:
# Encode the loan category numerically: 1 = Inmuebles, 2 = Consumo.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the column selection: that chained in-place
# form is deprecated and no longer updates the parent frame under pandas
# Copy-on-Write.
prestamos60dias['Actividad'] = (
    prestamos60dias['Actividad']
    .replace({'Inmuebles': 1, 'Consumo': 2})
    .astype('int64')  # pin the dtype instead of relying on replace() downcasting
)
# Drop the RowId column. Rebinding (no inplace) plus errors='ignore' keeps this
# cell safe to re-run after the column has already been removed.
prestamos60dias = prestamos60dias.drop(columns=['RowId'], errors='ignore')

In [5]:
# List the current column names. This cell only inspects them; nothing is reordered.
prestamos60dias.columns.tolist()

['Año', 'Mes', 'Actividad', '31-60']

In [6]:
# Preview the last rows after cleansing (Actividad is now numeric, RowId is gone).
prestamos60dias.tail()

Unnamed: 0,Año,Mes,Actividad,31-60
311,2016,9,1,2582113000.0
312,2016,9,2,8775598000.0
313,2016,10,1,2859263000.0
314,2016,10,2,9009363000.0
315,2016,11,1,3143100000.0


# Definir Conjuntos de Datos 

In [7]:
# Split the cleaned frame by position: the first three columns are the
# predictors and the fourth column is the regression target.
n_features = 3
x = prestamos60dias.iloc[:, :n_features]
y = prestamos60dias.iloc[:, n_features]

In [8]:
# Check that the feature frame holds only the predictor columns.
x.head()

Unnamed: 0,Año,Mes,Actividad
0,2016,11,2
1,2016,12,1
2,2016,12,2
3,2017,1,1
4,2017,1,2


In [9]:
# Check that the target series was selected as expected.
y.head()

0    1.077742e+10
1    2.485078e+09
2    9.398391e+09
3    1.649180e+09
4    1.019302e+10
Name: 31-60, dtype: float64

In [10]:
# Hold out 30% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=0
)
# Show the shape of every partition as the cell output.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((221, 3), (95, 3), (221,), (95,))

# Modelos de Regresión

Usando Linear Regression

In [11]:
# Fit a linear-regression baseline on the training split and predict the test split.
lreg = LinearRegression()
lreg.fit(x_train, y_train)
lpred = lreg.predict(x_test)

# Compute each metric once and reuse it in the report below.
lin_mse = mean_squared_error(y_test, lpred)
lin_mae = mean_absolute_error(y_test, lpred)
lin_r2 = r2_score(y_test, lpred)

# Fitted coefficients, one per feature column
print('Coefficients: \n', lreg.coef_)
# Mean squared error, rounded to two decimals
print('Mean squared error: %.2f' % lin_mse)
# Coefficient of determination (R^2): 1 is perfect prediction
print('Coefficient of determination: %.2f' % lin_r2)
print('-------------------------------------------------------------------------')
# Same error metrics again, unrounded, plus the root of the MSE
print('Mean Absolute Error:', lin_mae)
print('Mean Squared Error:', lin_mse)
print('Root Mean Squared Error:', np.sqrt(lin_mse))

Coefficients: 
 [8.28515027e+08 9.65937647e+07 5.27854302e+09]
Mean squared error: 6640179076536857600.00
Coefficient of determination: 0.73
-------------------------------------------------------------------------
Mean Absolute Error: 1933875297.0541363
Mean Squared Error: 6.640179076536858e+18
Root Mean Squared Error: 2576854492.697804


Usando Random Forest Regressor

In [12]:
# Random-forest fitting is randomized (bootstrap samples), so an unseeded model
# gives different metrics and different exported predictions on every run.
# A fixed seed makes the notebook reproducible; 0 matches the seed already used
# for train_test_split.
RFreg = RandomForestRegressor(random_state=0)
RFreg.fit(x_train, y_train)
RFpred = RFreg.predict(x_test)

# Compute the MSE once and reuse it for the RMSE.
rf_mse = mean_squared_error(y_test, RFpred)
print('-------------------------------------------------------------------------')
print('Mean Absolute Error:', mean_absolute_error(y_test, RFpred))
print('Mean Squared Error:', rf_mse)
print('Root Mean Squared Error:', np.sqrt(rf_mse))

-------------------------------------------------------------------------
Mean Absolute Error: 622339976.5578631
Mean Squared Error: 7.768230007533435e+17
Root Mean Squared Error: 881375629.7704989


Importar dataset de Predicción

In [13]:
# Load the rows to predict from a CSV path relative to the working directory.
# NOTE(review): a later cell writes to this same file name — confirm that overwriting the input is intended.
dspred = pd.read_csv("BacPrestamos60DiasPred.csv") 

In [14]:
# Encode the loan category with the same codes used for the training data:
# 1 = Inmuebles, 2 = Consumo.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the column selection: that chained in-place
# form is deprecated and no longer updates the parent frame under pandas
# Copy-on-Write.
dspred['Actividad'] = (
    dspred['Actividad']
    .replace({'Inmuebles': 1, 'Consumo': 2})
    .astype('int64')  # pin the dtype instead of relying on replace() downcasting
)
# Remove the '31-60' target column and the RowId column so only the model's
# feature columns remain. Rebinding (no inplace) plus errors='ignore' keeps
# this cell safe to re-run after the columns have already been removed.
dspred = dspred.drop(columns=['31-60', 'RowId'], errors='ignore')

In [15]:
# Confirm that the prediction frame now holds only the feature columns.
dspred.head()

Unnamed: 0,Año,Mes,Actividad
0,2020,1,1
1,2020,1,2
2,2020,2,1
3,2020,2,2
4,2020,3,1


In [16]:
# Score the prediction rows with the fitted random forest.
# NOTE(review): the dspred preview shows year 2020 rows. Tree ensembles cannot
# extrapolate beyond the target range seen in training, so confirm the training
# years cover this period or treat these values with care.
FinalPred=RFreg.predict(dspred)

In [17]:
# Display the raw prediction array (one value per row of dspred).
FinalPred

array([6.70667337e+09, 1.58277062e+10, 6.10987689e+09, 1.76691507e+10,
       6.20705119e+09, 1.81819954e+10, 7.33601713e+09, 2.00289501e+10,
       7.42986367e+09, 2.03296098e+10, 7.29854493e+09, 1.96076408e+10,
       7.43185319e+09, 1.95816387e+10, 7.87271173e+09, 2.00332967e+10,
       8.22423531e+09, 2.03946649e+10, 7.44852167e+09, 2.02512935e+10,
       7.34210180e+09, 1.96181638e+10, 7.34380474e+09, 1.96229382e+10])

In [18]:
# Store the predictions in a '31-60' column, the same name as the training target.
dspred['31-60']=FinalPred

In [19]:
# Persist the predictions. index=False keeps the row index out of the file, so
# reading it back does not produce a spurious "Unnamed: 0" column.
# NOTE(review): this overwrites the CSV that was read as the prediction input —
# confirm that is intended, or keep a copy of the original file.
dspred.to_csv("BacPrestamos60DiasPred.csv", index=False)

In [20]:
# Preview the prediction frame with the new '31-60' column attached.
dspred.head()

Unnamed: 0,Año,Mes,Actividad,31-60
0,2020,1,1,6706673000.0
1,2020,1,2,15827710000.0
2,2020,2,1,6109877000.0
3,2020,2,2,17669150000.0
4,2020,3,1,6207051000.0
