In [28]:
# Cargando las librerías por categoría
# data analysis and wrangling
import pandas as pd
import numpy as np
import random as rnd

# Generic
import os
import datetime
import itertools
import warnings
warnings.filterwarnings("ignore")

# visualization
import seaborn as sns
from scipy.stats import norm
import matplotlib.pyplot as plt
%matplotlib inline

# machine learning
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.svm import LinearSVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neural_network import MLPClassifier

## scikit modeling libraries
from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier,
                             GradientBoostingClassifier, ExtraTreesClassifier,
                             VotingClassifier)

from sklearn.model_selection import (GridSearchCV, cross_val_score, cross_val_predict,
                                     StratifiedKFold, learning_curve)

## Load metrics for predictive modeling
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_curve, auc


from sklearn.preprocessing import LabelEncoder
import pickle

In [4]:
#Cargamos los datos ya tratados tras el EDA
ruta=("/content/Modelar_EDA.pkl")
df= pd.read_pickle(ruta)

In [5]:
df.shape 

(36392, 56)

$$Uso de Neo-channels (para su uso con XGBoost)$$
$$Luminance$$
$$L = max(RGB) + min(RGB)/2$$
$$NDVI (Normalized Difference VEgetation Index)$$
$$NDVI = NIR- R/ NIR + R$$
$$ExG(Excess Green Index)$$
$$ExG = 2 * G -R -B/R+G+B$$
$$MNDWI (Modified Normalized Difference Water index)$$
$$MNDWI = G - NIR/G+NIR$$

In [9]:
# Máximos
df['max_red'] = df.loc[:, 'Q_R_4_0_0':'Q_R_4_1_0'].max(axis=1)
df['max_green'] = df.loc[:, 'Q_G_3_0_0':'Q_G_3_1_0'].max(axis=1)
df['max_blue'] = df.loc[:, 'Q_B_2_0_0':'Q_B_2_1_0'].max(axis=1)
df['max_nir'] = df.loc[:, 'Q_NIR_8_0_0':'Q_NIR_8_1_0'].max(axis=1)
df['max_total_RGB']= df['max_red'] + df['max_green'] + df['max_blue']

In [10]:
# Mínimos
df['min_red'] = df.loc[:, 'Q_R_4_0_0':'Q_R_4_1_0'].min(axis=1)
df['min_green'] = df.loc[:, 'Q_G_3_0_0':'Q_G_3_1_0'].min(axis=1)
df['min_blue'] = df.loc[:, 'Q_B_2_0_0':'Q_B_2_1_0'].min(axis=1)
df['min_nir'] = df.loc[:, 'Q_NIR_8_0_0':'Q_NIR_8_1_0'].min(axis=1)
df['min_total_RGB']= df['min_red'] + df['min_green'] + df['min_blue']


In [11]:
# Medias
df['mean_red'] = df.loc[:, 'Q_R_4_0_0':'Q_R_4_1_0'].mean(axis=1)
df['mean_green'] = df.loc[:, 'Q_G_3_0_0':'Q_G_3_1_0'].mean(axis=1)
df['mean_blue'] = df.loc[:, 'Q_B_2_0_0':'Q_B_2_1_0'].mean(axis=1)
df['mean_nir'] = df.loc[:, 'Q_NIR_8_0_0':'Q_NIR_8_1_0'].mean(axis=1)

In [12]:
# Suma
df['sum_red'] = df.loc[:, 'Q_R_4_0_0':'Q_R_4_1_0'].sum(axis=1)
df['sum_green'] = df.loc[:, 'Q_G_3_0_0':'Q_G_3_1_0'].sum(axis=1)
df['sum_blue'] = df.loc[:, 'Q_B_2_0_0':'Q_B_2_1_0'].sum(axis=1)
df['sum_nir'] = df.loc[:, 'Q_NIR_8_0_0':'Q_NIR_8_1_0'].sum(axis=1)
df['sum_total'] = df['sum_red'] + df['sum_green'] + df['sum_blue'] + df['sum_nir'] 

In [13]:
df.head()

Unnamed: 0,ID,X,Y,Q_R_4_0_0,Q_R_4_0_1,Q_R_4_0_2,Q_R_4_0_3,Q_R_4_0_4,Q_R_4_0_5,Q_R_4_0_6,Q_R_4_0_7,Q_R_4_0_8,Q_R_4_0_9,Q_R_4_1_0,Q_G_3_0_0,Q_G_3_0_1,Q_G_3_0_2,Q_G_3_0_3,Q_G_3_0_4,Q_G_3_0_5,Q_G_3_0_6,Q_G_3_0_7,Q_G_3_0_8,Q_G_3_0_9,Q_G_3_1_0,Q_B_2_0_0,Q_B_2_0_1,Q_B_2_0_2,Q_B_2_0_3,Q_B_2_0_4,Q_B_2_0_5,Q_B_2_0_6,Q_B_2_0_7,Q_B_2_0_8,Q_B_2_0_9,Q_B_2_1_0,Q_NIR_8_0_0,Q_NIR_8_0_1,Q_NIR_8_0_2,Q_NIR_8_0_3,Q_NIR_8_0_4,Q_NIR_8_0_5,Q_NIR_8_0_6,Q_NIR_8_0_7,Q_NIR_8_0_8,Q_NIR_8_0_9,Q_NIR_8_1_0,AREA,GEOM_R1,GEOM_R2,GEOM_R3,GEOM_R4,CONTRUCTIONYEAR,MAXBUILDINGFLOOR,CADASTRALQUALITYID,CLASE,max_red,max_green,max_blue,max_nir,max_total_RGB,min_red,min_green,min_blue,min_nir,min_total_RGB,mean_red,mean_green,mean_blue,mean_nir,sum_red,sum_green,sum_blue,sum_nir,sum_total
0,35984B9C3E7CD9A1,2207357872,165920300,0.0,443.0013,616.001697,746.998401,872.996472,1009.000946,1159.002319,1342.002466,1572.003418,1943.686816,6512.78727,1.283357e-10,428.996347,574.00094,676.00296,778.003931,880.999176,997.00022,1130.001782,1311.997949,1626.996155,8627.098641,155.003632,402.000092,458.999939,510.000061,567.999939,632.00061,681.000916,726.109186,791.000269,902.999719,1980.17228,1.3e-05,892.016479,1244.014185,1497.998303,1731.204688,1944.003906,2160.996533,2410.995264,2719.999756,3180.000342,10951.926645,144.4269,0.557237,0.067249,0.057372,0.853127,2002.0,0.0,1,5,6512.78727,8627.098641,1980.17228,10951.926645,17120.05819,0.0,1.283357e-10,155.003632,1.3e-05,155.003632,1474.316464,1548.281646,709.753331,2612.105101,16217.481105,17031.098101,7807.286641,28733.156114,69789.021961
1,F9D04BF6D037F8FB,2189757160,165463267,5.9e-05,443.899011,627.99906,770.001611,904.999988,1032.998474,1165.001636,1316.00011,1505.990771,1804.993469,6269.9356,1.352518e-11,433.998392,565.00011,662.996521,746.001318,828.998413,912.00033,1011.997162,1142.202637,1384.99458,5357.790999,120.000275,419.0,472.999939,509.999808,541.999976,572.0,602.999939,636.000122,673.000122,745.0,3647.195402,3e-05,846.002728,1139.006982,1363.99386,1558.000513,1749.993713,1956.000464,2194.001978,2507.001416,3012.991675,7048.367637,38.34255,0.709884,0.125156,0.147929,1.181953,1949.0,1.0,7,5,6269.9356,5357.790999,3647.195402,7048.367637,15274.922001,5.9e-05,1.352518e-11,120.000275,3e-05,120.000333,1440.165435,1185.998224,812.745053,2125.032818,15841.81979,13045.980461,8940.195582,23375.360996,61203.356828
2,B89D5711AFF8C423,2240147335,165690752,0.0,353.502274,523.003601,644.001831,760.997131,876.999634,1006.997498,1168.995544,1375.008179,1719.003235,5010.767611,0.0,379.001587,507.003815,600.000519,684.000488,768.997528,867.001587,992.99176,1165.998413,1443.000366,5240.232771,83.000389,363.000031,429.002502,473.000015,510.000092,549.000244,590.999939,635.999817,692.999817,796.498779,1947.739721,5e-06,807.001373,1124.002441,1365.009583,1570.995117,1768.992798,1978.989258,2228.999146,2561.999756,3088.000488,7013.073271,108.794384,0.517702,0.058268,0.081666,1.401552,1986.0,1.0,4,5,5010.767611,5240.232771,1947.739721,7013.073271,12198.740104,0.0,0.0,83.000389,5e-06,83.000389,1221.752412,1149.838985,642.840122,2137.005749,13439.276537,12648.228834,7071.241346,23507.063236,56665.809954
3,1C3478AC1522E7E4,2227146459,165934099,0.0,268.000613,376.999609,478.003784,575.001233,683.997742,809.005994,955.00296,1136.997144,1447.99729,5465.430281,6.341585e-06,310.999088,405.998712,485.000607,558.997925,641.001007,731.994189,838.004376,974.797485,1217.996167,5343.168796,0.00145,256.999487,295.999969,329.000092,366.000043,411.000549,457.000061,506.0,565.999817,663.999817,1424.396092,0.002285,1200.99447,1476.001294,1678.005042,1848.001782,2022.988464,2211.993506,2433.003687,2740.996582,3349.986328,6216.880538,155.224455,0.450871,0.053591,0.054201,1.011382,1999.0,1.0,1,5,5465.430281,5343.168796,1424.396092,6216.880538,12232.99517,0.0,6.341585e-06,0.00145,0.002285,0.001456,1108.766968,1046.178033,479.672489,2288.986725,12196.43665,11507.958359,5276.397378,25178.853978,54159.646365
4,4D12AA5009064345,2212350459,165681791,0.0,318.99791,492.003845,632.999634,757.002197,882.999908,1019.008911,1175.993103,1401.000488,1756.994141,7376.529218,0.0,284.000534,421.002472,526.003815,618.998779,705.999268,801.005432,912.999451,1069.996094,1348.497314,7460.394534,193.000626,431.505035,462.999939,492.000061,519.000427,547.000061,578.999878,617.999603,662.999939,750.000488,1940.40314,0.0,529.004425,790.996704,999.001617,1183.995239,1355.99292,1535.998779,1725.498962,1954.991455,2315.991455,7092.767616,1789.873366,0.458819,0.012858,0.019936,1.550478,1966.0,8.0,5,5,7376.529218,7460.394534,1940.40314,7092.767616,16777.326892,0.0,0.0,193.000626,0.0,193.000626,1437.593578,1286.263427,654.173563,1771.29447,15813.529356,14148.897692,7195.909197,19484.239173,56642.575419


In [14]:
#Nuevo DF con las nuevas variables
df['Luminance']= (df['max_total_RGB'] + df['min_total_RGB'])/2
df['NDVI']= df['sum_nir']- df['sum_red']/df['sum_nir'] + df['sum_red']
df['ExG'] = 2 * df['sum_red']-df['sum_green']-df['sum_blue']/df['sum_green']+df['sum_red']+df['sum_blue']
df['MNDWI'] = df['sum_green'] - df['sum_nir']/ df['sum_green'] + df['sum_nir']

In [15]:
df.head()

Unnamed: 0,ID,X,Y,Q_R_4_0_0,Q_R_4_0_1,Q_R_4_0_2,Q_R_4_0_3,Q_R_4_0_4,Q_R_4_0_5,Q_R_4_0_6,Q_R_4_0_7,Q_R_4_0_8,Q_R_4_0_9,Q_R_4_1_0,Q_G_3_0_0,Q_G_3_0_1,Q_G_3_0_2,Q_G_3_0_3,Q_G_3_0_4,Q_G_3_0_5,Q_G_3_0_6,Q_G_3_0_7,Q_G_3_0_8,Q_G_3_0_9,Q_G_3_1_0,Q_B_2_0_0,Q_B_2_0_1,Q_B_2_0_2,Q_B_2_0_3,Q_B_2_0_4,Q_B_2_0_5,Q_B_2_0_6,Q_B_2_0_7,Q_B_2_0_8,Q_B_2_0_9,Q_B_2_1_0,Q_NIR_8_0_0,Q_NIR_8_0_1,Q_NIR_8_0_2,Q_NIR_8_0_3,Q_NIR_8_0_4,Q_NIR_8_0_5,Q_NIR_8_0_6,Q_NIR_8_0_7,Q_NIR_8_0_8,Q_NIR_8_0_9,Q_NIR_8_1_0,AREA,GEOM_R1,GEOM_R2,GEOM_R3,GEOM_R4,CONTRUCTIONYEAR,MAXBUILDINGFLOOR,CADASTRALQUALITYID,CLASE,max_red,max_green,max_blue,max_nir,max_total_RGB,min_red,min_green,min_blue,min_nir,min_total_RGB,mean_red,mean_green,mean_blue,mean_nir,sum_red,sum_green,sum_blue,sum_nir,sum_total,Luminance,NDVI,ExG,MNDWI
0,35984B9C3E7CD9A1,2207357872,165920300,0.0,443.0013,616.001697,746.998401,872.996472,1009.000946,1159.002319,1342.002466,1572.003418,1943.686816,6512.78727,1.283357e-10,428.996347,574.00094,676.00296,778.003931,880.999176,997.00022,1130.001782,1311.997949,1626.996155,8627.098641,155.003632,402.000092,458.999939,510.000061,567.999939,632.00061,681.000916,726.109186,791.000269,902.999719,1980.17228,1.3e-05,892.016479,1244.014185,1497.998303,1731.204688,1944.003906,2160.996533,2410.995264,2719.999756,3180.000342,10951.926645,144.4269,0.557237,0.067249,0.057372,0.853127,2002.0,0.0,1,5,6512.78727,8627.098641,1980.17228,10951.926645,17120.05819,0.0,1.283357e-10,155.003632,1.3e-05,155.003632,1474.316464,1548.281646,709.753331,2612.105101,16217.481105,17031.098101,7807.286641,28733.156114,69789.021961,8637.530911,44950.072802,39428.173442,45762.567116
1,F9D04BF6D037F8FB,2189757160,165463267,5.9e-05,443.899011,627.99906,770.001611,904.999988,1032.998474,1165.001636,1316.00011,1505.990771,1804.993469,6269.9356,1.352518e-11,433.998392,565.00011,662.996521,746.001318,828.998413,912.00033,1011.997162,1142.202637,1384.99458,5357.790999,120.000275,419.0,472.999939,509.999808,541.999976,572.0,602.999939,636.000122,673.000122,745.0,3647.195402,3e-05,846.002728,1139.006982,1363.99386,1558.000513,1749.993713,1956.000464,2194.001978,2507.001416,3012.991675,7048.367637,38.34255,0.709884,0.125156,0.147929,1.181953,1949.0,1.0,7,5,6269.9356,5357.790999,3647.195402,7048.367637,15274.922001,5.9e-05,1.352518e-11,120.000275,3e-05,120.000333,1440.165435,1185.998224,812.745053,2125.032818,15841.81979,13045.980461,8940.195582,23375.360996,61203.356828,7697.461167,39216.503071,43418.989206,36419.54969
2,B89D5711AFF8C423,2240147335,165690752,0.0,353.502274,523.003601,644.001831,760.997131,876.999634,1006.997498,1168.995544,1375.008179,1719.003235,5010.767611,0.0,379.001587,507.003815,600.000519,684.000488,768.997528,867.001587,992.99176,1165.998413,1443.000366,5240.232771,83.000389,363.000031,429.002502,473.000015,510.000092,549.000244,590.999939,635.999817,692.999817,796.498779,1947.739721,5e-06,807.001373,1124.002441,1365.009583,1570.995117,1768.992798,1978.989258,2228.999146,2561.999756,3088.000488,7013.073271,108.794384,0.517702,0.058268,0.081666,1.401552,1986.0,1.0,4,5,5010.767611,5240.232771,1947.739721,7013.073271,12198.740104,0.0,0.0,83.000389,5e-06,83.000389,1221.752412,1149.838985,642.840122,2137.005749,13439.276537,12648.228834,7071.241346,23507.063236,56665.809954,6140.870246,36945.768061,34740.283054,36153.433544
3,1C3478AC1522E7E4,2227146459,165934099,0.0,268.000613,376.999609,478.003784,575.001233,683.997742,809.005994,955.00296,1136.997144,1447.99729,5465.430281,6.341585e-06,310.999088,405.998712,485.000607,558.997925,641.001007,731.994189,838.004376,974.797485,1217.996167,5343.168796,0.00145,256.999487,295.999969,329.000092,366.000043,411.000549,457.000061,506.0,565.999817,663.999817,1424.396092,0.002285,1200.99447,1476.001294,1678.005042,1848.001782,2022.988464,2211.993506,2433.003687,2740.996582,3349.986328,6216.880538,155.224455,0.450871,0.053591,0.054201,1.011382,1999.0,1.0,1,5,5465.430281,5343.168796,1424.396092,6216.880538,12232.99517,0.0,6.341585e-06,0.00145,0.002285,0.001456,1108.766968,1046.178033,479.672489,2288.986725,12196.43665,11507.958359,5276.397378,25178.853978,54159.646365,6116.498313,37374.806236,30357.29047,36684.624386
4,4D12AA5009064345,2212350459,165681791,0.0,318.99791,492.003845,632.999634,757.002197,882.999908,1019.008911,1175.993103,1401.000488,1756.994141,7376.529218,0.0,284.000534,421.002472,526.003815,618.998779,705.999268,801.005432,912.999451,1069.996094,1348.497314,7460.394534,193.000626,431.505035,462.999939,492.000061,519.000427,547.000061,578.999878,617.999603,662.999939,750.000488,1940.40314,0.0,529.004425,790.996704,999.001617,1183.995239,1355.99292,1535.998779,1725.498962,1954.991455,2315.991455,7092.767616,1789.873366,0.458819,0.012858,0.019936,1.550478,1966.0,8.0,5,5,7376.529218,7460.394534,1940.40314,7092.767616,16777.326892,0.0,0.0,193.000626,0.0,193.000626,1437.593578,1286.263427,654.173563,1771.29447,15813.529356,14148.897692,7195.909197,19484.239173,56642.575419,8485.163759,35296.956923,40487.090987,33631.75978


In [16]:
#Creamos un nuevo dataframe que incluya las nuevas variables creadas
df_new = df.drop(['ID','GEOM_R1','GEOM_R2','GEOM_R3','GEOM_R4','CLASE','max_red', 'max_green','max_blue', 'max_nir','max_total_RGB', 'min_red', 'min_green', 'min_blue', 'min_nir',	'min_total_RGB', 'mean_red','mean_green', 'mean_blue', 	'mean_nir', 'sum_red', 'sum_green', 'sum_blue', 'sum_nir','sum_total'],axis=1)

In [17]:
X_1=df_new
y_1=df['CLASE']

In [18]:
#Separamos lso datos en train y test
X_train, X_test, y_train, y_test = train_test_split(
    X_1, y_1, test_size=0.20)

In [19]:
#Aplicamos el clasificador de bosques aleatorios
clf = RandomForestClassifier(n_estimators=100, max_depth=8, random_state=13, max_features = 'auto').fit(X_train, y_train)
clf

RandomForestClassifier(max_depth=8, random_state=13)

In [20]:
#Cálculos del score al aplicar random forest
clf.score(X_train,y_train) 

0.898739394772095

In [21]:
predicted_rf = clf.predict(X_test)
predicted_rf

array([5, 5, 5, ..., 5, 5, 5])

In [22]:
print(classification_report(y_test, predicted_rf))

              precision    recall  f1-score   support

           0       0.88      0.24      0.38        29
           1       0.75      0.46      0.57       312
           2       0.50      0.03      0.06       134
           3       0.00      0.00      0.00        97
           4       0.82      0.04      0.08       207
           5       0.90      1.00      0.95      6345
           6       0.00      0.00      0.00       155

    accuracy                           0.89      7279
   macro avg       0.55      0.25      0.29      7279
weighted avg       0.85      0.89      0.85      7279



In [23]:
#Aplicación de SVM
SVM_Model = SVC()

In [24]:
SVM_Model.fit(X_train, y_train) 

SVC()

In [25]:
predicted_svc = SVM_Model.predict(X_test)
predicted_svc

array([5, 5, 5, ..., 5, 5, 5])

In [26]:
# Confidence score aplicado al y_test (SVM)
SVM_score = round(SVM_Model.score(X_train,y_train))

print(SVM_score)

print(classification_report(y_test, predicted_rf))

1
              precision    recall  f1-score   support

           0       0.88      0.24      0.38        29
           1       0.75      0.46      0.57       312
           2       0.50      0.03      0.06       134
           3       0.00      0.00      0.00        97
           4       0.82      0.04      0.08       207
           5       0.90      1.00      0.95      6345
           6       0.00      0.00      0.00       155

    accuracy                           0.89      7279
   macro avg       0.55      0.25      0.29      7279
weighted avg       0.85      0.89      0.85      7279



In [30]:
#Guardamos el modelo de random forest en formato pickle
pickle.dump(clf, open('model_img.pkl', 'wb'))