# MODELO SUPERVISADO 

##### OBJETIVO: Predecir cuánta energía eólica producirán los países de América del Sur en los próximos 5 años

In [85]:
# Se importan las librerías que se utilizarán: 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import scipy as stats
import re
import heapq
import matplotlib.cm as cm
import os
from sklearn.model_selection import train_test_split

In [86]:
# Load the suram1 dataset.
# The path is built with os.path.join so it resolves on any operating system;
# the original raw string used doubled backslashes, which is only usable as a
# Windows path.
suram1 = pd.read_csv(os.path.join('..', 'datasets', 'suram1.csv'))

In [87]:
# Show the distinct values found in the 'country' column.
paises = suram1['country']
paises.unique()

array(['Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Ecuador',
       'French Guiana', 'Guyana', 'Paraguay', 'Peru', 'Suriname',
       'Trinidad and Tobago', 'Uruguay', 'Venezuela'], dtype=object)

In [88]:
# Print the column labels of the dataset.
columnas = suram1.columns
print(columnas)

Index(['iso_code', 'country', 'year', 'coal_share_elec',
       'coal_elec_per_capita', 'electricity_generation', 'biofuel_electricity',
       'coal_electricity', 'fossil_electricity', 'gas_electricity',
       'hydro_electricity', 'nuclear_electricity', 'oil_electricity',
       'other_renewable_electricity',
       'other_renewable_exc_biofuel_electricity', 'renewables_electricity',
       'solar_electricity', 'wind_electricity', 'energy_per_capita',
       'fossil_cons_per_capita', 'fossil_share_elec', 'gas_share_elec',
       'gas_elec_per_capita', 'hydro_share_elec', 'hydro_elec_per_capita',
       'low_carbon_share_elec', 'low_carbon_electricity',
       'low_carbon_elec_per_capita', 'oil_share_elec', 'oil_elec_per_capita',
       'other_renewables_elec_per_capita', 'other_renewables_share_elec',
       'per_capita_electricity', 'population', 'primary_energy_consumption',
       'renewables_elec_per_capita', 'renewables_share_elec',
       'solar_share_elec', 'solar_elec_per_cap

In [84]:
# Count the missing values of every column and sort them from most to fewest.
nulos_por_columna = suram1.isnull().sum()
missing_values = nulos_por_columna.sort_values(ascending=False)
missing_values

iso_code                                   0
gas_share_elec                             0
hydro_share_elec                           0
hydro_elec_per_capita                      0
low_carbon_share_elec                      0
low_carbon_electricity                     0
low_carbon_elec_per_capita                 0
oil_share_elec                             0
oil_elec_per_capita                        0
other_renewables_elec_per_capita           0
other_renewables_share_elec                0
per_capita_electricity                     0
population                                 0
primary_energy_consumption                 0
renewables_elec_per_capita                 0
renewables_share_elec                      0
solar_share_elec                           0
solar_elec_per_capita                      0
wind_share_elec                            0
gas_elec_per_capita                        0
fossil_share_elec                          0
country                                    0
fossil_con

In [None]:
# Binary label: 0 when punt_media is below 3.5, 1 otherwise.
# NOTE(review): peliculas5 is not defined in the visible part of this
# notebook - confirm where it is loaded.
def _etiqueta_exito(puntaje):
    if puntaje < 3.5:
        return 0
    return 1


peliculas5['pel_exitosa'] = peliculas5['punt_media'].apply(_etiqueta_exito)
peliculas5

In [None]:
# Create the dummy (one-hot) variables for the categorical columns.
columnas_categoricas = ['plataforma', 'clasificacion']
dum_supervisado = pd.get_dummies(peliculas5[columnas_categoricas])

In [None]:
# Append the dummy columns to the right of the original frame.
supervisado1 = pd.concat([peliculas5, dum_supervisado], axis='columns')
supervisado1

In [None]:
# Save the dataset to CSV (without the index column).
# os.path.join keeps the path portable instead of hard-coding Windows
# backslashes in the file name.
supervisado1.to_csv(os.path.join('..', 'datasets', 'supervisado1.csv'), index=False)
supervisado1

In [None]:
# Remove the variables that will not be part of the model.
columnas_descartadas = [
    'show_id',
    'titulo',
    'director',
    'elenco',
    'clasificacion',
    'genero',
    'plataforma',
    'anio',
    'duracion',
]
supervisado1 = supervisado1.drop(columns=columnas_descartadas)
supervisado1

In [None]:
# Save the reduced dataset to CSV (without the index column).
# os.path.join keeps the path portable instead of hard-coding Windows
# backslashes in the file name.
supervisado1.to_csv(os.path.join('..', 'datasets', 'supervisado2.csv'), index=False)

In [None]:
# Load the dataset and display it.
# The path is built with os.path.join so it resolves on any operating system;
# the original raw string used doubled backslashes, which is only usable as a
# Windows path.
supervisado2 = pd.read_csv(os.path.join('..', 'datasets', 'supervisado2.csv'))
supervisado2

### Objetivo: Determinar si la película será exitosa


In [None]:
# Build the feature matrix for the regression on punt_media.
# Besides the target itself, pel_exitosa has to be dropped: it is computed
# directly from punt_media (0 below 3.5, 1 otherwise), so keeping it as a
# feature would leak the target into the model.
x = supervisado2.drop(['punt_media', 'pel_exitosa'], axis=1)
x.head()

In [None]:
# Keep the feature column labels; x is converted to a NumPy array in a later cell.
lista_atributos = x.columns

In [None]:
# Select the labels (punt_media) and store them in y.
y = supervisado2['punt_media']

In [None]:
# Convert the features and the target to NumPy arrays.
x = np.array(x)
y = np.array(y)

In [None]:
# Display the feature array.
x      

In [None]:
y[:4]   # First four target values; here the target is punt_media

### Separando Train / Test 

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
particion = train_test_split(x, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = particion

In [None]:
x.shape # Number of rows and columns

In [None]:
X_train.shape # Training rows (10420 according to the author's note; not verifiable here)

In [None]:
X_test.shape  # Evaluation rows (2606 according to the author's note; not verifiable here)

In [None]:
# Display the test feature array.
X_test

### Random Forest

In [None]:
# Se importan las librerías
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [None]:
# Instantiate the random forest regressor.
parametros_rf = {'criterion': 'squared_error', 'random_state': 42}
regresor = RandomForestRegressor(**parametros_rf)

In [None]:
# Train the model (the trailing semicolon suppresses the cell output).
regresor.fit(X_train, y_train);

In [None]:
# Show the parameters the regressor was configured with.
regresor.get_params()

In [None]:
# Predict the values for the test set.
y_pred = regresor.predict(X_test)

In [None]:
# Mean absolute error of the random forest on the test set.
mae_rf = mean_absolute_error(y_test, y_pred)
mae_rf

### Support Vector Machine

In [None]:
#Se importan librerías
from sklearn.svm import SVR

In [None]:
# Instantiate the support vector regressor.
parametros_svr = {'C': 1.0, 'epsilon': 0.2}
regresor_svr = SVR(**parametros_svr)

In [None]:
# Train the model (the trailing semicolon suppresses the cell output).
regresor_svr.fit(X_train, y_train);

In [None]:
# Show the parameters the SVR was configured with.
regresor_svr.get_params()

In [None]:
# Predict the values for the test set.
y_pred_svr = regresor_svr.predict(X_test)

In [None]:
# Display the SVR predictions.
y_pred_svr

In [None]:
# Mean absolute error of the SVR on the test set.
mae_svr = mean_absolute_error(y_test, y_pred_svr)
mae_svr

In [None]:
# Bar chart with the number of rows in each pel_exitosa class.
sns.countplot(data=supervisado2, x='pel_exitosa', palette='Set2')
sns.despine()

In [None]:
# Percentage of rows that fall in each pel_exitosa class.
conteo_clases = supervisado2['pel_exitosa'].value_counts()
total_filas = supervisado2['punt_media'].size
conteo_clases / total_filas * 100

### Selección de target (y) y variables (X)

In [None]:
# Classification features: every column except the score and the label derived from it.
columnas_objetivo = ['punt_media', 'pel_exitosa']
xc = supervisado2.drop(columns=columnas_objetivo)

In [None]:
# Classification target: the pel_exitosa label.
yc = supervisado2['pel_exitosa']

In [None]:
# Convert the classification features and target to NumPy arrays.
xc = np.array(xc)
yc = np.array(yc)

In [None]:
# Train / test split: 20% of the rows are held out, with a fixed seed.
particion_c = train_test_split(xc, yc, test_size=0.2, random_state=42)
X_train_c, X_test_c, y_train_c, y_test_c = particion_c

### Clasificador kNN

In [None]:
# se importan librerías 
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# metricas
from sklearn.metrics import accuracy_score

In [None]:
# Instantiate the kNN classifier with the library's default parameters.
knn = KNeighborsClassifier()

In [None]:
# Train the model (the trailing semicolon suppresses the cell output).
knn.fit(X_train_c, y_train_c);

In [None]:
# Predict the class of every row in the test set.
predicciones= knn.predict(X_test_c)

In [None]:
# Accuracy on the test set, expressed as a percentage.
# The predictions were produced from X_test_c, so they have to be scored
# against y_test_c; y_train_c has a different length and belongs to other rows.
accuracy = accuracy_score(y_test_c, predicciones) * 100
print(f'{round(accuracy, 2)}%')