In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from neural_map import NeuralMap, _plot

In [None]:
# Directory that holds the CSV datasets read by this notebook.
base_path = 'datasets'

# Titled country groups. Each one is a two-item list:
#   [title shown on the plot, list of country names].
# NOTE(review): the names presumably have to match the `country` column of
# the dataset exactly -- confirm against the CSV.
persian_gulf_countries = [
    'Países del golfo pérsico',
    [
        'United Arab Emirates', 'Bahrain', 'Iran, Islamic Rep.', 'Kuwait',
        'Oman', 'Qatar', 'Saudi Arabia',
    ],
]
g_20 = [
    'Grupo de los 20',
    [
        'Argentina', 'Australia', 'Brazil', 'Canada', 'China', 'Germany',
        'France', 'United Kingdom', 'Italy', 'Japan', 'Korea, Rep.',
        'Mexico', 'Russian Federation', 'Saudi Arabia', 'Turkey',
        'United States', 'South Africa', 'Indonesia', 'India',
    ],
]
IMF_advanced_economies = [
    'Economías avanzadas',
    [
        'Austria', 'Belgium', 'Czech Republic', 'Denmark', 'Estonia',
        'Finland', 'France', 'Germany', 'Greece', 'Iceland', 'Ireland',
        'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta',
        'Netherlands', 'Norway', 'Portugal', 'Slovak Republic', 'Slovenia',
        'Spain', 'Sweden', 'Switzerland', 'United Kingdom', 'Cyprus',
        'Hong Kong SAR', 'Israel', 'Japan', 'Singapore', 'Korea, Rep.',
        'Taiwan, China', 'Canada', 'United States', 'Australia',
        'New Zealand',
    ],
]
latin_america = [
    'América Latina',
    [
        'Argentina', 'Bolivia', 'Brazil', 'Barbados', 'Chile', 'Colombia',
        'Costa Rica', 'Dominican Republic', 'Ecuador', 'Guatemala',
        'Honduras', 'Haiti', 'Jamaica', 'Mexico', 'Nicaragua', 'Panama',
        'Peru', 'Paraguay', 'El Salvador', 'Trinidad and Tobago', 'Uruguay',
        'Venezuela',
    ],
]
ex_socialist = [
    'Ex socialistas',
    [
        'Albania', 'Mongolia', 'Armenia', 'Azerbaijan', 'Bulgaria',
        'Czech Republic', 'Estonia', 'Georgia', 'Hungary', 'Kazakhstan',
        'Kyrgyz Republic', 'Lithuania', 'Latvia', 'Moldova', 'Poland',
        'Romania', 'Russian Federation', 'Slovak Republic', 'Tajikistan',
        'Ukraine', 'Bosnia and Herzegovina', 'Croatia', 'Macedonia, FYR',
        'Montenegro', 'Serbia', 'Slovenia',
    ],
]
sub_saharan_africa = [
    'África subsahariana',
    [
        'Angola', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi', 'Cameroon',
        'Central African Republic', 'Chad', 'Republic of the Congo',
        'Congo, Democratic Rep.', "Côte d'Ivoire", 'Eritrea', 'Ethiopia',
        'Gabon', 'Gambia, The', 'Ghana', 'Guinea', 'Guinea-Bissau', 'Kenya',
        'Lesotho', 'Liberia', 'Madagascar', 'Malawi', 'Mali', 'Mauritania',
        'Mozambique', 'Namibia', 'Niger', 'Nigeria', 'Rwanda', 'Senegal',
        'Sierra Leone', 'Somalia', 'South Africa', 'Tanzania', 'Togo',
        'Uganda', 'Eswatini', 'Zambia', 'Zimbabwe',
    ],
]

# Plain country lists (no plot title attached).
aaa_countries = ['Canada', 'Australia', 'Luxembourg', 'Germany', 'Denmark', 'Switzerland', 'Sweden', 'Norway']
anglo_sphere = ['Australia', 'Canada', 'United Kingdom', 'United States', 'New Zealand', 'Ireland']

# Titled groups in the [plot title, list of country names] layout.
# The titles used to be the placeholder string 'asdasdasd', which would have
# been rendered verbatim as the plot title; they now describe each group.
IMF_HIPC = [
    'Países pobres muy endeudados (HIPC)',
    [
        'Afghanistan', 'Benin', 'Bolivia', 'Burkina Faso', 'Burundi',
        'Cameroon', 'Central African Republic', 'Chad',
        'Republic of the Congo', 'Congo, Democratic Rep.', 'Comoros',
        "Côte d'Ivoire", 'Ethiopia', 'Eritrea', 'Gambia, The', 'Ghana',
        'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Liberia',
        'Madagascar', 'Mali', 'Mauritania', 'Malawi', 'Mozambique',
        'Nicaragua', 'Niger', 'Rwanda', 'São Tomé and Príncipe', 'Senegal',
        'Sierra Leone', 'Somalia', 'Togo', 'Uganda', 'Zambia',
    ],
]
south_asia = [
    'Asia del Sur',
    ['Afghanistan', 'Bangladesh', 'Bhutan', 'India', 'Maldives', 'Nepal', 'Pakistan', 'Sri Lanka'],
]
north_africa = [
    'África del Norte',
    ['Morocco', 'Tunisia', 'Algeria', 'Egypt'],
]


In [None]:
# Load the dataset from the datasets directory and display it.
dataset_path = '/'.join([base_path, 'GCI_2017.csv'])
df = pd.read_csv(dataset_path)
df


In [None]:
# Extract the values of every column except `country` as an array.
data = df.drop('country', axis='columns').values
data

In [None]:
# Scale every variable to the [0, 1] range.
# The scaler is always fitted on the raw values read from `df`, never on a
# previously scaled `data`. Re-running this cell therefore cannot refit
# `scaler` on already-normalised values, which matters because later cells
# reuse it (`scaler.transform` on raw rows, `plot_weights(scaler=...)`).
scaler = MinMaxScaler()
data = scaler.fit_transform(df.drop(columns=['country']).values)
data

In [None]:
# Instantiate the NeuralMap class.
som_settings = dict(
    # number of variables each country has
    variables=data.shape[1],
    # distance metric
    metric='correlation',
    # topological properties of the map
    columns=12,
    rows=12,
    hexagonal=True,
    toroidal=False,
)
som = NeuralMap(**som_settings)

In [None]:
# Train the instance.
training_settings = {
    # data used for training
    'data': data,
    # number of training epochs
    'n_epochs': 100,
    # data used to measure how the network is learning
    'eval_data': data,
    # weight initialisation method
    'weight_init_function': 'uniform',
    # neighbourhood function
    'neighbourhood_function': 'gaussian',
    # learning rate and radius schedules
    'learning_rate_decay_function': 'linear',
    'radius_decay_function': 'exponential',
    'initial_learning_rate': 1.0,
    'final_learning_rate': 0.1,
    'initial_radius': 5.0,
    'final_radius': 1.0,
}
som.train(**training_settings)

In [None]:
# Plot the map analysis for the training data, leaving out empty nodes.
som.plot_analysis(
    data,
    title='Error de cuantización',
    display_empty_nodes=False,
)

In [None]:
# Plot the unified distance matrix of the trained map.
som.plot_unified_distance_matrix()

In [None]:
# Plot the map once per country group, displaying only that group's labels.
# Each group is a [title, countries] pair, so it is unpacked directly instead
# of being indexed; the unused enumerate() index is gone and the country
# labels are read from the frame once rather than on every iteration.
country_labels = df['country'].values
for group_title, group_countries in [IMF_advanced_economies, g_20, sub_saharan_africa, latin_america, ex_socialist, persian_gulf_countries]:
    som.plot_analysis(data, display_empty_nodes=False, title=group_title, attached_values=country_labels, labels_to_display=group_countries)

In [None]:
# Plot the map weights, passing the fitted scaler and the variable names
# (every column except `country`) as headers.
variable_names = df.drop(columns=['country']).columns
som.plot_weights(scaler=scaler, headers=variable_names, size=7)

In [None]:
# For a few hand-picked groups of indicators, plot the map using each node's
# weight for that indicator as the attached value.

# Flatten the weights to one row per node (rows * columns rows in total).
weights = som.weights.reshape(som.rows * som.columns, -1)
columns = df.drop(columns=['country']).columns.to_list()

# Group title -> indicator names. The names are matched against the dataset
# column names by exact string equality.
selected_attributes = {
    'Apertura económica': [
        'Effect of taxation on incentives to invest, 1-7 (best)',
        'Effect of taxation on incentives to work, 1-7 (best)',
        'Flexibility of wage determination, 1-7 (best)',
        'Burden of government regulation, 1-7 (best)',
        'Imports as a percentage of GDP*',
        'Exports as a percentage of GDP*',
        'Prevalence of trade barriers, 1-7 (best)'
    ],
    # Title typo fixed ("comeptitivas" -> "competitivas"); it is printed below.
    'Capacidad de innovación y ventajas competitivas': [
        'Nature of competitive advantage, 1-7 (best)',
        'Capacity for innovation, 1-7 (best)',
        'Quality of scientific research institutions, 1-7 (best)',
        'Company spending on R&D, 1-7 (best)',
        'University-industry collaboration in R&D, 1-7 (best)',
        'Gov’t procurement of advanced tech products, 1-7 (best)'
    ],
    'Telefonía móvil': [
        'Mobile telephone subscriptions/100 pop.*',
        'Mobile broadband subscriptions/100 pop.*'
    ]
}
for attributes_group, attribute_names in selected_attributes.items():
    print('\n\n   ' + attributes_group)

    # A name that matches no column would otherwise be skipped without any
    # hint, so report it instead of silently producing fewer plots.
    missing_names = [name for name in attribute_names if name not in columns]
    if missing_names:
        print('   (no se encontraron en el conjunto de datos: ' + ', '.join(missing_names) + ')')

    # Walk the dataset columns so plots come out in dataset column order.
    for attribute, column_name in enumerate(columns):
        if column_name not in attribute_names:
            continue
        som.plot_analysis(weights, aggregation_function=np.mean, attached_values=weights[:, attribute], size=7, title=column_name)
        plt.show()


In [None]:
# Print what is attached to a few selected nodes when the country column is
# mapped onto the map.
map_countries = som.map_attachments(data, df['country'])
nodes_of_interest = [(0, 11), (0, 3), (6, 9), (0, 0), (4, 11)]
for node in nodes_of_interest:
    # Node on one line, its attachments on the next, then a blank line.
    print(node, str(map_countries[node]) + '\n', sep='\n')


In [None]:
# Best matching unit for Argentina: select its row, drop the country name,
# scale the values with the already fitted scaler and query the map.
is_argentina = df['country'] == 'Argentina'
argentina_raw = df[is_argentina].drop(columns=['country']).values
Argentina = scaler.transform(argentina_raw)
som.get_best_matching_unit(Argentina)

In [None]:
# Export the map as a dictionary.
som_dict = som.get_dict()

# ... save as JSON to disk and load it again ...

# Rebuild a map from the exported dictionary.
new_som = NeuralMap(**som_dict)

# Plot with the rebuilt instance. The cell used to plot `som` here, which
# left `new_som` unused and did not exercise the round trip at all.
# NOTE(review): assumes NeuralMap(**som_dict) restores the trained state -- confirm.
new_som.plot_analysis(data, display_empty_nodes=False)

In [None]:
# Cluster the map with K-means and K-medoids and plot the label of each node.
clusters = 10

# `plt.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed in 3.9;
# `plt.get_cmap` takes the same (name, lut) arguments and is still available.
# K-medoids runs last, so `labels` / `centers` end up holding its result.
for algorithm_name, run_clustering in (('K-means', som.k_means), ('K-medoids', som.k_medoids)):
    labels, centers = run_clustering(clusters)
    _plot.tiles(
        som.positions,
        som.hexagonal,
        labels,
        norm=False,
        title='Clustering con ' + algorithm_name + '. Valor de K = ' + str(clusters),
        labels=list(range(clusters)),
        color_map=plt.get_cmap('hsv', clusters + 1),
    )

In [None]:
# Plot the map several times, displaying the country labels `batch` at a time.
# Set `batch` to 0 to skip this cell's plots.
batch = 5
if batch:
    labels = df['country'].values
    # Step through the labels in strides of `batch`; the last slice is simply
    # shorter when the total is not a multiple of `batch`. The previous
    # version plotted the remainder with labels[-(n % batch):], which is
    # labels[-0:] -- i.e. every label -- whenever n is a multiple of `batch`.
    for start in range(0, labels.shape[0], batch):
        som.plot_analysis(data, display_empty_nodes=False, attached_values=labels, labels_to_display=labels[start:start + batch])