# Imports

In [1]:
import os
import gdown
import numpy as np
import pandas as pd
from minisom import MiniSom

#plotly imports
import plotly as py
import plotly.graph_objs as go
from plotly.offline import plot, iplot
import plotly.graph_objs as go
from sklearn.manifold import TSNE

# Download files

In [2]:
# Local path of the dataset; the CSV is fetched from Google Drive only when it
# is not already present, so re-running the notebook does not download it again.
file = "./downloads/StartUpsESG_0602.csv"
if not os.path.exists(file):
    # The target directory is not guaranteed to exist on a fresh checkout;
    # create it so the download has somewhere to write.
    os.makedirs(os.path.dirname(file), exist_ok=True)

    url = "https://drive.google.com/uc?id=1HAudVhltfQQ8U_Yhxr-GOiJ7mZFC8v1K"

    downloaded_path = gdown.download(url, output=file)
    # Fail here, with a clear message, rather than later in pd.read_csv.
    if downloaded_path is None or not os.path.exists(file):
        raise RuntimeError("Download failed: {} was not created from {}".format(file, url))


In [3]:
# Read the downloaded CSV into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer=file)

df.head(5)

Unnamed: 0,id,StartUp,Country,Year,ESG,E,S,G,Raised,distr_tk,...,FISCALFRD,FINACIFRD,FOREIGINVEST,FOREIGDP,INTERPAY,percmath,GINI,MAKTEFF,PERCTECH,DEVCOUNT
0,12316,VenusEnergy,Lithuania,2018,0.01,0.008,0.002,0.0,5986183.0,0.27,...,96.7,70,1299842000.0,2.420713,3.476571,4.26941,35.7,4.642995,3.11263,1
1,23416,GreenEnergyCoin,Netherlands,2018,0.018,0.01,0.006,0.002,0.0,0.0,...,88.2,80,-361467000000.0,-39.565294,2.264509,6.37706,28.1,5.40808,2.82506,1
2,1553,HydroMiner,Austria,2017,0.014,0.01,0.0,0.004,3500000.0,0.0,...,81.1,70,-28555240000.0,-6.277635,3.644302,6.07867,30.8,4.897589,4.38647,1
3,17028,PowerCorp,"Hong Kong SAR, China",2018,0.008,0.006,0.0,0.002,0.0,0.0,...,100.0,90,97036260000.0,26.828457,0.05,0.0,0.0,5.705542,0.0,1
4,8483,Emyron Coin,United States,2018,0.017,0.0,0.011,0.006,0.0,0.28,...,54.8,80,261482000000.0,1.2686,11.816379,7.2092,41.4,5.209139,3.78213,1


In [4]:
# Seed for the SOM's random number generator, so that weight initialisation and
# the random sample order used in training are reproducible across runs.
SOM_SEED = 0

# Encode each country as an integer ID, numbered in order of first appearance.
# NOTE(review): Country_ID is a nominal code but is fed to the SOM as an
# unscaled numeric feature alongside the ESG scores - confirm this is intended.
country_id_map = {country: idx for idx, country in enumerate(df['Country'].unique())}

df['Country_ID'] = df['Country'].map(country_id_map)

# Feature columns, in the order used for training (also reused for the CSV header).
data = df[['Raised', 'Country_ID', 'ESG', 'E', 'S', 'G']]

# Standardise 'Raised' (z-score); the remaining columns are used as they are.
raised_normalized = (df['Raised'] - df['Raised'].mean()) / df['Raised'].std()

data_normalized_raised = pd.concat([raised_normalized, df[['Country_ID', 'ESG', 'E', 'S', 'G']]], axis=1)

data_array = data_normalized_raised.to_numpy()

# 9x2 map; input_len is taken from the array that is actually trained on.
som = MiniSom(
    x=9,
    y=2,
    input_len=data_array.shape[1],
    sigma=1.0,
    learning_rate=0.5,
    random_seed=SOM_SEED,
)

som.random_weights_init(data_array)

# NOTE(review): num_iteration=10 looks very low for training a SOM - confirm
# whether a larger value was intended.
som.train_random(data_array, num_iteration=10)

# (x, y) grid coordinates of the best-matching neuron for every sample.
winning_neurons = np.array([som.winner(x) for x in data_array])


# Plots

In [5]:
# Append the winning-neuron grid coordinates (two columns: x, y) to the feature array.
data_with_clusters = np.column_stack((data_array, winning_neurons))

# Collapse each (x, y) grid coordinate into a single flat cluster ID. Colouring
# by the x coordinate alone would give neurons (i, 0) and (i, 1) the same colour.
som_grid_shape = som.get_weights().shape[:2]
cluster_ids = np.ravel_multi_index(
    (winning_neurons[:, 0], winning_neurons[:, 1]), som_grid_shape
)

# Build a 3D scatter plot to visualise the clusters.
trace = go.Scatter3d(
    x=data_with_clusters[:, 0],  # Raised column
    y=data_with_clusters[:, 2],  # ESG column
    z=data_with_clusters[:, 1],  # Country_ID column
    mode='markers',
    marker=dict(
        color=cluster_ids,  # One colour value per winning neuron
        size=8,
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.5
        ),
        opacity=0.8
    )
)

# Build the layout.
layout = go.Layout(
    title='Clusters - MiniSom',
    scene=dict(
        xaxis=dict(title='Raised'),
        yaxis=dict(title='ESG'),
        zaxis=dict(title='Country')
    )
)

# Build the figure.
fig = go.Figure(data=[trace], layout=layout)

# Render the plot inline.
# plot(fig, filename='./out/minisom_cluster_plot.html')
iplot(fig)


In [6]:
# Collapse each (x, y) grid coordinate into a single flat cluster ID. Colouring
# by the x coordinate alone would give neurons (i, 0) and (i, 1) the same colour.
som_grid_shape = som.get_weights().shape[:2]
cluster_ids = np.ravel_multi_index(
    (winning_neurons[:, 0], winning_neurons[:, 1]), som_grid_shape
)

# Build a 2D scatter plot to visualise the clusters.
trace = go.Scatter(
    x=data_with_clusters[:, 0],  # Raised column
    y=data_with_clusters[:, 1],  # Country_ID column
    mode='markers',
    marker=dict(
        color=cluster_ids,  # One colour value per winning neuron
        size=8,
        line=dict(
            color='rgba(217, 217, 217, 0.14)',
            width=0.5
        ),
        opacity=0.8
    )
)

# Build the layout.
layout = go.Layout(
    title='Clusters - MiniSom',
    xaxis=dict(title='Raised'),
    yaxis=dict(title='Country')
)

# Build the figure.
fig = go.Figure(data=[trace], layout=layout)

# Render the plot inline.
# filename='minisom_cluster_plot_2d.html'
iplot(fig)

In [7]:
# Project the feature array down to two dimensions with t-SNE (fixed seed).
tsne = TSNE(
    random_state=0,
    perplexity=30,
    n_components=2,
)
data_tsne = tsne.fit_transform(data_array)

In [8]:
# Collapse each (x, y) grid coordinate into a single flat cluster ID. Colouring
# by the x coordinate alone would give neurons (i, 0) and (i, 1) the same colour.
som_grid_shape = som.get_weights().shape[:2]
cluster_ids = np.ravel_multi_index(
    (winning_neurons[:, 0], winning_neurons[:, 1]), som_grid_shape
)

# Create a scatter plot trace of the t-SNE embedding, coloured by SOM cluster.
scatter = go.Scatter(
    x=data_tsne[:, 0],
    y=data_tsne[:, 1],
    mode='markers',
    marker=dict(
        color=cluster_ids,  # One colour value per winning neuron
        colorscale='Viridis',
        opacity=0.5,
        # The colour bar is configured on the marker itself: the trace does not
        # bind to a layout-level coloraxis, so a layout `coloraxis_colorbar`
        # setting would not be shown.
        showscale=True,
        colorbar=dict(title='Cluster')
    ),
    text=['Cluster: {}'.format(label) for label in cluster_ids]  # Hover label per point
)

# Create layout
layout = go.Layout(
    title='t-SNE Visualization of MiniSom Clusters',
    xaxis=dict(title='t-SNE Dimension 1'),
    yaxis=dict(title='t-SNE Dimension 2')
)

# Create figure
fig = go.Figure(data=[scatter], layout=layout)

# Plot the figure
iplot(fig)

# Save results to file

In [9]:
# data_with_clusters holds the feature columns followed by the two grid
# coordinates (x, y) of each sample's winning neuron, so the header needs one
# name per coordinate; a single 'Cluster' name would leave the header one column short.
# Note that the 'Raised' column written here is the standardised value.
output_columns = list(data.columns) + ['Cluster_X', 'Cluster_Y']
if len(output_columns) != data_with_clusters.shape[1]:
    raise ValueError(
        "Header has {} names but data_with_clusters has {} columns".format(
            len(output_columns), data_with_clusters.shape[1]
        )
    )
np.savetxt('minisom_with_clusters.csv', data_with_clusters, delimiter=',', header=','.join(output_columns), comments='')