# Practice Exercise: Self-Organizing Maps (SOMs)

## Scenario:
You have a dataset of student grades across three subjects: **Math**, **Science**, and **English**. Your goal is to:
1. Visualize the student performance patterns using a SOM.
2. Identify clusters of students with similar performance.

In [2]:
# SOM implementation used for the clustering below.
from minisom import MiniSom 
import numpy as np
import pandas as pd 
# Plotly Express is used for both the raw-grade and the SOM scatter plots.
import plotly.express as px
# Scales each grade column before it is fed to the SOM.
from sklearn.preprocessing import MinMaxScaler

import plotly.io as pio
# Select the 'vscode' Plotly renderer as the default for every fig.show() call.
# NOTE(review): this is environment-specific -- presumably needs changing when
# the notebook is run outside VS Code; confirm for your setup.
pio.renderers.default = 'vscode'

## Data:
# One row per student; each tuple is (Math, Science, English).
grade_rows = [
    (78, 80, 70),
    (72, 75, 68),
    (90, 91, 85),
    (88, 87, 82),
    (92, 93, 89),
    (60, 62, 50),
    (58, 59, 45),
    (55, 57, 60),
    (54, 56, 58),
    (95, 96, 90),
]
students = pd.DataFrame(grade_rows, columns=["Math", "Science", "English"])
students

Unnamed: 0,Math,Science,English
0,78,80,70
1,72,75,68
2,90,91,85
3,88,87,82
4,92,93,89
5,60,62,50
6,58,59,45
7,55,57,60
8,54,56,58
9,95,96,90


In [8]:
# 3-D scatter of the raw grades: one axis per subject, one point per student.
grade_axes = {"x": "Math", "y": "Science", "z": "English"}
fig = px.scatter_3d(
    students,
    title="Student Grades Visualization",
    template="plotly_dark",
    **grade_axes,
)
# Fix the marker size for every trace in the figure.
fig.update_traces(marker={"size": 8})
fig.show()

In [3]:
# Min-max scale the grade columns; the scaler is fitted on `students`
# and then applied to the same frame.
scaler = MinMaxScaler()
data_normalized = scaler.fit(students).transform(students)
data_normalized

array([[0.58536585, 0.6       , 0.55555556],
       [0.43902439, 0.475     , 0.51111111],
       [0.87804878, 0.875     , 0.88888889],
       [0.82926829, 0.775     , 0.82222222],
       [0.92682927, 0.925     , 0.97777778],
       [0.14634146, 0.15      , 0.11111111],
       [0.09756098, 0.075     , 0.        ],
       [0.02439024, 0.025     , 0.33333333],
       [0.        , 0.        , 0.28888889],
       [1.        , 1.        , 1.        ]])

In [5]:
# Build and train a 5x5 SOM on the normalized grades.
# Both the weight initialisation (random_weights_init picks random samples)
# and train_random (picks a random sample per iteration) are stochastic, so a
# fixed random_seed is required for Restart & Run All to reproduce the same map.
som = MiniSom(
    x=5,
    y=5,
    input_len=data_normalized.shape[1],  # one input per grade column
    sigma=1.0,
    learning_rate=0.5,
    random_seed=42,
)
som.random_weights_init(data_normalized)
som.train_random(data_normalized, num_iteration=100)

som

<minisom.MiniSom at 0x1a12e5da850>

In [6]:
# Dataframe for Visualization
# For each normalized sample, look up its winning neuron on the SOM grid and
# record the (1-based) student number together with the neuron coordinates.
som_results = [
    {'Student': student_no, 'Neuron X': winner[0], 'Neuron Y': winner[1]}
    for student_no, winner in enumerate(map(som.winner, data_normalized), start=1)
]

results_df = pd.DataFrame(som_results)
results_df

Unnamed: 0,Student,Neuron X,Neuron Y
0,1,1,3
1,2,0,0
2,3,2,2
3,4,2,1
4,5,3,2
5,6,3,4
6,7,3,4
7,8,4,0
8,9,4,0
9,10,3,2


In [7]:
# Plot SOM using Plotly
# Several students can share the same winning neuron. Plotting one marker per
# student stacks markers and text labels on top of each other, which hides
# cluster membership. Aggregate per neuron instead: one marker per occupied
# neuron, labelled with every student mapped to it and sized by how many
# students it holds.
neuron_groups = (
    results_df
    .groupby(["Neuron X", "Neuron Y"], as_index=False)
    .agg(
        Students=("Student", lambda ids: ", ".join(map(str, ids))),
        Count=("Student", "size"),
    )
)

fig = px.scatter(
    neuron_groups,
    x="Neuron X",
    y="Neuron Y",
    text="Students",
    size="Count",  # size_max only takes effect when a size column is given
    title="Self-Organizing Map (SOM) - Student Clustering",
    template="plotly_dark",
    size_max=15
)
fig.update_traces(textposition='top center')
# Neuron coordinates are integer grid positions, so use integer tick spacing.
fig.update_layout(showlegend=False, xaxis=dict(dtick=1), yaxis=dict(dtick=1))
fig.show()