# Analysis

**Initializing**

In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

from Brands import Sonites
from Analyzer import Analyzer

# Per-segment weights handed to the Analyzer as its marketing-mix weighting.
# Only Shoppers and Savers carry weight; the five values sum to 1.
# NOTE(review): the name suggests these weights are for the MOVE brand — confirm.
move_mkt_weights = {
    "Explorers": 0, "Shoppers": 0.35,
    "Professionals": 0, "High Earners": 0,
    "Savers": 0.65,
}

# Analysis helper; period 2 is the latest period shown in the segment tables below.
an = Analyzer(
    marketing_mix_segment_weights=move_mkt_weights,
    last_period=2,
)

# Data accessor for the Sonites market, used by every cell that follows.
son = Sonites()



**Percentage expenditure by segment for each product**

In [2]:
# Per-brand split of expenditure across the five segments (each displayed row sums to ~1).
# NOTE(review): the effect of capped=False is not visible in this notebook — confirm in Brands.Sonites.
df_marketing_mixes = son.get_marketing_mixes(capped=False)
df_marketing_mixes


Unnamed: 0_level_0,Explorers,Shoppers,Professionals,High Earners,Savers
MARKET : Sonites,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
MOST,0.086735,0.285714,0.086735,0.086735,0.454082
MOVE,0.414634,0.097561,0.195122,0.195122,0.097561
ROBUDO,0.090226,0.421053,0.090226,0.090226,0.308271
ROCK,0.092308,0.092308,0.338462,0.384615,0.092308
SOFT,0.149758,0.251208,0.149758,0.149758,0.299517
SOLO,0.202899,0.275362,0.173913,0.173913,0.173913
TONE,0.302326,0.149502,0.122924,0.302326,0.122924
TOPS,0.302682,0.122605,0.302682,0.122605,0.149425


## Semantic scales

In [3]:
# Semantic-scale values for each segment and period; rows are labelled "<Segment>_<Period>".
son.df_segments_semantic

Unnamed: 0_level_0,Segment,Period,# Features,Design Index,Battery Life,Display Size,Proc. Power,Price
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Explorers_2,Explorers,2,4.55,1.91,5.82,5.91,6.13,3.7
High Earners_2,High Earners,2,3.27,6.37,3.55,4.47,4.78,5.57
Professionals_2,Professionals,2,5.32,5.85,5.03,5.54,5.43,5.23
Savers_2,Savers,2,2.58,3.9,1.78,2.73,2.25,2.13
Shoppers_2,Shoppers,2,1.87,4.97,2.9,4.27,4.05,3.02
Explorers_1,Explorers,1,4.65,1.8,6.03,5.99,6.25,3.54
High Earners_1,High Earners,1,3.27,6.38,3.57,4.64,4.83,5.59
Professionals_1,Professionals,1,5.56,5.85,5.05,5.46,5.28,5.0
Savers_1,Savers,1,2.53,3.89,1.75,2.58,2.22,2.14
Shoppers_1,Shoppers,1,1.81,5.08,2.93,4.13,3.86,3.14


In [4]:
# Semantic-scale values for each Sonites brand, on the same six attributes as the segment table.
son.df_sonites_semantic

Unnamed: 0_level_0,# Features,Design Index,Battery Life,Display Size,Proc. Power,Price
MARKET : Sonites,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
MOST,2.11,2.34,3.31,1.61,1.5,2.36
MOVE,4.26,1.88,5.93,3.05,3.17,4.83
ROBUDO,2.11,4.77,2.39,4.22,3.18,2.82
ROCK,4.7,6.37,6.47,5.93,5.07,5.7
SOFT,1.73,1.63,3.31,1.61,1.55,2.28
SOLO,1.73,3.23,4.0,2.07,2.86,4.36
TONE,5.32,6.5,5.12,4.52,5.61,5.88
TOPS,3.16,1.63,1.84,6.32,6.05,5.11


### Compute Distances between Segments and Products 

----
#### Note on "Relevance Score"

The **relevance score** quantifies the alignment between an observation and a benchmark (ideal target), with values ranging from 0 to 1:

- A score of **1** indicates that the observation perfectly matches the benchmark.
- A score of **0** indicates the maximum possible distance between the observation and benchmark, within the given feature space.

##### Formal Calculation

Given:
- An **observation vector** $ \mathbf{x} = (x_1, x_2, \dots, x_n) $
- A **benchmark vector** $ \mathbf{y} = (y_1, y_2, \dots, y_n) $
- A **weight vector** $ \mathbf{w} = (w_1, w_2, \dots, w_n) $, where each $ w_i $ represents the relative importance of feature $ i $ and $ \sum_{i=1}^n w_i = 1 $

The relevance score $ R $ is computed as follows:

\begin{equation*}
R = 1 - \frac{D(\mathbf{x}, \mathbf{y})}{D_{\text{max}}}
\end{equation*}

where:
- $ D(\mathbf{x}, \mathbf{y}) $ is the **weighted Euclidean distance** between the observation and the benchmark:

  \begin{equation*}
  D(\mathbf{x}, \mathbf{y}) = \sqrt{\sum_{i=1}^n w_i \cdot (x_i - y_i)^2}
  \end{equation*}

- $ D_{\text{max}} $ is the **maximum possible weighted Euclidean distance** for the feature space, assuming each feature spans the full range from its minimum to its maximum possible value. For example, if features range from 1 to 7, the largest possible difference for each feature $ i $ is $ \Delta_i = \max |x_i - y_i| = 6 $:

  \begin{equation*}
  D_{\text{max}} = \sqrt{\sum_{i=1}^n w_i \cdot (\Delta_i)^2}
  \end{equation*}

  where $ \Delta_i $ is the maximum possible range for feature $ i $.

##### Interpretation

- **High Relevance Score (close to 1)**: The observation is highly similar to the benchmark.
- **Low Relevance Score (closer to 0)**: The observation is farther from the benchmark.
----


#### Compute closest brands for each segment

In [5]:
# The first five rows of the segment table are the period-2 rows (one per segment);
# keep only the six semantic attributes so the columns line up with the brand table.
# NOTE(review): the positional slice relies on the table's row order — confirm it is stable.
df_seg_sem = son.df_segments_semantic.iloc[:5][
    ['# Features', 'Design Index', 'Battery Life', 'Display Size', 'Proc. Power', 'Price']
]

# For every segment, print its three closest brands.
_ = an.get_n_closest(df_base=df_seg_sem, df_performers=son.df_sonites_semantic, num_top=3)

---------- Explorers_2 ----------
Segment:	 TOPS
Distance:	 0.769327415435121
Segment:	 MOVE
Distance:	 0.6822110114087632
Segment:	 ROCK
Distance:	 0.6584721536371159

---------- High Earners_2 ----------
Segment:	 TONE
Distance:	 0.8732389807193461
Segment:	 ROCK
Distance:	 0.8395276600716305
Segment:	 SOLO
Distance:	 0.6897959306749017

---------- Professionals_2 ----------
Segment:	 ROCK
Distance:	 0.9059633520564319
Segment:	 TONE
Distance:	 0.8999245749299667
Segment:	 TOPS
Distance:	 0.7075157038539275

---------- Savers_2 ----------
Segment:	 MOST
Distance:	 0.8559858755982217
Segment:	 ROBUDO
Distance:	 0.8508361551591347
Segment:	 SOFT
Distance:	 0.8288548639418232

---------- Shoppers_2 ----------
Segment:	 ROBUDO
Distance:	 0.9219367080439186
Segment:	 SOLO
Distance:	 0.7583805112904527
Segment:	 MOST
Distance:	 0.6919463301759522



#### Compute closest Segments for each Brand

In [6]:
# Reverse direction: for every brand, print its three closest period-2 segments.
# num_top is passed explicitly so this cell matches the other get_n_closest calls
# instead of depending on the method's default.
_ = an.get_n_closest(df_base=son.df_sonites_semantic, df_performers=df_seg_sem, num_top=3)

---------- MOST ----------
Segment:	 Savers_2
Distance:	 0.8559858755982217
Segment:	 Shoppers_2
Distance:	 0.6919463301759522
Segment:	 Explorers_2
Distance:	 0.5036224532331863

---------- MOVE ----------
Segment:	 Explorers_2
Distance:	 0.6822110114087632
Segment:	 Shoppers_2
Distance:	 0.680919831084708
Segment:	 High Earners_2
Distance:	 0.6753162536445776

---------- ROBUDO ----------
Segment:	 Shoppers_2
Distance:	 0.9219367080439186
Segment:	 Savers_2
Distance:	 0.8508361551591347
Segment:	 High Earners_2
Distance:	 0.660595375891528

---------- ROCK ----------
Segment:	 Professionals_2
Distance:	 0.9059633520564319
Segment:	 High Earners_2
Distance:	 0.8395276600716305
Segment:	 Explorers_2
Distance:	 0.6584721536371159

---------- SOFT ----------
Segment:	 Savers_2
Distance:	 0.8288548639418232
Segment:	 Shoppers_2
Distance:	 0.6715085155916547
Segment:	 Explorers_2
Distance:	 0.5015285154979494

---------- SOLO ----------
Segment:	 Shoppers_2
Distance:	 0.7583805112904527
Se

## Multi Dimensional Scaling

In [7]:
# MDS coordinates (Economy, Performance, Convenience) for each segment and period.
son.df_segments_mds

Unnamed: 0_level_0,Segment,Period,Economy,Performance,Convenience
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Explorers_2,Explorers,2,2.02,13.74,-6.96
High Earners_2,High Earners,2,-10.44,4.56,9.98
Professionals_2,Professionals,2,-8.22,9.74,10.88
Savers_2,Savers,2,12.5,-10.72,-4.36
Shoppers_2,Shoppers,2,6.52,0.78,1.68
Explorers_1,Explorers,1,3.06,14.48,-7.14
High Earners_1,High Earners,1,-10.62,5.16,10.04
Professionals_1,Professionals,1,-6.64,8.9,11.1
Savers_1,Savers,1,12.38,-11.12,-4.48
Shoppers_1,Shoppers,1,5.72,-0.38,2.16


In [8]:
# MDS coordinates for each Sonites brand, on the same three axes as the segment table.
son.df_sonites_mds

Unnamed: 0_level_0,Economy,Performance,Convenience
MARKET : Sonites,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MOST,10.92,-17.52,-9.92
MOVE,-5.52,-5.78,-7.16
ROBUDO,7.9,-3.38,0.18
ROCK,-11.3,8.84,14.82
SOFT,11.46,-16.24,-13.48
SOLO,-2.38,-9.16,-5.1
TONE,-12.52,8.56,16.36
TOPS,-7.42,14.22,-14.5


### Compute Distances between Segments and Products 

#### Compute closest brands for each segment

In [13]:
# Period-2 segment positions on the three MDS axes (the first five rows of the table).
df_seg_mds = son.df_segments_mds.iloc[:5][['Economy', 'Performance', 'Convenience']]

# For every segment, print its three closest brands in MDS space.
# NOTE(review): max_distance_1D=40 is presumably the span of a single MDS axis and
# weighted="eq" presumably means equal axis weights — confirm both in Analyzer.
_ = an.get_n_closest(
    df_base=df_seg_mds,
    df_performers=son.df_sonites_mds,
    num_top=3,
    max_distance_1D=40,
    weighted="eq",
)

---------- Explorers_2 ----------
Segment:	 TOPS
Distance:	 0.8254794663466025
Segment:	 ROBUDO
Distance:	 0.7191351155211223
Segment:	 MOVE
Distance:	 0.6979507490490996

---------- High Earners_2 ----------
Segment:	 ROCK
Distance:	 0.9059215752682901
Segment:	 TONE
Distance:	 0.8872402997520834
Segment:	 MOVE
Distance:	 0.7024749310282129

---------- Professionals_2 ----------
Segment:	 ROCK
Distance:	 0.9266571975810396
Segment:	 TONE
Distance:	 0.8980269643484122
Segment:	 ROBUDO
Distance:	 0.6625837140859974

---------- Savers_2 ----------
Segment:	 MOST
Distance:	 0.8711832697201175
Segment:	 ROBUDO
Distance:	 0.8588387446924617
Segment:	 SOFT
Distance:	 0.8453994394145568

---------- Shoppers_2 ----------
Segment:	 ROBUDO
Distance:	 0.9331357095802749
Segment:	 SOLO
Distance:	 0.7839839897908799
Segment:	 MOVE
Distance:	 0.7645302567207413



#### Compute closest Segments for each Brand

In [10]:
# Reverse lookup in MDS space: for every brand, print its three closest period-2 segments.
_ = an.get_n_closest(
    df_base=son.df_sonites_mds,
    df_performers=df_seg_mds,
    num_top=3,
    max_distance_1D=40,
    weighted="eq",
)

---------- MOST ----------
Segment:	 Savers_2
Distance:	 0.8711832697201175
Segment:	 Shoppers_2
Distance:	 0.6808833807733188
Segment:	 Explorers_2
Distance:	 0.5289287032023001

---------- MOVE ----------
Segment:	 Shoppers_2
Distance:	 0.7645302567207413
Segment:	 Savers_2
Distance:	 0.7272959479582307
Segment:	 High Earners_2
Distance:	 0.7024749310282129

---------- ROBUDO ----------
Segment:	 Shoppers_2
Distance:	 0.9331357095802749
Segment:	 Savers_2
Distance:	 0.8588387446924617
Segment:	 Explorers_2
Distance:	 0.7191351155211223

---------- ROCK ----------
Segment:	 Professionals_2
Distance:	 0.9266571975810396
Segment:	 High Earners_2
Distance:	 0.9059215752682901
Segment:	 Shoppers_2
Distance:	 0.6599094483327517

---------- SOFT ----------
Segment:	 Savers_2
Distance:	 0.8453994394145568
Segment:	 Shoppers_2
Distance:	 0.6633778082181747
Segment:	 Explorers_2
Distance:	 0.5366731715084913

---------- SOLO ----------
Segment:	 Shoppers_2
Distance:	 0.7839839897908799
Segment

In [17]:
# Build a symmetric matrix of pairwise MDS distances between every brand and
# every period-2 segment.
df_all_mds = son.get_comprehensive_df_mds()
index = df_all_mds.index

# Element 0 of the returned sequence is addressed as distances[a][b].
distances = an.compute_distance_centroids(df_all_mds, df_all_mds, weighted="eq", max_distance_1D=40)[0]

# Allocate as float up front so the result is numeric rather than object dtype.
df_out = pd.DataFrame(index=index, columns=index, dtype=float)

# Walk the upper triangle (diagonal included) and mirror each value, so the
# matrix is symmetric by construction.
for i, start in enumerate(index):
    for stop in index[i:]:
        dist = distances[start][stop]
        df_out.loc[start, stop] = dist
        df_out.loc[stop, start] = dist

df_out

MOST
MOVE
ROBUDO
ROCK
SOFT
SOLO
TONE
TOPS
Explorers_2
High Earners_2
Professionals_2
Savers_2
Shoppers_2


Unnamed: 0,MOST,MOVE,ROBUDO,ROCK,SOFT,SOLO,TONE,TOPS,Explorers_2,High Earners_2,Professionals_2,Savers_2,Shoppers_2
MOST,0.0,20.389183,17.637177,42.434015,3.821466,16.432042,43.820525,36.942653,32.636777,36.603087,39.269418,8.924685,22.109048
MOVE,20.389183,0.0,15.483281,27.023567,20.920669,5.052485,28.42228,21.388913,20.926586,20.613141,23.94999,18.893491,16.313822
ROBUDO,17.637177,15.483281,0.0,27.061005,19.09578,12.921501,28.658897,27.567459,19.45889,22.258464,23.376886,9.779939,4.632494
ROCK,42.434015,27.023567,27.061005,0.0,44.135179,28.290861,1.98454,30.060958,25.996169,6.517944,5.081338,36.289199,23.562165
SOFT,3.821466,20.920669,19.09578,44.135179,0.0,17.660589,45.612564,35.85117,32.100224,38.244236,40.689955,10.711041,23.32187
SOLO,16.432042,5.052485,12.921501,28.290861,17.660589,0.0,29.620088,25.697977,23.392939,21.922783,25.429825,14.97984,14.966028
TONE,43.820525,28.42228,28.658897,1.98454,45.612564,29.620088,0.0,31.786557,27.96545,7.812221,7.064899,37.776146,25.269594
TOPS,36.942653,21.388913,27.567459,30.060958,35.85117,25.697977,31.786557,0.0,12.091137,26.489741,25.784778,33.490739,25.233898
Explorers_2,32.636777,20.926586,19.45889,25.996169,32.100224,23.392939,27.96545,12.091137,0.0,22.945318,20.955267,26.737277,16.212995
High Earners_2,36.603087,20.613141,22.258464,6.517944,38.244236,21.922783,7.812221,26.489741,22.945318,0.0,5.707083,31.070204,19.256687
