In [1]:
import pandas as pd
from sklearn.decomposition import PCA

In [2]:
# Load the survey responses from a CSV located next to the notebook
# (relative path, so the kernel's working directory must contain the file).
datos = pd.read_csv("mxmh_survey_results.csv")
# Bare last expression: displays the column index so the available fields are visible.
datos.columns

Index(['Timestamp', 'Age', 'Primary streaming service', 'Hours per day',
       'While working', 'Instrumentalist', 'Composer', 'Fav genre',
       'Exploratory', 'Foreign languages', 'BPM', 'Frequency [Classical]',
       'Frequency [Country]', 'Frequency [EDM]', 'Frequency [Folk]',
       'Frequency [Gospel]', 'Frequency [Hip hop]', 'Frequency [Jazz]',
       'Frequency [K pop]', 'Frequency [Latin]', 'Frequency [Lofi]',
       'Frequency [Metal]', 'Frequency [Pop]', 'Frequency [R&B]',
       'Frequency [Rap]', 'Frequency [Rock]', 'Frequency [Video game music]',
       'Anxiety', 'Depression', 'Insomnia', 'OCD', 'Music effects',
       'Permissions'],
      dtype='object')

In [3]:
# Summarize the raw frame: per-column non-null counts and dtypes.
datos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 736 entries, 0 to 735
Data columns (total 33 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Timestamp                     736 non-null    object 
 1   Age                           735 non-null    float64
 2   Primary streaming service     735 non-null    object 
 3   Hours per day                 736 non-null    float64
 4   While working                 733 non-null    object 
 5   Instrumentalist               732 non-null    object 
 6   Composer                      735 non-null    object 
 7   Fav genre                     736 non-null    object 
 8   Exploratory                   736 non-null    object 
 9   Foreign languages             732 non-null    object 
 10  BPM                           629 non-null    float64
 11  Frequency [Classical]         736 non-null    object 
 12  Frequency [Country]           736 non-null    object 
 13  Frequ


Frequency encoding (English label / Spanish translation = numeric code):
'Never' ('Nunca') = 0
'Rarely' ('Rara vez') = 1
'Sometimes' ('A veces') = 2
'Very frequently' ('Muy frecuentemente') = 3


'Rara vez' 'Nunca' 'Muy frecuentemente' 'A veces'

In [4]:
# Collect every per-genre listening-frequency column directly from the frame,
# so the list cannot drift from the CSV header. Order follows the file.
columnas_frecuencias = [
    columna for columna in datos.columns if columna.startswith("Frequency [")
]

# Print the distinct answers found in each genre column to confirm that they
# all share the same label set before encoding them.
for columna in columnas_frecuencias:
    print(f"{columna}: {datos[columna].unique()}")

Frequency [Classical]: ['Rarely' 'Sometimes' 'Never' 'Very frequently']
Frequency [Country]: ['Never' 'Sometimes' 'Very frequently' 'Rarely']
Frequency [EDM]: ['Rarely' 'Never' 'Very frequently' 'Sometimes']
Frequency [Folk]: ['Never' 'Rarely' 'Sometimes' 'Very frequently']
Frequency [Gospel]: ['Never' 'Sometimes' 'Rarely' 'Very frequently']
Frequency [Hip hop]: ['Sometimes' 'Rarely' 'Never' 'Very frequently']
Frequency [Jazz]: ['Never' 'Very frequently' 'Rarely' 'Sometimes']
Frequency [K pop]: ['Very frequently' 'Rarely' 'Sometimes' 'Never']
Frequency [Latin]: ['Very frequently' 'Sometimes' 'Never' 'Rarely']
Frequency [Lofi]: ['Rarely' 'Sometimes' 'Very frequently' 'Never']
Frequency [Metal]: ['Never' 'Sometimes' 'Rarely' 'Very frequently']
Frequency [Pop]: ['Very frequently' 'Sometimes' 'Rarely' 'Never']
Frequency [R&B]: ['Sometimes' 'Never' 'Very frequently' 'Rarely']
Frequency [Rap]: ['Very frequently' 'Rarely' 'Never' 'Sometimes']
Frequency [Rock]: ['Never' 'Very frequently' 'Rare

In [5]:

# Ordinal encoding of the survey's frequency labels (higher = listens more often).
mapeo_frecuencia = {
    'Never': 0,
    'Rarely': 1,
    'Sometimes': 2,
    'Very frequently': 3
}

# Encode each frequency column in place on `datos`.
for columna in columnas_frecuencias:
    # Already-encoded columns are skipped so that re-running this cell is a
    # no-op; mapping the integers a second time would turn every value into NaN.
    if pd.api.types.is_numeric_dtype(datos[columna]):
        continue

    codificada = datos[columna].map(mapeo_frecuencia)

    # `.map` yields NaN for anything missing from the dictionary; fail loudly
    # instead of letting NaN flow silently into later steps.
    sin_mapear = codificada.isna()
    if sin_mapear.any():
        desconocidos = datos.loc[sin_mapear, columna].unique()
        raise ValueError(
            f"{columna}: values without a frequency code: {desconocidos}"
        )

    datos[columna] = codificada

In [6]:
for columna in columnas_frecuencias:
    print(f"{columna}: {datos[columna].unique()}")

Frequency [Classical]: [1 2 0 3]
Frequency [Country]: [0 2 3 1]
Frequency [EDM]: [1 0 3 2]
Frequency [Folk]: [0 1 2 3]
Frequency [Gospel]: [0 2 1 3]
Frequency [Hip hop]: [2 1 0 3]
Frequency [Jazz]: [0 3 1 2]
Frequency [K pop]: [3 1 2 0]
Frequency [Latin]: [3 2 0 1]
Frequency [Lofi]: [1 2 3 0]
Frequency [Metal]: [0 2 1 3]
Frequency [Pop]: [3 2 1 0]
Frequency [R&B]: [2 0 3 1]
Frequency [Rap]: [3 1 0 2]
Frequency [Rock]: [0 3 1 2]
Frequency [Video game music]: [2 1 3 0]


In [None]:
# Re-inspect the frame after encoding; the Frequency columns should now report an integer dtype.
datos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 736 entries, 0 to 735
Data columns (total 33 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Timestamp                     736 non-null    object 
 1   Age                           735 non-null    float64
 2   Primary streaming service     735 non-null    object 
 3   Hours per day                 736 non-null    float64
 4   While working                 733 non-null    object 
 5   Instrumentalist               732 non-null    object 
 6   Composer                      735 non-null    object 
 7   Fav genre                     736 non-null    object 
 8   Exploratory                   736 non-null    object 
 9   Foreign languages             732 non-null    object 
 10  BPM                           629 non-null    float64
 11  Frequency [Classical]         736 non-null    int64  
 12  Frequency [Country]           736 non-null    int64  
 13  Frequ

In [8]:
# Columns removed from the working frame. What remains are the genre-frequency
# columns plus Anxiety, Depression, Insomnia and OCD.
columnas_a_eliminar = [
    'Timestamp',
    'Age',
    'Primary streaming service',
    'Hours per day',
    'While working',
    'Instrumentalist',
    'Composer',
    'Fav genre',
    'Exploratory',
    'Foreign languages',
    'BPM',
    'Music effects',
    'Permissions',
]

# `drop` returns a new frame, so `datos` itself is left untouched.
datos_musica = datos.drop(columnas_a_eliminar, axis="columns")

# Summarize the reduced frame.
datos_musica.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 736 entries, 0 to 735
Data columns (total 20 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   Frequency [Classical]         736 non-null    int64  
 1   Frequency [Country]           736 non-null    int64  
 2   Frequency [EDM]               736 non-null    int64  
 3   Frequency [Folk]              736 non-null    int64  
 4   Frequency [Gospel]            736 non-null    int64  
 5   Frequency [Hip hop]           736 non-null    int64  
 6   Frequency [Jazz]              736 non-null    int64  
 7   Frequency [K pop]             736 non-null    int64  
 8   Frequency [Latin]             736 non-null    int64  
 9   Frequency [Lofi]              736 non-null    int64  
 10  Frequency [Metal]             736 non-null    int64  
 11  Frequency [Pop]               736 non-null    int64  
 12  Frequency [R&B]               736 non-null    int64  
 13  Frequ

In [10]:
# Feature matrix: every column except the four outcome columns.
X = datos_musica.drop(["Anxiety", "Depression", "Insomnia", "OCD"], axis="columns")

# One target Series per outcome column, unpacked in the same order as the names.
yA, yD, yI, yO = (
    datos_musica[objetivo]
    for objetivo in ("Anxiety", "Depression", "Insomnia", "OCD")
)

# Display the shapes to confirm features and targets have the same number of rows.
tuple(parte.shape for parte in (X, yA, yD, yI, yO))


((736, 16), (736,), (736,), (736,), (736,))

In [None]:
# Project the frequency features onto 8 principal components.
# `random_state` is fixed because, depending on the scikit-learn version and the
# data shape, PCA may pick a randomized SVD solver; without a seed the projected
# values could then differ between runs.
pca = PCA(n_components=8, random_state=42)
X_transformed = pca.fit_transform(X)

# Display the shape of the projected data: (n_samples, n_components).
X_transformed.shape

(736, 8)