In [16]:
# importing the libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import seaborn as sb

In [49]:
# pandas display options: lift the row, column and width limits so frames render untruncated
for option_name in ('display.max_rows', 'display.max_columns', 'display.width'):
    pd.set_option(option_name, None)
# pd.reset_option('all')

In [None]:
# Load the survey results once; `df_copy` and `df` are two independent frames
# holding the same data as read from disk.
df_copy = pd.read_csv('mxmh_survey_results.csv')
df = df_copy.copy()

In [None]:
# Dropping the Timestamp column.
# Reassign instead of mutating in place, and ignore a missing column, so this
# cell can be re-run without raising KeyError once the column is already gone.
df = df.drop(columns='Timestamp', errors='ignore')

In [None]:
# Dropping the Permissions column.
# Reassign instead of mutating in place, and ignore a missing column, so this
# cell can be re-run without raising KeyError once the column is already gone.
df = df.drop(columns='Permissions', errors='ignore')

In [None]:
# Age plus the four disorder ratings for every respondent, ordered by ascending age
df.loc[:, ['Age', 'Anxiety', 'Depression', 'Insomnia', 'OCD']].sort_values(by='Age')

In [15]:
# Pairwise scatter matrix of the four disorder ratings, with points coloured by age
fig = px.scatter_matrix(
    df,
    dimensions=['Anxiety', 'Depression', 'Insomnia', 'OCD'],
    color='Age',
    title='Scatter Plot Matrix for Disorders by Age',
)
fig.show()

In [21]:
# Line chart of the mean rating of each disorder at every age.
#
# The rows of `df` are not ordered by age and many respondents share the same
# age, so joining the raw rows with 'lines+markers' draws segments that jump
# back and forth across the x axis. Aggregating to one mean value per age gives
# a single, age-ordered point per disorder instead.
disorders = ['Anxiety', 'Depression', 'Insomnia', 'OCD']

# groupby sorts the ages ascending and leaves out rows whose Age is missing
mean_rating_by_age = df.groupby('Age')[disorders].mean()

# Create the Plotly Figure
fig2 = go.Figure()

# Add a line for each disorder
for disorder in disorders:
    fig2.add_trace(go.Scatter(
        x=mean_rating_by_age.index,
        y=mean_rating_by_age[disorder],
        mode='lines+markers',
        name=disorder
    ))

# Update layout (the y axis now shows a per-age mean, so label it as such)
fig2.update_layout(
    title="Age Correlation with Disorder Ratings",
    xaxis_title="Age",
    yaxis_title="Mean Disorder Rating",
    legend_title="Disorders",
    template="plotly_white"
)

fig2.show()

In [22]:
# Reshape to long form: one row per (respondent, disorder) pair, with the
# disorder name in 'Disorder' and its score in 'Rating'.
df_melted = df.melt(id_vars=['Age'],
                    value_vars=['Anxiety', 'Depression', 'Insomnia', 'OCD'],
                    var_name='Disorder',
                    value_name='Rating')

# Create a grouped histogram of the average rating per age bin.
# When `y` is supplied, px.histogram aggregates with histfunc='sum' by default,
# so bar heights would mostly reflect how many respondents fall in each age bin
# rather than how they rated. Averaging makes the age bins comparable.
fig3 = px.histogram(
    df_melted,
    x="Age",
    y="Rating",
    color="Disorder",
    histfunc="avg",
    barmode="group",  # Group bars by Disorder
    title="Distribution of Disorder Ratings Across Ages",
    labels={"Rating": "Disorder Rating", "Age": "Age"},
    template="plotly_white"
)

# Axis titles; the y axis is an average, so say so
fig3.update_layout(
    xaxis=dict(title="Age"),
    yaxis=dict(title="Average Disorder Rating"),
    legend_title="Disorders"
)

fig3.show()

In [24]:
# Long form: one row per (respondent, disorder) pair
df_melted = df.melt(
    id_vars=['Age'],
    value_vars=['Anxiety', 'Depression', 'Insomnia', 'OCD'],
    var_name='Disorder',
    value_name='Rating',
)

# One age histogram per disorder, laid out side by side (one facet column each).
# NOTE(review): no `y` is passed, so each panel counts rows per age bin and the
# 'Rating' column is never plotted. Since melt emits one row per respondent for
# every disorder, the four panels should come out identical; confirm whether
# the ratings were meant to be shown here.
fig4 = px.histogram(
    df_melted,
    x="Age",
    color="Disorder",
    facet_col="Disorder",
    barmode="overlay",
    title="Distribution of Disorder Ratings Across Ages",
    labels={"Rating": "Disorder Rating", "Age": "Age"},
    template="plotly_white",
).update_layout(
    # update_layout returns the same figure, so fig4 is the configured chart
    xaxis_title="Age",
    yaxis_title="Count of Ratings",
    legend_title="Disorders",
)

fig4.show()

In [None]:
# Number of respondents at each distinct age, listed from youngest to oldest
df['Age'].value_counts(sort=False).sort_index()

Age
10.0     1
12.0     3
13.0     8
14.0    17
15.0    21
16.0    44
17.0    59
18.0    85
19.0    61
20.0    40
21.0    52
22.0    39
23.0    37
24.0    20
25.0    22
26.0    22
27.0    18
28.0    13
29.0    13
30.0    11
31.0    13
32.0    16
33.0     8
34.0     8
35.0     7
36.0     7
37.0     5
38.0     6
39.0     1
40.0     5
41.0     4
42.0     6
43.0     4
44.0     3
46.0     1
48.0     2
49.0     5
50.0     1
51.0     1
53.0     3
54.0     2
55.0     1
56.0     5
57.0     3
58.0     3
59.0     2
60.0     6
61.0     2
63.0     3
64.0     3
65.0     1
67.0     3
68.0     1
69.0     1
70.0     1
71.0     1
72.0     1
73.0     1
74.0     1
80.0     1
89.0     1
Name: count, dtype: int64

In [45]:
# Creating Age Groups and Group Counts columns
# Bin edges and labels for the age groups. Intervals are right-closed, so an age
# of exactly 60 lands in '51-60' and the final '60+' bucket starts at 61.
bins = [10, 15, 20, 30, 40, 50, 60, float('inf')]  # `float('inf')` leaves the last bucket open-ended
labels = ['10-15', '16-20', '21-30', '31-40', '41-50', '51-60', '60+']

# Create the 'Age Groups' column; include_lowest=True keeps the lowest edge (10) in the first bin
df['Age Groups'] = pd.cut(df['Age'], bins=bins, labels=labels, right=True, include_lowest=True)

# Number of respondents in each age group
age_group_counts = df['Age Groups'].value_counts()

# Map the counts back onto every row.
# Mapping a categorical column returns a Categorical whenever the mapped values
# happen to be unique, so the resulting dtype would depend on the data; casting
# to float64 keeps 'Group Counts' numeric (NaN where the age group is missing).
df['Group Counts'] = df['Age Groups'].map(age_group_counts).astype('float64')

In [46]:
# Take the column names as a plain list and look up the position of 'Age'
columns = list(df.columns)
index = columns.index('Age')
index

0

In [47]:
# display the current contents of the column-name list
columns

['Age',
 'Age Groups',
 'Group Counts',
 'Primary streaming service',
 'Hours per day',
 'While working',
 'Instrumentalist',
 'Composer',
 'Fav genre',
 'Exploratory',
 'Foreign languages',
 'BPM',
 'Frequency [Classical]',
 'Frequency [Country]',
 'Frequency [EDM]',
 'Frequency [Folk]',
 'Frequency [Gospel]',
 'Frequency [Hip hop]',
 'Frequency [Jazz]',
 'Frequency [K pop]',
 'Frequency [Latin]',
 'Frequency [Lofi]',
 'Frequency [Metal]',
 'Frequency [Pop]',
 'Frequency [R&B]',
 'Frequency [Rap]',
 'Frequency [Rock]',
 'Frequency [Video game music]',
 'Anxiety',
 'Depression',
 'Insomnia',
 'OCD',
 'Music effects']

In [None]:
# Move 'Age Groups' so it sits directly after 'Age' in the column list
columns.remove('Age Groups')
columns.insert(index + 1, 'Age Groups')

In [None]:
# Move 'Group Counts' to the slot two places after 'Age' in the column list
columns.remove('Group Counts')
columns.insert(index + 2, 'Group Counts')

In [None]:
# Rebuild the DataFrame with its columns in the order given by `columns`
df = df.loc[:, columns]

In [50]:
# Show the reordered frame with respondents sorted by ascending age
df.sort_values('Age')

Unnamed: 0,Age,Age Groups,Group Counts,Primary streaming service,Hours per day,While working,Instrumentalist,Composer,Fav genre,Exploratory,Foreign languages,BPM,Frequency [Classical],Frequency [Country],Frequency [EDM],Frequency [Folk],Frequency [Gospel],Frequency [Hip hop],Frequency [Jazz],Frequency [K pop],Frequency [Latin],Frequency [Lofi],Frequency [Metal],Frequency [Pop],Frequency [R&B],Frequency [Rap],Frequency [Rock],Frequency [Video game music],Anxiety,Depression,Insomnia,OCD,Music effects
369,10.0,10-15,50.0,YouTube Music,2.0,Yes,Yes,No,Pop,Yes,Yes,112.0,Very frequently,Never,Rarely,Never,Never,Rarely,Rarely,Very frequently,Never,Rarely,Never,Very frequently,Never,Rarely,Never,Very frequently,8.0,2.0,1.0,1.0,Improve
411,12.0,10-15,50.0,Spotify,0.5,Yes,Yes,Yes,Classical,Yes,Yes,109.0,Very frequently,Never,Never,Never,Never,Never,Rarely,Never,Never,Rarely,Never,Sometimes,Never,Never,Never,Never,7.0,8.0,6.0,2.0,Improve
433,12.0,10-15,50.0,Spotify,2.0,Yes,Yes,Yes,Classical,Yes,Yes,,Very frequently,Rarely,Never,Never,Rarely,Sometimes,Rarely,Rarely,Never,Never,Never,Sometimes,Never,Rarely,Sometimes,Never,8.0,0.0,1.0,6.0,Improve
384,12.0,10-15,50.0,YouTube Music,2.0,Yes,Yes,Yes,Classical,No,No,,Very frequently,Never,Never,Never,Never,Never,Never,Never,Never,Rarely,Never,Rarely,Never,Never,Never,Never,0.0,0.0,3.0,0.0,Improve
127,13.0,10-15,50.0,Spotify,2.0,Yes,Yes,Yes,Rock,Yes,No,120.0,Rarely,Never,Sometimes,Very frequently,Rarely,Sometimes,Never,Never,Never,Sometimes,Sometimes,Very frequently,Rarely,Very frequently,Very frequently,Very frequently,7.0,10.0,5.0,6.0,Worsen
442,13.0,10-15,50.0,Spotify,6.0,Yes,No,No,Rap,Yes,No,112.0,Never,Never,Never,Never,Never,Very frequently,Never,Rarely,Never,Rarely,Never,Rarely,Never,Very frequently,Never,Never,4.0,0.0,0.0,2.0,Worsen
577,13.0,10-15,50.0,YouTube Music,1.0,Yes,No,No,Video game music,Yes,No,,Never,Never,Sometimes,Never,Never,Rarely,Sometimes,Never,Never,Sometimes,Rarely,Rarely,Rarely,Never,Sometimes,Very frequently,4.0,3.0,4.0,0.0,Improve
393,13.0,10-15,50.0,Other streaming service,4.0,Yes,Yes,Yes,Classical,Yes,No,126.0,Very frequently,Never,Sometimes,Rarely,Never,Never,Rarely,Rarely,Never,Never,Rarely,Very frequently,Never,Never,Sometimes,Sometimes,8.0,5.0,10.0,3.0,Improve
408,13.0,10-15,50.0,YouTube Music,3.0,Yes,,Yes,Classical,No,No,80.0,Very frequently,Never,Never,Never,Never,Never,Never,Never,Never,Never,Never,Never,Never,Never,Never,Never,8.0,7.0,9.0,2.0,Improve
344,13.0,10-15,50.0,Spotify,2.5,Yes,No,No,Folk,No,No,118.0,Never,Sometimes,Never,Very frequently,Never,Never,Never,Never,Never,Never,Never,Very frequently,Never,Never,Never,Never,8.0,0.0,0.0,3.0,Improve
