In [5]:
# importing the libraries
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import numpy as np
import seaborn as sb

In [6]:
# panda display options
pd.set_option('display.max_rows', 60)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# pd.reset_option('all')

In [7]:
# bringing in the csv and copying
df = pd.read_csv('mxmh_survey_results.csv')
df_copy = df.copy()

In [8]:
# dropping the Timestamp column
df.drop(columns = 'Timestamp', inplace = True)

In [9]:
# dropping the Permissions column
df.drop(columns = 'Permissions', inplace = True)

In [10]:
# sorting the df by age and displaying the disorders
df.sort_values(by = 'Age')[['Age', 'Anxiety', 'Depression', 'Insomnia', 'OCD']]

Unnamed: 0,Age,Anxiety,Depression,Insomnia,OCD
369,10.0,8.0,2.0,1.0,1.0
411,12.0,7.0,8.0,6.0,2.0
433,12.0,8.0,0.0,1.0,6.0
384,12.0,0.0,0.0,3.0,0.0
127,13.0,7.0,10.0,5.0,6.0
...,...,...,...,...,...
429,73.0,5.0,4.0,0.0,1.0
494,74.0,4.0,2.0,1.0,0.0
527,80.0,7.0,3.0,9.0,2.0
695,89.0,0.0,0.0,0.0,0.0


In [11]:
# trying to create a scatterplot based on the above cell
fig = px.scatter_matrix(df,
                        dimensions = ['Anxiety', 'Depression', 'Insomnia', 'OCD'],
                        color = 'Age',
                        title = 'Scatter Plot Matrix for Disorders by Age')
fig.show()

In [12]:
# Create the Plotly Figure
fig2 = go.Figure()

# Add a line for each disorder
disorders = ['Anxiety', 'Depression', 'Insomnia', 'OCD']
for disorder in disorders:
    fig2.add_trace(go.Scatter(
        x=df['Age'],
        y=df[disorder],
        mode='lines+markers',
        name=disorder
    ))

# Update layout
fig2.update_layout(
    title="Age Correlation with Disorder Ratings",
    xaxis_title="Age",
    yaxis_title="Disorder Rating",
    legend_title="Disorders",
    template="plotly_white"
)

fig2.show()

In [13]:
df_melted = df.melt(id_vars=['Age'], 
                    value_vars=['Anxiety', 'Depression', 'Insomnia', 'OCD'], 
                    var_name='Disorder', 
                    value_name='Rating')

# Create a histogram
fig3 = px.histogram(
    df_melted, 
    x="Age", 
    y="Rating", 
    color="Disorder", 
    barmode="group",  # Group bars by Disorder
    title="Distribution of Disorder Ratings Across Ages",
    labels={"Rating": "Disorder Rating", "Age": "Age"},
    template="plotly_white"
)

fig3.update_layout(
    xaxis=dict(title="Age"),
    yaxis=dict(title="Disorder Rating"),
    legend_title="Disorders"
)

fig3.show()

In [14]:
df_melted = df.melt(
    id_vars=['Age'], 
    value_vars=['Anxiety', 'Depression', 'Insomnia', 'OCD'], 
    var_name='Disorder', 
    value_name='Rating'
)

# Create an overlayed histogram
fig4 = px.histogram(
    df_melted,
    x="Age",
    color="Disorder",
    facet_col="Disorder",  # Separate plots for each disorder
    barmode="overlay",  # Overlap the bars
    title="Distribution of Disorder Ratings Across Ages",
    labels={"Rating": "Disorder Rating", "Age": "Age"},
    template="plotly_white"
)

# Update layout for better clarity
fig4.update_layout(
    xaxis=dict(title="Age"),
    yaxis=dict(title="Count of Ratings"),
    legend_title="Disorders"
)

fig4.show()

In [15]:
# seeing how many of each age value exists
df['Age'].value_counts().sort_index()

Age
10.0     1
12.0     3
13.0     8
14.0    17
15.0    21
        ..
72.0     1
73.0     1
74.0     1
80.0     1
89.0     1
Name: count, Length: 61, dtype: int64

In [16]:
# Creating Age Groups and Counts columns
# Define the bins and labels for age groups
bins = [10, 15, 20, 30, 40, 50, 60, float('inf')]  # `float('inf')` represents 60+
labels = ['10-15', '16-20', '21-30', '31-40', '41-50', '51-60', '60+']

# Create the 'Age Groups' column, ensuring the lowest value (10) is included
df['Age Groups'] = pd.cut(df['Age'], bins=bins, labels=labels, right=True, include_lowest=True)

# Calculate the counts for each Age Group
age_group_counts = df['Age Groups'].value_counts()

# Map the counts back to the DataFrame
df['Group Counts'] = df['Age Groups'].map(age_group_counts)

In [17]:
columns = df.columns.to_list()
index = columns.index('Age')
index

0

In [18]:
columns

['Age',
 'Primary streaming service',
 'Hours per day',
 'While working',
 'Instrumentalist',
 'Composer',
 'Fav genre',
 'Exploratory',
 'Foreign languages',
 'BPM',
 'Frequency [Classical]',
 'Frequency [Country]',
 'Frequency [EDM]',
 'Frequency [Folk]',
 'Frequency [Gospel]',
 'Frequency [Hip hop]',
 'Frequency [Jazz]',
 'Frequency [K pop]',
 'Frequency [Latin]',
 'Frequency [Lofi]',
 'Frequency [Metal]',
 'Frequency [Pop]',
 'Frequency [R&B]',
 'Frequency [Rap]',
 'Frequency [Rock]',
 'Frequency [Video game music]',
 'Anxiety',
 'Depression',
 'Insomnia',
 'OCD',
 'Music effects',
 'Age Groups',
 'Group Counts']

In [19]:
# moving the Age Groups Column
columns.insert(index + 1, columns.pop(columns.index('Age Groups')))

In [20]:
# moving the Group Counts Columns
columns.insert(index + 2, columns.pop(columns.index('Group Counts')))

In [21]:
# Reorganizing the Dataframe
df = df[columns]

In [22]:
df.sort_values(by = 'Age')

Unnamed: 0,Age,Age Groups,Group Counts,Primary streaming service,Hours per day,While working,Instrumentalist,Composer,Fav genre,Exploratory,Foreign languages,BPM,Frequency [Classical],Frequency [Country],Frequency [EDM],Frequency [Folk],Frequency [Gospel],Frequency [Hip hop],Frequency [Jazz],Frequency [K pop],Frequency [Latin],Frequency [Lofi],Frequency [Metal],Frequency [Pop],Frequency [R&B],Frequency [Rap],Frequency [Rock],Frequency [Video game music],Anxiety,Depression,Insomnia,OCD,Music effects
369,10.0,10-15,50.0,YouTube Music,2.0,Yes,Yes,No,Pop,Yes,Yes,112.0,Very frequently,Never,Rarely,Never,Never,Rarely,Rarely,Very frequently,Never,Rarely,Never,Very frequently,Never,Rarely,Never,Very frequently,8.0,2.0,1.0,1.0,Improve
411,12.0,10-15,50.0,Spotify,0.5,Yes,Yes,Yes,Classical,Yes,Yes,109.0,Very frequently,Never,Never,Never,Never,Never,Rarely,Never,Never,Rarely,Never,Sometimes,Never,Never,Never,Never,7.0,8.0,6.0,2.0,Improve
433,12.0,10-15,50.0,Spotify,2.0,Yes,Yes,Yes,Classical,Yes,Yes,,Very frequently,Rarely,Never,Never,Rarely,Sometimes,Rarely,Rarely,Never,Never,Never,Sometimes,Never,Rarely,Sometimes,Never,8.0,0.0,1.0,6.0,Improve
384,12.0,10-15,50.0,YouTube Music,2.0,Yes,Yes,Yes,Classical,No,No,,Very frequently,Never,Never,Never,Never,Never,Never,Never,Never,Rarely,Never,Rarely,Never,Never,Never,Never,0.0,0.0,3.0,0.0,Improve
127,13.0,10-15,50.0,Spotify,2.0,Yes,Yes,Yes,Rock,Yes,No,120.0,Rarely,Never,Sometimes,Very frequently,Rarely,Sometimes,Never,Never,Never,Sometimes,Sometimes,Very frequently,Rarely,Very frequently,Very frequently,Very frequently,7.0,10.0,5.0,6.0,Worsen
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
429,73.0,60+,21.0,Pandora,3.0,Yes,No,No,R&B,Yes,Yes,4.0,Sometimes,Very frequently,Rarely,Sometimes,Very frequently,Sometimes,Sometimes,Rarely,Very frequently,Never,Rarely,Sometimes,Very frequently,Sometimes,Sometimes,Never,5.0,4.0,0.0,1.0,Improve
494,74.0,60+,21.0,I do not use a streaming service.,1.0,No,No,No,Pop,No,No,129.0,Rarely,Very frequently,Never,Sometimes,Sometimes,Never,Rarely,Never,Sometimes,Never,Never,Very frequently,Sometimes,Never,Sometimes,Never,4.0,2.0,1.0,0.0,Improve
527,80.0,60+,21.0,I do not use a streaming service.,3.0,Yes,Yes,No,Classical,No,No,122.0,Very frequently,Rarely,Never,Rarely,Never,Never,Rarely,Rarely,Never,Rarely,Rarely,Sometimes,Never,Never,Never,Sometimes,7.0,3.0,9.0,2.0,Improve
695,89.0,60+,21.0,Spotify,24.0,Yes,Yes,Yes,Rap,No,No,143.0,Never,Never,Rarely,Rarely,Never,Very frequently,Sometimes,Never,Never,Rarely,Rarely,Rarely,Sometimes,Very frequently,Rarely,Never,0.0,0.0,0.0,0.0,No effect


In [23]:
df['Age Groups'].value_counts()

Age Groups
16-20    289
21-30    247
31-40     76
10-15     50
41-50     26
51-60     26
60+       21
Name: count, dtype: int64

In [24]:
df.shape

(736, 33)

In [25]:
df.sort_values(by = 'Age Groups')[['Age Groups', 'Anxiety', 'Depression', 'Insomnia', 'OCD']]

Unnamed: 0,Age Groups,Anxiety,Depression,Insomnia,OCD
249,10-15,9.0,6.0,1.0,7.0
396,10-15,1.0,2.0,4.0,3.0
395,10-15,6.0,8.0,8.0,2.0
498,10-15,4.0,0.0,0.0,0.0
392,10-15,7.0,5.0,3.0,2.0
...,...,...,...,...,...
523,60+,3.0,5.0,0.0,2.0
514,60+,5.0,2.0,4.0,0.0
453,60+,2.0,2.0,2.0,0.0
490,60+,1.0,0.0,0.0,0.0


In [26]:
# Group by 'Age Groups' and calculate the mean for each column
group_averages = df.groupby('Age Groups')[['Anxiety', 'Depression', 'Insomnia', 'OCD']].mean()

# Merge these averages back into the original DataFrame as separate columns
df = df.merge(
    group_averages.rename(columns={
        'Anxiety': 'Anxiety Group Rating',
        'Depression': 'Depression Group Rating',
        'Insomnia': 'Insomnia Group Rating',
        'OCD': 'OCD Group Rating'
    }),
    on='Age Groups',
    how='left'
)





In [27]:
df.drop(columns = 'Average Group Rating', inplace = True)

KeyError: "['Average Group Rating'] not found in axis"

In [None]:
columns2 = df.columns.to_list()
columns2

['Age',
 'Age Groups',
 'Group Counts',
 'Primary streaming service',
 'Hours per day',
 'While working',
 'Instrumentalist',
 'Composer',
 'Fav genre',
 'Exploratory',
 'Foreign languages',
 'BPM',
 'Frequency [Classical]',
 'Frequency [Country]',
 'Frequency [EDM]',
 'Frequency [Folk]',
 'Frequency [Gospel]',
 'Frequency [Hip hop]',
 'Frequency [Jazz]',
 'Frequency [K pop]',
 'Frequency [Latin]',
 'Frequency [Lofi]',
 'Frequency [Metal]',
 'Frequency [Pop]',
 'Frequency [R&B]',
 'Frequency [Rap]',
 'Frequency [Rock]',
 'Frequency [Video game music]',
 'Anxiety',
 'Depression',
 'Insomnia',
 'OCD',
 'Music effects',
 'Anxiety Group Rating',
 'Depression Group Rating',
 'Insomnia Group Rating',
 'OCD Group Rating']

In [None]:
df.sort_values(by = 'Age Groups')[['Age Groups', 'Anxiety Group Rating', 'Depression Group Rating', 'Insomnia Group Rating', 'OCD Group Rating']]

In [None]:
# Sort the DataFrame by 'Age Groups'
sorted_df = df.sort_values(by='Age Groups')

# Select the desired columns and drop duplicate rows for 'Age Groups'
unique_age_groups = sorted_df[['Age Groups', 'Anxiety Group Rating', 'Depression Group Rating', 'Insomnia Group Rating', 'OCD Group Rating']].drop_duplicates()

unique_age_groups

Unnamed: 0,Age Groups,Anxiety Group Rating,Depression Group Rating,Insomnia Group Rating,OCD Group Rating
249,10-15,6.12,4.22,4.06,2.86
271,16-20,5.83218,4.564014,3.558824,2.653979
609,21-30,6.271255,5.574899,3.724696,2.894737
181,31-40,5.723684,5.0,4.328947,2.605263
91,41-50,5.5,4.538462,4.5,1.846154
410,51-60,2.730769,2.038462,3.269231,1.461538
513,60+,4.666667,3.285714,3.190476,1.190476
12,,,,,


In [None]:
for i in df:
    print(i)

Age
Age Groups
Group Counts
Primary streaming service
Hours per day
While working
Instrumentalist
Composer
Fav genre
Exploratory
Foreign languages
BPM
Frequency [Classical]
Frequency [Country]
Frequency [EDM]
Frequency [Folk]
Frequency [Gospel]
Frequency [Hip hop]
Frequency [Jazz]
Frequency [K pop]
Frequency [Latin]
Frequency [Lofi]
Frequency [Metal]
Frequency [Pop]
Frequency [R&B]
Frequency [Rap]
Frequency [Rock]
Frequency [Video game music]
Anxiety
Depression
Insomnia
OCD
Music effects
Anxiety Group Rating
Depression Group Rating
Insomnia Group Rating
OCD Group Rating


In [None]:
df_music_effects = df[df['Music effects'] == 'Improve']
df_music_effects

In [None]:
improve_count = df[df['Music effects'] == 'Improve'].shape[0]
improve_count

542

In [None]:
no_effect_count = df[df['Music effects'] == 'No effect'].shape[0]
no_effect_count

169

In [None]:
worsen_count = df[df['Music effects'] == 'Worsen'].shape[0]
worsen_count

17

In [None]:
NaN_count = df['Music effects'].isna().sum()
NaN_count

np.int64(8)

In [None]:
df['Frequency [Video game music]'].unique()

array(['Sometimes', 'Rarely', 'Very frequently', 'Never'], dtype=object)

In [None]:
# Step 1: Filter rows where 'Music effects' is 'Improve' (case-insensitive, strip extra spaces)
filtered_df = df[df['Music effects'].str.strip().str.lower() == 'improve']

# Step 2: Check for 'Very frequently' in any column (case-insensitive)
# Fill missing values with empty strings to avoid errors
filtered_df = filtered_df.loc[
    filtered_df.applymap(lambda x: str(x).strip().lower()).eq('very frequently').any(axis=1)
]

# Return the filtered DataFrame displaying all columns
filtered_df

In [None]:
df.to_csv('newdf.csv', index = True)

In [None]:
# Step 1: Filter rows where 'Music effects' is 'Improve'
filtered_df = df[df["Music effects"] == "Improve"]

# Step 2: Check if any music category column contains 'Very frequently'
frequency_columns = [col for col in df.columns if col.startswith("Frequency")]
filtered_df = filtered_df[filtered_df[frequency_columns].eq("Very frequently").any(axis=1)]

# Step 3: Group by 'Age Groups' and count occurrences
age_group_counts = filtered_df.groupby("Age Groups").size().reset_index(name="Count")

# Step 4: Create a bar graph
fig = px.bar(
    age_group_counts,
    x="Age Groups",
    y="Count",
    title="Relationship Between Age Groups and 'Very Frequently' + 'Improve'",
    labels={"Count": "Count of Rows", "Age Groups": "Age Groups"},
    text="Count"
)

# Show the graph
fig.show()





In [None]:
# Step 1: Filter rows where 'Music effects' is 'Improve'
filtered_df = df[df["Music effects"] == "Improve"]

# Step 2: Extract columns starting with 'Frequency'
frequency_columns = [col for col in df.columns if col.startswith("Frequency")]

# Step 3: Count 'Very frequently' occurrences for each music type
very_frequent_counts = (filtered_df[frequency_columns] == "Very frequently").sum().reset_index()
very_frequent_counts.columns = ["Music Type", "Count"]  # Rename columns for clarity

# Step 4: Create a bar graph
fig = px.bar(
    very_frequent_counts,
    x="Music Type",
    y="Count",
    title="Music Types with 'Very Frequently' and 'Improve'",
    labels={"Count": "Count of Very Frequently", "Music Type": "Music Types"},
    text="Count"
)

# Show the graph
fig.show()

In [None]:
# Step 1: Filter rows where 'Music effects' is 'Improve'
filtered_df = df[df["Music effects"] == "Improve"]

# Step 2: Extract columns starting with 'Frequency'
frequency_columns = [col for col in df.columns if col.startswith("Frequency")]

# Step 3: Count 'Very frequently' occurrences for each music type
very_frequent_counts = (filtered_df[frequency_columns] == "Very frequently").sum().reset_index()
very_frequent_counts.columns = ["Music Type", "Count"]  # Rename columns for clarity

# Extract only the text inside the square brackets for x-axis labels
very_frequent_counts["Music Type"] = very_frequent_counts["Music Type"].str.extract(r"\[(.*?)\]")

# Step 4: Create a bar graph
fig = px.bar(
    very_frequent_counts,
    x="Music Type",
    y="Count",
    title="Music Types with 'Very Frequently' and 'Improve'",
    labels={"Count": "Count of Very Frequently", "Music Type": "Music Types"},
    text="Count"
)

# Show the graph
fig.show()

In [28]:
for i in df:
    print(i)

Age
Age Groups
Group Counts
Primary streaming service
Hours per day
While working
Instrumentalist
Composer
Fav genre
Exploratory
Foreign languages
BPM
Frequency [Classical]
Frequency [Country]
Frequency [EDM]
Frequency [Folk]
Frequency [Gospel]
Frequency [Hip hop]
Frequency [Jazz]
Frequency [K pop]
Frequency [Latin]
Frequency [Lofi]
Frequency [Metal]
Frequency [Pop]
Frequency [R&B]
Frequency [Rap]
Frequency [Rock]
Frequency [Video game music]
Anxiety
Depression
Insomnia
OCD
Music effects
Anxiety Group Rating
Depression Group Rating
Insomnia Group Rating
OCD Group Rating


In [29]:
df

Unnamed: 0,Age,Age Groups,Group Counts,Primary streaming service,Hours per day,While working,Instrumentalist,Composer,Fav genre,Exploratory,Foreign languages,BPM,Frequency [Classical],Frequency [Country],Frequency [EDM],Frequency [Folk],Frequency [Gospel],Frequency [Hip hop],Frequency [Jazz],Frequency [K pop],Frequency [Latin],Frequency [Lofi],Frequency [Metal],Frequency [Pop],Frequency [R&B],Frequency [Rap],Frequency [Rock],Frequency [Video game music],Anxiety,Depression,Insomnia,OCD,Music effects,Anxiety Group Rating,Depression Group Rating,Insomnia Group Rating,OCD Group Rating
0,18.0,16-20,289.0,Spotify,3.0,Yes,Yes,Yes,Latin,Yes,Yes,156.0,Rarely,Never,Rarely,Never,Never,Sometimes,Never,Very frequently,Very frequently,Rarely,Never,Very frequently,Sometimes,Very frequently,Never,Sometimes,3.0,0.0,1.0,0.0,,5.832180,4.564014,3.558824,2.653979
1,63.0,60+,21.0,Pandora,1.5,Yes,No,No,Rock,Yes,No,119.0,Sometimes,Never,Never,Rarely,Sometimes,Rarely,Very frequently,Rarely,Sometimes,Rarely,Never,Sometimes,Sometimes,Rarely,Very frequently,Rarely,7.0,2.0,2.0,1.0,,4.666667,3.285714,3.190476,1.190476
2,18.0,16-20,289.0,Spotify,4.0,No,No,No,Video game music,No,Yes,132.0,Never,Never,Very frequently,Never,Never,Rarely,Rarely,Very frequently,Never,Sometimes,Sometimes,Rarely,Never,Rarely,Rarely,Very frequently,7.0,7.0,10.0,2.0,No effect,5.832180,4.564014,3.558824,2.653979
3,61.0,60+,21.0,YouTube Music,2.5,Yes,No,Yes,Jazz,Yes,Yes,84.0,Sometimes,Never,Never,Rarely,Sometimes,Never,Very frequently,Sometimes,Very frequently,Sometimes,Never,Sometimes,Sometimes,Never,Never,Never,9.0,7.0,3.0,3.0,Improve,4.666667,3.285714,3.190476,1.190476
4,18.0,16-20,289.0,Spotify,4.0,Yes,No,No,R&B,Yes,No,107.0,Never,Never,Rarely,Never,Rarely,Very frequently,Never,Very frequently,Sometimes,Sometimes,Never,Sometimes,Very frequently,Very frequently,Never,Rarely,7.0,2.0,5.0,9.0,Improve,5.832180,4.564014,3.558824,2.653979
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
731,17.0,16-20,289.0,Spotify,2.0,Yes,Yes,No,Rock,Yes,Yes,120.0,Very frequently,Rarely,Never,Sometimes,Never,Sometimes,Rarely,Never,Sometimes,Rarely,Rarely,Very frequently,Never,Rarely,Very frequently,Never,7.0,6.0,0.0,9.0,Improve,5.832180,4.564014,3.558824,2.653979
732,18.0,16-20,289.0,Spotify,1.0,Yes,Yes,No,Pop,Yes,Yes,160.0,Rarely,Rarely,Never,Never,Never,Never,Rarely,Never,Never,Rarely,Never,Very frequently,Never,Never,Sometimes,Sometimes,3.0,2.0,2.0,5.0,Improve,5.832180,4.564014,3.558824,2.653979
733,19.0,16-20,289.0,Other streaming service,6.0,Yes,No,Yes,Rap,Yes,No,120.0,Rarely,Sometimes,Sometimes,Rarely,Rarely,Very frequently,Rarely,Rarely,Rarely,Sometimes,Rarely,Sometimes,Sometimes,Sometimes,Rarely,Rarely,2.0,2.0,2.0,2.0,Improve,5.832180,4.564014,3.558824,2.653979
734,19.0,16-20,289.0,Spotify,5.0,Yes,Yes,No,Classical,No,No,170.0,Very frequently,Never,Never,Never,Never,Never,Rarely,Never,Never,Never,Never,Never,Never,Never,Never,Sometimes,2.0,3.0,2.0,1.0,Improve,5.832180,4.564014,3.558824,2.653979


In [30]:
# Step 1: Filter rows where 'Music effects' is 'Improve'
filtered_df = df[df["Music effects"] == "Improve"]

# Step 2: Include rows where at least one condition is 5 or greater
condition_columns = ["Anxiety", "Depression", "Insomnia", "OCD"]
filtered_df = filtered_df[filtered_df[condition_columns].ge(5).any(axis=1)]

# Step 3: Extract music types with 'Very frequently'
frequency_columns = [col for col in df.columns if col.startswith("Frequency")]
music_type_conditions = filtered_df.melt(
    id_vars=condition_columns + ["Music effects"], 
    value_vars=frequency_columns, 
    var_name="Music Type", 
    value_name="Frequency"
)
music_type_conditions = music_type_conditions[music_type_conditions["Frequency"] == "Very frequently"]

# Step 4: Count occurrences by Music Type and Condition
music_type_conditions["Music Type"] = music_type_conditions["Music Type"].str.extract(r"\[(.*?)]")  # Extract music type
condition_counts = music_type_conditions.melt(
    id_vars=["Music Type"], 
    value_vars=condition_columns, 
    var_name="Condition", 
    value_name="Severity"
)
condition_counts = condition_counts[condition_counts["Severity"] >= 5]
result = condition_counts.groupby(["Music Type", "Condition"]).size().reset_index(name="Count")

# Step 5: Create a grouped bar chart
fig = px.bar(
    result,
    x="Music Type",
    y="Count",
    color="Condition",
    title="Conditions Improving by Listening to Music Types 'Very Frequently'",
    labels={"Count": "Count of Conditions", "Music Type": "Music Types", "Condition": "Condition"},
    barmode="group",
    text="Count"
)

# Show the graph
fig.show()

In [32]:
# Step 1: Identify all columns starting with "Frequency"
frequency_columns = [col for col in df.columns if col.startswith("Frequency")]

# Step 2: Count occurrences of "Very frequently" for each music type
very_frequent_counts = (df[frequency_columns] == "Very frequently").sum().reset_index()
very_frequent_counts.columns = ["Music Type", "Count"]  # Rename columns for clarity

# Step 3: Extract music type names (e.g., "Metal", "Rock") from column headers
very_frequent_counts["Music Type"] = very_frequent_counts["Music Type"].str.extract(r"\[(.*?)]")

# Step 4: Create a bar graph
fig = px.bar(
    very_frequent_counts,
    x="Music Type",
    y="Count",
    title="Overall Popularity of Music Types ('Very Frequently')",
    labels={"Count": "Count of 'Very Frequently'", "Music Type": "Music Types"},
    text="Count"
)

# Show the graph
fig.show()

In [33]:
df.shape

(736, 37)

In [34]:
df['Music effects'].value_counts()

Music effects
Improve      542
No effect    169
Worsen        17
Name: count, dtype: int64

In [35]:
df['Hours per day'].value_counts().sort_index()

Hours per day
0.00       6
0.10       1
0.25       3
0.50      20
0.70       1
1.00     117
1.50      17
2.00     173
2.50       6
3.00     120
4.00      83
4.50       1
5.00      54
6.00      47
7.00      15
8.00      29
9.00       3
10.00     20
11.00      1
12.00      9
13.00      1
14.00      1
15.00      2
16.00      1
18.00      1
20.00      1
24.00      3
Name: count, dtype: int64

In [36]:
df['Primary streaming service'].value_counts().sort_index()

Primary streaming service
Apple Music                           51
I do not use a streaming service.     71
Other streaming service               50
Pandora                               11
Spotify                              458
YouTube Music                         94
Name: count, dtype: int64

In [37]:
df['Anxiety'].value_counts().sort_index()

Anxiety
0.0      35
1.0      29
2.0      44
3.0      69
4.0      56
5.0      59
6.0      83
7.0     122
7.5       1
8.0     115
9.0      56
10.0     67
Name: count, dtype: int64

In [39]:
df['Depression'].value_counts().sort_index()

Depression
0.0     84
1.0     40
2.0     93
3.0     59
3.5      2
4.0     58
5.0     56
6.0     88
7.0     96
8.0     77
9.0     38
10.0    45
Name: count, dtype: int64

In [40]:
df['Insomnia'].value_counts().sort_index()

Insomnia
0.0     149
1.0      82
2.0      88
3.0      68
3.5       1
4.0      59
5.0      58
6.0      62
7.0      59
8.0      49
9.0      27
10.0     34
Name: count, dtype: int64

In [41]:
df['OCD'].value_counts().sort_index()

OCD
0.0     248
1.0      95
2.0      96
3.0      64
4.0      48
5.0      54
5.5       1
6.0      33
7.0      34
8.0      28
8.5       1
9.0      14
10.0     20
Name: count, dtype: int64

In [43]:
# Use pd.cut to group values into bins
bins = [0, 4, 10]  # Define the bins
labels = ['0-4', '5-10']  # Define labels for the bins
df['Range'] = pd.cut(df['OCD'], bins=bins, labels=labels, right=True)

# Count occurrences in each bin
result = df['Range'].value_counts()
print(result)

Range
0-4     303
5-10    185
Name: count, dtype: int64


In [44]:
# Use pd.cut to group values into bins
bins = [0, 4, 10]  # Define the bins
labels = ['0-4', '5-10']  # Define labels for the bins
df['Range'] = pd.cut(df['Anxiety'], bins=bins, labels=labels, right=True)

# Count occurrences in each bin
result = df['Range'].value_counts()
print(result)

Range
5-10    503
0-4     198
Name: count, dtype: int64


In [45]:
# Use pd.cut to group values into bins
bins = [0, 4, 10]  # Define the bins
labels = ['0-4', '5-10']  # Define labels for the bins
df['Range'] = pd.cut(df['Insomnia'], bins=bins, labels=labels, right=True)

# Count occurrences in each bin
result = df['Range'].value_counts()
print(result)

Range
0-4     298
5-10    289
Name: count, dtype: int64


In [46]:
# Use pd.cut to group values into bins
bins = [0, 4, 10]  # Define the bins
labels = ['0-4', '5-10']  # Define labels for the bins
df['Range'] = pd.cut(df['Depression'], bins=bins, labels=labels, right=True)

# Count occurrences in each bin
result = df['Range'].value_counts()
print(result)

Range
5-10    400
0-4     252
Name: count, dtype: int64
