In [4]:
import pandas as pd
import statistics
import numpy as np

# Declaration
# Descriptive Statistic : Max, Min, Range, Mean, Mode, Var, Std for numeric Column
data = pd.read_csv("Spotify Youtube Dataset.csv")

target_col = ['Danceability','Energy','Key','Loudness','Speechiness','Acousticness','Instrumentalness','Liveness','Valence','Tempo']

for dt in target_col:
    # Convert to int
    data[dt] = pd.to_numeric(data[dt], errors='coerce')
    
    # Remove NaN
    data = data.dropna(subset=[dt])

    max_val = data[dt].max()
    min_val = data[dt].min()
    range_val = max_val - min_val
    mean = round(data[dt].mean(),2)
    mode = round(statistics.mode(data[dt]),2)
    var = round(np.nanvar(data[dt],ddof=1),2)
    std = round(var ** 0.5,2)

    print(f"Max of {dt.title()} : {max_val}")
    print(f"Min of {dt.title()} : {min_val}")
    print(f"Range of {dt.title()} : {range_val}")
    print(f"Mean of {dt.title()} : {mean}")
    print(f"Mode of {dt.title()} : {mode}")
    print(f"Variance of {dt.title()} : {var}")
    print(f"Standard Deviance of {dt.title()} : {std}\n")

Max of Danceability : 0.975
Min of Danceability : 0.0
Range of Danceability : 0.975
Mean of Danceability : 0.62
Mode of Danceability : 0.69
Variance of Danceability : 0.03
Standard Deviance of Danceability : 0.17

Max of Energy : 1.0
Min of Energy : 2.03e-05
Range of Energy : 0.9999797
Mean of Energy : 0.64
Mode of Energy : 0.57
Variance of Energy : 0.05
Standard Deviance of Energy : 0.22

Max of Key : 11.0
Min of Key : 0.0
Range of Key : 11.0
Mean of Key : 5.3
Mode of Key : 0.0
Variance of Key : 12.79
Standard Deviance of Key : 3.58

Max of Loudness : 0.92
Min of Loudness : -46.251
Range of Loudness : 47.171
Mean of Loudness : -7.67
Mode of Loudness : -7.82
Variance of Loudness : 21.46
Standard Deviance of Loudness : 4.63

Max of Speechiness : 0.964
Min of Speechiness : 0.0
Range of Speechiness : 0.964
Mean of Speechiness : 0.1
Mode of Speechiness : 0.03
Variance of Speechiness : 0.01
Standard Deviance of Speechiness : 0.1

Max of Acousticness : 0.996
Min of Acousticness : 1.11e-06
Ra

In [28]:
import pandas as pd
import statistics
import numpy as np

# Display settings
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.width', 0)
pd.set_option('display.float_format', '{:.0f}'.format) 

# Declaration
# Descriptive Statistic : Total Song, View, Like, and Comment For Each Album Type
data = pd.read_csv("Spotify Youtube Dataset.csv")

# Group by Country and Satellite Type
grouped = data.groupby(['Album_type'])

summary = grouped.agg(
    Total_Song=('Artist', 'count'),
    Total_View=('Views', 'sum'),
    Total_Like=('Likes', 'sum'),
    Total_Comment=('Comments', 'sum')
).reset_index()

print(summary)

    Album_type  Total_Song    Total_View  Total_Like  Total_Comment
0        album       14926 1438910059139  9530475201      409846274
1  compilation         788   61226486759   391331369       13027260
2       single        5004  401916456409  3462426024      131606670


In [30]:
import pandas as pd
import statistics
import numpy as np

# Display settings
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.width', 0)
pd.set_option('display.float_format', '{:.0f}'.format) 

# Declaration
# Descriptive Statistic : Total Song, View, Like, and Comment For Licensed Comparison
data = pd.read_csv("Spotify Youtube Dataset.csv")

# Group by Licensed
grouped = data.groupby(['Licensed'])

summary = grouped.agg(
    Total_Song=('Artist', 'count'),
    Total_View=('Views', 'sum'),
    Total_Like=('Likes', 'sum'),
    Total_Comment=('Comments', 'sum')
).reset_index()

# Convert Licensed column to boolean
summary['Licensed'] = summary['Licensed'].astype(bool)

print(summary)

   Licensed  Total_Song    Total_View  Total_Like  Total_Comment
0     False        6108  188834565877  1522021097       56153499
1      True       14140 1713218436430 11862211497      498326705


In [9]:
import pandas as pd
import statistics
import numpy as np

# Display settings
pd.set_option('display.max_columns', None)  
pd.set_option('display.expand_frame_repr', False)
pd.set_option('display.width', 0)
pd.set_option('display.float_format', '{:.0f}'.format) 

# Declaration
# Descriptive Statistic : Total Song, View, Like, Comment, and Percentage Distribution For Song With Official Video or Not
data = pd.read_csv("Spotify Youtube Dataset.csv")

# Group by Licensed
grouped = data.groupby(['official_video'])

summary = grouped.agg(
    Total_Song=('Artist', 'count'),
    Total_View=('Views', 'sum'),
    Total_Like=('Likes', 'sum'),
    Total_Comment=('Comments', 'sum')
).reset_index()

# Convert Licensed column to boolean
summary['official_video'] = summary['official_video'].astype(bool)

# Add percentage column
total_songs = summary['Total_Song'].sum()
summary['Song_Percentage'] = (summary['Total_Song'] / total_songs * 100).round(2)

print(summary)

   official_video  Total_Song    Total_View  Total_Like  Total_Comment  Song_Percentage
0           False        4525  101851930718   788148327       33089082               22
1            True       15723 1800201071589 12596084267      521391122               78
