# Imports

In [1]:
import os
import pandas as pd

In [2]:
# Locate the data folder: a "data" directory that sits next to the directory
# the notebook is executed from (i.e. inside the working directory's parent).
cwd = os.getcwd()
one_level_up = os.path.join(cwd, os.pardir)
parent_dir = os.path.abspath(one_level_up)  # normalises away the trailing ".."
data_dir = os.path.join(parent_dir, "data")

In [3]:
# Read the personality dataset from the data folder into a DataFrame.
personality_json_path = os.path.join(
    data_dir, "personalities_split_3comments_10_300_upsamples.json"
)
df_personality = pd.read_json(personality_json_path)

# Artists by Genre

In [4]:
# Count the rows for every (channel, genre) pair, then show the pairs grouped
# by genre with the smallest channels of each genre first.
channel_genre_counts = df_personality[["channel", "genre"]].value_counts()
pd.DataFrame(channel_genre_counts).sort_values(by=["genre", "count"])

Unnamed: 0_level_0,Unnamed: 1_level_0,count
channel,genre,Unnamed: 2_level_1
raycharles,blues,377
tylerbryant&theshakedown,blues,618
marcusking,blues,1225
kirk fletcher,blues,3400
kaleo,blues,4455
bach,classical,1856
mozart,classical,7648
beethoven,classical,9483
chopin,classical,12388
drake,hip_hop_rap,15203


# Personality mapping and counting

In [5]:
# Letter pair encoded by each trait column: the key is both the column name and
# the letter assigned when the score is at most 0.5; the value is the letter
# assigned when the score is above 0.5.
# NOTE(review): a score above 0.5 in column "I" is labelled "E" (and likewise
# for the other traits) -- confirm this matches how the scores were produced.
personality_mapping = {"I": "E", "N": "S", "T": "F", "J": "P"}

# Replace every numeric trait column with its letter, driven by the mapping
# above instead of four hand-written lambdas.
for low_letter, high_letter in personality_mapping.items():
    trait = df_personality[low_letter]
    # The columns are overwritten in place, so on a re-run they already hold
    # letters; skip them instead of comparing a str with 0.5 (TypeError).
    if trait.isin([low_letter, high_letter]).all():
        continue
    df_personality[low_letter] = trait.apply(
        lambda score, high=high_letter, low=low_letter: high if score > 0.5 else low
    )

# Concatenate the four letters of each row into one type string.
df_personality["personality"] = df_personality[list(personality_mapping)].agg(
    "".join, axis=1
)

# Number of rows per (channel, personality) pair, in long format.
personality_count = (
    df_personality.groupby(["channel", "personality"]).size().reset_index(name="count")
)

# Nested dict {channel: {personality: count}}. Built by iterating the groups
# rather than groupby(...).apply(...), which warns on pandas >= 2.2 because the
# applied frame still contains the grouping column.
personality_count_by_channel = {
    channel: rows.set_index("personality")["count"].to_dict()
    for channel, rows in personality_count.groupby("channel")
}

In [6]:
# Pivot the nested {channel: {personality: count}} dict into a wide table:
# one column per channel, one row per personality type. A type that never
# occurs for a channel is missing from its inner dict, hence the 0 fill.
counts_wide = pd.DataFrame(personality_count_by_channel).fillna(0)

# Move the personality labels out of the index into a regular column.
df_personality_count = counts_wide.reset_index().rename(
    columns={"index": "personality"}
)

In [7]:
# Display the wide count table: one row per personality type, one column per channel.
df_personality_count

Unnamed: 0,personality,anthrax,ariana_grande,bach,beethoven,billie_eilish,bonjovi,chopin,drake,ed_sheeran,...,olivia,pinkfloyd,queen,raycharles,skrillex,slayer,systemofadown,taylor_swift,thebeatles,tylerbryant&theshakedown
0,ENFJ,24.0,144.0,34.0,112.0,403.0,257.0,225.0,61.0,421.0,...,99.0,289,415.0,10.0,50.0,49.0,357.0,159.0,292.0,2.0
1,ENFP,335.0,284.0,40.0,227.0,658.0,591.0,422.0,302.0,987.0,...,323.0,923,785.0,9.0,292.0,123.0,535.0,414.0,883.0,11.0
2,ENTJ,25.0,26.0,28.0,34.0,31.0,15.0,69.0,25.0,44.0,...,16.0,60,53.0,1.0,9.0,28.0,87.0,25.0,77.0,3.0
3,ENTP,217.0,37.0,29.0,64.0,62.0,69.0,53.0,72.0,56.0,...,31.0,328,134.0,7.0,35.0,112.0,209.0,35.0,196.0,1.0
4,ESFJ,995.0,3772.0,286.0,1685.0,6019.0,3981.0,2573.0,1519.0,6448.0,...,3027.0,4464,7330.0,89.0,2358.0,862.0,3589.0,4472.0,5088.0,120.0
5,ESFP,1199.0,2232.0,149.0,1379.0,3730.0,2077.0,1921.0,1390.0,4451.0,...,2607.0,3388,4144.0,64.0,2061.0,519.0,1922.0,3129.0,3635.0,114.0
6,ESTJ,2739.0,4040.0,800.0,2833.0,3781.0,2637.0,3442.0,5102.0,3502.0,...,3406.0,5483,7085.0,79.0,3076.0,3406.0,3343.0,4619.0,6386.0,149.0
7,ESTP,4993.0,4201.0,379.0,2652.0,3691.0,2868.0,2471.0,6316.0,3629.0,...,5008.0,5993,5809.0,82.0,3817.0,3286.0,3541.0,4426.0,6149.0,198.0
8,INFJ,22.0,24.0,12.0,80.0,96.0,94.0,191.0,33.0,140.0,...,32.0,185,167.0,1.0,16.0,41.0,96.0,31.0,131.0,1.0
9,INFP,506.0,355.0,82.0,391.0,980.0,773.0,974.0,361.0,1803.0,...,251.0,1537,1379.0,34.0,387.0,276.0,746.0,276.0,1540.0,18.0


# Personality percentage

In [8]:
# Numeric part of the table only (the label column is set aside for now).
channel_counts = df_personality_count.drop(columns="personality")

# Column totals: how many rows were counted for each channel overall.
total_fans = channel_counts.sum(axis=0)

# Share of every personality type within its channel, expressed in percent.
df_percentages = channel_counts.div(total_fans) * 100

# Put the personality labels back in front of the percentage columns.
df_personality_percentage = pd.concat(
    [df_personality_count["personality"], df_percentages],
    axis=1,
)

In [9]:
# For each personality type, name the channel in which that type makes up the
# largest share of the audience.
percentages_by_type = df_personality_percentage.set_index("personality")
percentages_by_type.idxmax(axis="columns")

personality
ENFJ       raycharles
ENFP       ed_sheeran
ENTJ             bach
ENTP          anthrax
ESFJ    billie_eilish
ESFP       ed_sheeran
ESTJ             bach
ESTP          anthrax
INFJ           chopin
INFP       raycharles
INTJ           mozart
INTP           slayer
ISTP       marcusking
ISFP            queen
ISFJ          bonjovi
ISTJ        pinkfloyd
dtype: object

In [10]:
# Promote "personality" to the index so rows can be selected by type with .loc.
# Reassigned behind a guard instead of inplace=True: once the column has become
# the index, running this cell again is a no-op rather than a KeyError.
if "personality" in df_personality_percentage.columns:
    df_personality_percentage = df_personality_percentage.set_index("personality")

# TOP 5 Artists for each personality

In [11]:
# Column-name prefixes for the ranks reported per personality; the number of
# labels is the number of top artists kept.
RANK_LABELS = ("1st", "2nd", "3rd", "4th", "5th")

# One record per personality:
# {"Personality": ..., "1st Artist": ..., "1st Percentage": ..., ...}
top_artists_list = []

# Walk through every personality and collect its highest-percentage artists.
for personality in df_personality_percentage.index:
    top_artists = df_personality_percentage.loc[personality].nlargest(len(RANK_LABELS))
    record = {"Personality": personality}
    # zip stops at the shorter input, so a table with fewer artists than rank
    # labels yields fewer columns instead of raising IndexError.
    for label, artist, percentage in zip(RANK_LABELS, top_artists.index, top_artists):
        record[f"{label} Artist"] = artist
        record[f"{label} Percentage"] = percentage
    top_artists_list.append(record)

# Build the frame once from the collected records.
df_top_artists = pd.DataFrame(top_artists_list)

# Display the resulting dataframe
df_top_artists

Unnamed: 0,Personality,1st Artist,1st Percentage,2nd Artist,2nd Percentage,3rd Artist,3rd Percentage,4th Artist,4th Percentage,5th Artist,5th Percentage
0,ENFJ,raycharles,2.65252,systemofadown,2.460711,kanye_west,2.072883,eminem,2.069836,billie_eilish,2.066985
1,ENFP,ed_sheeran,4.586218,bonjovi,4.412753,pinkfloyd,4.045052,systemofadown,3.687621,thebeatles,3.607173
2,ENTJ,bach,1.508621,mozart,0.980649,systemofadown,0.599669,chopin,0.556991,kanye_west,0.541474
3,ENTP,anthrax,1.943574,raycharles,1.856764,megadeth,1.639929,bach,1.5625,eminem,1.52829
4,ESFJ,billie_eilish,30.871416,ed_sheeran,29.961433,bonjovi,29.724483,queen,26.647762,taylor_swift,25.413423
5,ESFP,ed_sheeran,20.682124,kaleo,19.618406,billie_eilish,19.131148,marcusking,18.693878,tylerbryant&theshakedown,18.446602
6,ESTJ,bach,43.103448,slayer,38.634301,drake,33.559166,mozart,33.041318,kirk fletcher,31.058824
7,ESTP,anthrax,44.720107,drake,41.544432,megadeth,38.039216,ledzeppelin,37.499532,nicki_minaj,37.274064
8,INFJ,chopin,1.541815,beethoven,0.843615,pinkfloyd,0.810763,bonjovi,0.701859,systemofadown,0.661704
9,INFP,raycharles,9.018568,ed_sheeran,8.377863,chopin,7.862448,pinkfloyd,6.73591,thebeatles,6.291107
