**<h2> Project </h2>**

In [None]:
# Colab-only: mount Google Drive at /content/drive so the score CSV stored
# under "My Drive" can be read by the loading cell.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans

In [None]:
# Location of the score table on the mounted Google Drive.
file_path = '/content/drive/My Drive/data_ai/game_data/data_score.csv'
# Load the table; the previews in this notebook show an ID column, a `force`
# column and one score column per song (song1 ... song100).
df = pd.read_csv(file_path)

**<h2> Preprocessing </h2>**

In [None]:
# Order rows from highest to lowest `force` and renumber them 0..n-1, so that
# adjacent row positions correspond to adjacent places in the force ranking.
df = df.sort_values(by='force', ascending=False).reset_index(drop=True)

In [None]:
# Preview the first rows of the force-sorted table (highest force first).
df.head()

Unnamed: 0,ID,force,song1,song2,song3,song4,song5,song6,song7,song8,...,song91,song92,song93,song94,song95,song96,song97,song98,song99,song100
0,SV-2505-3189,21.297,9966614,10000000,9965229,9928247,9970389,9975388,9938271,9934014,...,9939531,9985875,9934733,9966666,9944386,9912729,9952326,9938206,9946492,9979088
1,SV-6555-7968,21.247,9935192,10000000,9979137,9962235,9974619,9935630,9963348,9978714,...,9933026,9738468,9884653,9784580,9781073,9967366,9960000,9965241,9965392,9918001
2,SV-6546-7094,21.234,9978397,9997039,9977399,9977341,9955583,9924271,9934413,9985100,...,9933026,9933428,9917251,9841269,9976459,9930476,9993333,9991310,9902196,9965675
3,SV-2165-8973,21.231,9980361,9979277,9954798,9960347,9945008,9969708,9965277,9980842,...,9953955,9888254,9979939,9969765,9981167,9938989,9953333,9980882,9966897,9973302
4,SV-3164-3276,21.229,9929300,9997039,9970445,9975453,9976734,9918591,9978780,9980842,...,9943491,9902520,9989969,9920634,9976459,9951759,9953333,9960027,9968402,9925629


In [None]:
# Exclusive upper bound on the row offset scanned on each side of a zero score
# during imputation, i.e. rows up to close_boundary - 1 positions away are used.
close_boundary = 10
# Names of the per-song score columns: every column whose name starts with 'song'.
song_columns = [col for col in df.columns if col.startswith('song')]

In [None]:
# Impute zero scores column by column.
#
# A zero in a song column is replaced by the truncated mean of the non-zero
# values found in the same column within close_boundary - 1 rows above and
# below it. Rows are visited top to bottom and the column is updated as we go,
# so a value imputed earlier in the pass can serve as a neighbour for a zero
# further down. A zero with no non-zero neighbour in its window is left as is.
#
# Each column is processed as a NumPy array and only the zero positions are
# visited, which avoids one scalar DataFrame lookup per cell. Access is
# positional, so the result does not depend on the index labels.
n_rows = len(df)
for col in song_columns:
    values = df[col].to_numpy(copy=True)
    # Positions are collected once up front: a cell only changes when it is
    # visited, so this matches checking each cell at visit time.
    for idx in np.flatnonzero(values == 0):
        lo = max(idx - close_boundary + 1, 0)
        hi = min(idx + close_boundary, n_rows)
        window = values[lo:hi]
        # Drop zeros (including the cell being filled) before averaging.
        neighbors = window[window != 0]
        if neighbors.size:
            values[idx] = int(neighbors.mean())
    df[col] = values

# Replace each zero score with the mean of the non-zero scores in neighbouring rows (users adjacent in the force ranking)

In [None]:
# Row-wise standardization: express each user's song scores as z-scores
# relative to that user's own mean and standard deviation across all songs,
# then cap extreme low values at -3.
song_scores = df[song_columns]
row_means = song_scores.mean(axis=1)
row_stds = song_scores.std(axis=1)
# A row whose scores are all identical has zero spread; dividing by it would
# yield NaN, which the clustering step cannot accept. Use a unit divisor for
# such rows so they standardize to all zeros instead.
safe_stds = row_stds.replace(0, 1.0)
df[song_columns] = (
    song_scores
    .sub(row_means, axis=0)
    .div(safe_stds, axis=0)
    .clip(lower=-3)
)

# Standardization: per-user (row-wise) z-score of the song scores, clipped below at -3

In [None]:
# Preview the song columns after row-wise standardization and clipping at -3.
df.head()

Unnamed: 0,ID,force,song1,song2,song3,song4,song5,song6,song7,song8,...,song91,song92,song93,song94,song95,song96,song97,song98,song99,song100
0,SV-2505-3189,21.297,0.645292,1.79376,0.597648,-0.674522,0.77515,0.947115,-0.3297,-0.476139,...,-0.286356,1.307864,-0.451406,0.64708,-0.119345,-1.208337,0.153788,-0.331936,-0.0469,1.074394
1,SV-6555-7968,21.247,-0.254818,0.998972,0.595352,0.268362,0.507945,-0.246344,0.289894,0.587168,...,-0.296722,-3.0,-1.232556,-3.0,-3.0,0.367627,0.225123,0.326517,0.329438,-0.587398
2,SV-6546-7094,21.234,0.32498,0.943985,0.291842,0.289916,-0.432556,-1.472267,-1.135503,0.547552,...,-1.181558,-1.16821,-1.705365,-3.0,0.260629,-1.266231,0.820928,0.753755,-2.205264,-0.097452
3,SV-2165-8973,21.231,0.53905,0.501562,-0.345014,-0.153109,-0.683589,0.17063,0.017389,0.555685,...,-0.374168,-2.646355,0.524456,0.172601,0.566925,-0.891749,-0.395679,0.557068,0.073415,0.294924
4,SV-3164-3276,21.229,-1.10973,1.184064,0.283532,0.453114,0.496491,-1.472361,0.565773,0.635597,...,-0.629191,-2.016561,0.944658,-1.40318,0.487179,-0.349218,-0.295919,-0.069245,0.214351,-1.234038


**<h2> Clustering of users </h2>**

In [None]:
# Cluster users on their standardized per-song scores.
X = df[song_columns]
# random_state pins the centroid initialisation so the cluster labels (and the
# tables derived from them) are reproducible across kernel restarts; n_init is
# set explicitly because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=6, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X)
# Plain column assignment keeps this cell re-runnable: concatenating a new
# 'cluster' Series on every run would leave duplicate 'cluster' columns.
df['cluster'] = clusters

In [None]:
# Preview the table with the new `cluster` label column appended.
df.head()

Unnamed: 0,ID,force,song1,song2,song3,song4,song5,song6,song7,song8,...,song92,song93,song94,song95,song96,song97,song98,song99,song100,cluster
0,SV-2505-3189,21.297,0.645292,1.79376,0.597648,-0.674522,0.77515,0.947115,-0.3297,-0.476139,...,1.307864,-0.451406,0.64708,-0.119345,-1.208337,0.153788,-0.331936,-0.0469,1.074394,2
1,SV-6555-7968,21.247,-0.254818,0.998972,0.595352,0.268362,0.507945,-0.246344,0.289894,0.587168,...,-3.0,-1.232556,-3.0,-3.0,0.367627,0.225123,0.326517,0.329438,-0.587398,2
2,SV-6546-7094,21.234,0.32498,0.943985,0.291842,0.289916,-0.432556,-1.472267,-1.135503,0.547552,...,-1.16821,-1.705365,-3.0,0.260629,-1.266231,0.820928,0.753755,-2.205264,-0.097452,1
3,SV-2165-8973,21.231,0.53905,0.501562,-0.345014,-0.153109,-0.683589,0.17063,0.017389,0.555685,...,-2.646355,0.524456,0.172601,0.566925,-0.891749,-0.395679,0.557068,0.073415,0.294924,2
4,SV-3164-3276,21.229,-1.10973,1.184064,0.283532,0.453114,0.496491,-1.472361,0.565773,0.635597,...,-2.016561,0.944658,-1.40318,0.487179,-0.349218,-0.295919,-0.069245,0.214351,-1.234038,1


**<h2> Song difficulty table </h2>**

In [None]:
# Mean standardized score of every song within each cluster: one row per
# cluster label, one column per song.
cluster_scores = df.groupby('cluster')[song_columns].mean()
# Display the whole table: there is one row per cluster (6), and head() would
# stop after the first five rows and hide the last cluster.
cluster_scores

Unnamed: 0_level_0,song1,song2,song3,song4,song5,song6,song7,song8,song9,song10,...,song91,song92,song93,song94,song95,song96,song97,song98,song99,song100
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.341138,-0.224031,0.744256,-0.201048,-0.033698,0.265304,0.377741,0.580292,0.659035,-0.068434,...,-0.429048,-0.442949,-0.292663,-0.32097,-0.241346,-0.208639,-0.224784,-0.279392,-0.297872,-0.350502
1,0.25181,0.37185,0.569425,0.176122,0.03138,-0.196648,0.096107,0.608833,0.556773,-0.151076,...,-1.004739,-1.414151,-0.312613,-2.552505,-0.338661,0.057043,-0.207493,0.108852,-0.117801,-0.770228
2,0.419494,0.611742,0.666709,0.229586,0.201549,0.166141,0.352495,0.664167,0.788419,0.058245,...,-0.675067,-0.753558,-0.50747,-0.625613,-0.405371,-0.295436,-0.31817,-0.303241,-0.436375,-0.570536
3,-0.040655,-0.675029,0.658542,-0.151661,-0.930756,-0.056482,0.158839,0.675199,0.551954,-0.443666,...,-1.086662,-0.445556,-0.609723,0.043958,0.199487,0.041585,0.037598,-0.007818,-0.053733,-0.080147
4,-0.138109,-0.881095,0.640934,-0.326187,-0.989922,-0.076435,0.049856,0.6019,0.521151,-0.471553,...,-0.552685,-1.851253,0.127314,-2.132891,-0.346168,0.377347,0.245834,0.434187,0.347509,-0.323704


In [None]:
# Save the per-cluster mean scores to cluster_scores.csv (relative path, so it
# lands in the current working directory); the cluster label is the index column.
cluster_scores.to_csv('cluster_scores.csv')

**<h2> Song type table </h2>**

In [None]:
# Rank the songs within each cluster (axis=1 ranks across the columns of a
# row). With ascending=True the song with the lowest mean standardized score
# in a cluster gets rank 1 and the highest-scoring song gets the largest rank.
song_ranks = cluster_scores.rank(axis=1, ascending=True)
song_ranks

Unnamed: 0_level_0,song1,song2,song3,song4,song5,song6,song7,song8,song9,song10,...,song91,song92,song93,song94,song95,song96,song97,song98,song99,song100
cluster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,78.0,37.0,94.0,40.0,54.0,75.0,81.0,89.0,91.0,48.0,...,11.0,10.0,29.0,22.0,34.0,38.0,36.0,30.0,28.0,17.0
1,59.0,67.0,76.0,56.0,45.0,35.0,52.0,78.0,74.0,36.0,...,8.0,4.0,29.0,1.0,28.0,47.0,33.0,55.0,39.0,13.0
2,68.0,79.0,85.0,61.0,60.0,57.0,66.0,84.0,92.0,54.0,...,15.0,8.0,24.0,18.0,31.0,39.0,37.0,38.0,28.0,20.0
3,43.0,17.0,76.0,31.0,13.0,41.0,57.0,77.0,72.0,24.0,...,8.0,23.0,18.0,52.0,61.0,51.0,50.0,45.0,42.0,35.0
4,34.0,12.0,76.0,25.0,10.0,37.0,46.0,75.0,71.0,19.0,...,17.0,4.0,50.0,2.0,24.0,67.0,60.0,70.0,65.0,27.0
5,71.0,2.0,91.0,34.0,23.0,68.0,74.0,81.0,80.0,42.0,...,15.0,18.0,27.0,30.0,45.0,52.0,39.0,50.0,36.0,33.0


In [None]:
# For each song (column), pick the cluster label (row) in which that song's
# within-cluster rank is highest. The result is a Series indexed by song name.
max_rank_clusters = song_ranks.idxmax(axis=0)
max_rank_clusters

Unnamed: 0,0
song1,0
song2,2
song3,0
song4,2
song5,2
...,...
song96,4
song97,4
song98,4
song99,4


In [None]:
# Save the song -> cluster mapping to song_clusters.csv (relative path, so it
# lands in the current working directory); the song name is the index column.
# NOTE(review): the Series is unnamed, so the value column header appears as 0
# (as in the preview above) — confirm downstream readers expect that.
max_rank_clusters.to_csv('song_clusters.csv')