# Spotify API data gathering

 This jupyter notebook will showcase the procedure followed to gather Spotify Data.<br>
 It is divided into 5 parts:

 - Loading of the original Dataset
 - Spotify API for high-level Data
 - Spotify API for track information
 - Spotify API for low-level Data
 - Assembly of the final dataset

This notebook will only showcase how we extracted the information for the test set (The one Professor Louridas gave us).<br>
That is because, even though the high-level dataset can be gathered in approximately 15 minutes, the low-level one can take upwards of 8 hours.<br>
With only 1162 songs, it takes 10 minutes.<br>


In [None]:
from spotify_config import config
import spotipy
from spotipy.oauth2 import SpotifyClientCredentials
import pandas as pd

- First we load our data.

In [None]:
# Read the Spotify track ids. header=None tells pandas not to treat the first line as a
# header, so the single column of ids is labelled 0.
tracks =  pd.read_csv('Data/spotify_ids.txt', header=None)
tracks


Unnamed: 0,0
0,7lPN2DXiMsVn7XUKtOW1CS
1,5QO79kh1waicV47BqGRL3g
2,0VjIjW4GlUZAMYd2vXMi3b
3,4MzXwWMhyBbmu6hOcLVD49
4,5Kskr9LcNYa0tpt5f0ZEJx
...,...
1157,4lUmnwRybYH7mMzf16xB0y
1158,1fzf9Aad4y1RWrmwosAK5y
1159,3E3pb3qH11iny6TFDJvsg5
1160,3yTkoTuiKRGL2VAlQd7xsC


- Then we create a Spotify Client, from which we can access the Spotify API.
- Our credentials are located in `spotify_config.py`.

In [None]:
from spotify_config import config

# Build the API client from the client id / secret pair stored in spotify_config.
client_credentials_manager = SpotifyClientCredentials(
    client_id=config['client_id'],
    client_secret=config['client_secret'],
)
sp = spotipy.Spotify(client_credentials_manager=client_credentials_manager)

- To access a song, we need to give it its `key`.
- We create a list of all the `keys` contained in our dataset.
- Then, we iterate until we get the information for all the songs.
- We can request the songs in batches of 100.


In [None]:
# Fetch the high-level audio features batch by batch; results are keyed by track id.
features = {}
all_track_ids = list(tracks[0].unique())
num_tracks = 100
start = 0
while start < len(all_track_ids):
    stop = start + num_tracks
    print(f'getting from {start} to {stop}')
    tracks_batch = all_track_ids[start:stop]
    features_batch = sp.audio_features(tracks_batch)
    # zip pairs each requested id with the entry returned at the same position.
    features.update(zip(tracks_batch, features_batch))
    start = stop

getting from 0 to 100
getting from 100 to 200
getting from 200 to 300
getting from 300 to 400
getting from 400 to 500
getting from 500 to 600
getting from 600 to 700
getting from 700 to 800
getting from 800 to 900
getting from 900 to 1000
getting from 1000 to 1100
getting from 1100 to 1200


- We transform the dictionary into a dataframe.

In [None]:
# Each dictionary key (a track id) becomes a row; reset_index then moves the ids out of the
# row labels into a regular column named 'index'.
tracks_high_level_analysis = pd.DataFrame.from_dict(features, orient='index').reset_index()
tracks_high_level_analysis

Unnamed: 0,index,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,type,id,uri,track_href,analysis_url,duration_ms,time_signature
0,7lPN2DXiMsVn7XUKtOW1CS,0.585,0.436,10,-8.761,1,0.0601,0.72100,0.000013,0.1050,0.132,143.874,audio_features,7lPN2DXiMsVn7XUKtOW1CS,spotify:track:7lPN2DXiMsVn7XUKtOW1CS,https://api.spotify.com/v1/tracks/7lPN2DXiMsVn...,https://api.spotify.com/v1/audio-analysis/7lPN...,242014,4
1,5QO79kh1waicV47BqGRL3g,0.680,0.826,0,-5.487,1,0.0309,0.02120,0.000012,0.5430,0.644,118.051,audio_features,5QO79kh1waicV47BqGRL3g,spotify:track:5QO79kh1waicV47BqGRL3g,https://api.spotify.com/v1/tracks/5QO79kh1waic...,https://api.spotify.com/v1/audio-analysis/5QO7...,215627,4
2,0VjIjW4GlUZAMYd2vXMi3b,0.514,0.730,1,-5.934,1,0.0598,0.00146,0.000095,0.0897,0.334,171.005,audio_features,0VjIjW4GlUZAMYd2vXMi3b,spotify:track:0VjIjW4GlUZAMYd2vXMi3b,https://api.spotify.com/v1/tracks/0VjIjW4GlUZA...,https://api.spotify.com/v1/audio-analysis/0VjI...,200040,4
3,4MzXwWMhyBbmu6hOcLVD49,0.731,0.573,4,-10.059,0,0.0544,0.40100,0.000052,0.1130,0.145,109.928,audio_features,4MzXwWMhyBbmu6hOcLVD49,spotify:track:4MzXwWMhyBbmu6hOcLVD49,https://api.spotify.com/v1/tracks/4MzXwWMhyBbm...,https://api.spotify.com/v1/audio-analysis/4MzX...,205090,4
4,5Kskr9LcNYa0tpt5f0ZEJx,0.907,0.393,4,-7.636,0,0.0539,0.45100,0.000001,0.1350,0.202,104.949,audio_features,5Kskr9LcNYa0tpt5f0ZEJx,spotify:track:5Kskr9LcNYa0tpt5f0ZEJx,https://api.spotify.com/v1/tracks/5Kskr9LcNYa0...,https://api.spotify.com/v1/audio-analysis/5Ksk...,205458,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1157,4lUmnwRybYH7mMzf16xB0y,0.596,0.650,9,-5.167,1,0.3370,0.13800,0.000000,0.1400,0.188,133.997,audio_features,4lUmnwRybYH7mMzf16xB0y,spotify:track:4lUmnwRybYH7mMzf16xB0y,https://api.spotify.com/v1/tracks/4lUmnwRybYH7...,https://api.spotify.com/v1/audio-analysis/4lUm...,257428,4
1158,1fzf9Aad4y1RWrmwosAK5y,0.588,0.850,4,-6.431,1,0.0318,0.16800,0.002020,0.0465,0.768,93.003,audio_features,1fzf9Aad4y1RWrmwosAK5y,spotify:track:1fzf9Aad4y1RWrmwosAK5y,https://api.spotify.com/v1/tracks/1fzf9Aad4y1R...,https://api.spotify.com/v1/audio-analysis/1fzf...,187310,4
1159,3E3pb3qH11iny6TFDJvsg5,0.754,0.660,0,-6.811,1,0.2670,0.17900,0.000000,0.1940,0.316,83.000,audio_features,3E3pb3qH11iny6TFDJvsg5,spotify:track:3E3pb3qH11iny6TFDJvsg5,https://api.spotify.com/v1/tracks/3E3pb3qH11in...,https://api.spotify.com/v1/audio-analysis/3E3p...,209299,4
1160,3yTkoTuiKRGL2VAlQd7xsC,0.584,0.836,0,-4.925,1,0.0790,0.05580,0.000000,0.0663,0.484,104.973,audio_features,3yTkoTuiKRGL2VAlQd7xsC,spotify:track:3yTkoTuiKRGL2VAlQd7xsC,https://api.spotify.com/v1/tracks/3yTkoTuiKRGL...,https://api.spotify.com/v1/audio-analysis/3yTk...,202204,4


- We also remove the information not needed.

In [None]:
# Remove the URL/type fields and the 'id' column; the track id stays available in 'index'.
columns_to_remove = ['uri','track_href','analysis_url','type','id']
tracks_high_level_analysis = tracks_high_level_analysis.drop(columns=columns_to_remove)
tracks_high_level_analysis

Unnamed: 0,index,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_ms,time_signature
0,7lPN2DXiMsVn7XUKtOW1CS,0.585,0.436,10,-8.761,1,0.0601,0.72100,0.000013,0.1050,0.132,143.874,242014,4
1,5QO79kh1waicV47BqGRL3g,0.680,0.826,0,-5.487,1,0.0309,0.02120,0.000012,0.5430,0.644,118.051,215627,4
2,0VjIjW4GlUZAMYd2vXMi3b,0.514,0.730,1,-5.934,1,0.0598,0.00146,0.000095,0.0897,0.334,171.005,200040,4
3,4MzXwWMhyBbmu6hOcLVD49,0.731,0.573,4,-10.059,0,0.0544,0.40100,0.000052,0.1130,0.145,109.928,205090,4
4,5Kskr9LcNYa0tpt5f0ZEJx,0.907,0.393,4,-7.636,0,0.0539,0.45100,0.000001,0.1350,0.202,104.949,205458,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1157,4lUmnwRybYH7mMzf16xB0y,0.596,0.650,9,-5.167,1,0.3370,0.13800,0.000000,0.1400,0.188,133.997,257428,4
1158,1fzf9Aad4y1RWrmwosAK5y,0.588,0.850,4,-6.431,1,0.0318,0.16800,0.002020,0.0465,0.768,93.003,187310,4
1159,3E3pb3qH11iny6TFDJvsg5,0.754,0.660,0,-6.811,1,0.2670,0.17900,0.000000,0.1940,0.316,83.000,209299,4
1160,3yTkoTuiKRGL2VAlQd7xsC,0.584,0.836,0,-4.925,1,0.0790,0.05580,0.000000,0.0663,0.484,104.973,202204,4


- We have successfully acquired the high-level audio features for all of our songs.
- We will continue with the tracks details.

# Tracks Extra Details

We want to access 3 details for each song:
- The year it was created.
- If it is explicit or not.
- Its popularity.

We will follow the same process as before, only this time we will iterate all the songs one by one, and save the values needed in a dataframe.<br>
The Spotify API returns a dictionary for each song.

In [None]:
# Fetch release date, explicit flag and popularity for every track, one request per track.
# Rows are buffered in a plain list and turned into a DataFrame once at the end:
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on every iteration.
all_track_ids = list(tracks[0].unique())
details_rows = []
for start, track_id in enumerate(all_track_ids):
    print(f'Song {start}')
    # sp.track returns a dictionary describing the track.
    track_info = sp.track(track_id)
    details_rows.append([track_info['album']['release_date'],
                         track_info['explicit'],
                         track_info['popularity'],
                         track_id])

# Columns are left as 0..3 (release date, explicit, popularity, track id) so they can be
# named in a separate step.
details_df = pd.DataFrame(details_rows)


getting from 0 to 100
getting from 1 to 101
getting from 2 to 102
getting from 3 to 103
getting from 4 to 104
getting from 5 to 105
getting from 6 to 106
getting from 7 to 107
getting from 8 to 108
getting from 9 to 109
getting from 10 to 110
getting from 11 to 111
getting from 12 to 112
getting from 13 to 113
getting from 14 to 114
getting from 15 to 115
getting from 16 to 116
getting from 17 to 117
getting from 18 to 118
getting from 19 to 119
getting from 20 to 120
getting from 21 to 121
getting from 22 to 122
getting from 23 to 123
getting from 24 to 124
getting from 25 to 125
getting from 26 to 126
getting from 27 to 127
getting from 28 to 128
getting from 29 to 129
getting from 30 to 130
getting from 31 to 131
getting from 32 to 132
getting from 33 to 133
getting from 34 to 134
getting from 35 to 135
getting from 36 to 136
getting from 37 to 137
getting from 38 to 138
getting from 39 to 139
getting from 40 to 140
getting from 41 to 141
getting from 42 to 142
getting from 43 to 14

In [None]:
# Give the rows a clean 0..n-1 index and name the four positional columns.
details_df = (
    details_df
    .reset_index(drop=True)
    .rename(columns={0:'year',1:'explicit',2:'popularity',3:'index'})
)

- We recode the `explicit` feature to be numerical

In [None]:
import numpy as np

# Encode the flag as 1 where it equals True and 0 everywhere else.
is_explicit = details_df['explicit'] == True
details_df['explicit'] = np.where(is_explicit, 1, 0)

- Lastly, we only keep the `year` from the `release date`.

In [None]:
# Spotify release dates come as 'YYYY', 'YYYY-MM' or 'YYYY-MM-DD', so the year is always the
# first four characters. Slicing avoids pd.to_datetime failing on a column that mixes those
# precisions, and astype(str) keeps the cell safe to re-run once the column already holds
# integer years (pd.to_datetime would read such integers as nanoseconds since 1970).
details_df['year'] = details_df['year'].astype(str).str[:4].astype(int)

In [None]:
# Display the track details table (year, explicit, popularity, track id).
details_df

Unnamed: 0,year,explicit,popularity,index
0,2021,1,33,7lPN2DXiMsVn7XUKtOW1CS
1,2020,1,90,5QO79kh1waicV47BqGRL3g
2,2020,0,94,0VjIjW4GlUZAMYd2vXMi3b
3,2020,1,87,4MzXwWMhyBbmu6hOcLVD49
4,2021,1,8,5Kskr9LcNYa0tpt5f0ZEJx
...,...,...,...,...
1157,2021,1,77,4lUmnwRybYH7mMzf16xB0y
1158,2021,0,11,1fzf9Aad4y1RWrmwosAK5y
1159,2021,1,64,3E3pb3qH11iny6TFDJvsg5
1160,2021,0,0,3yTkoTuiKRGL2VAlQd7xsC


- We have successfully acquired the details of the track for all of our songs.
- We will continue with the track audio analysis.

# Audio Analysis (Low-Level)


In the Audio analysis, each track is divided into segments. A segment represents a distinct part of the song.<br>
Each segment comes with its own set of values for different variables. <br>
We will focus on:
- `timbre`.
- `pitch`.
- `loudness_start`.
- `loudness_max`.
- `loudness_max_time`.
- `loudness_end`.

The function below computes, for each of these variables, its mean value over all the segments of a song.

In [None]:
def keep_segments_with_great_confidence(segments):
    """Average the per-segment audio-analysis values of one track.

    Despite the name, no confidence filtering takes place: every segment
    contributes equally to the averages.

    Parameters
    ----------
    segments : iterable of dict
        Each segment must provide 'timbre' and 'pitches' (sequences of numbers;
        at most the first 12 entries are used) and the numeric entries
        'loudness_start', 'loudness_max', 'loudness_max_time' and 'loudness_end'.

    Returns
    -------
    tuple
        (timbre_list, pitches_list, loudness_start, loudness_max,
        loudness_max_time, loudness_end), each holding the mean over all
        segments. When there are no segments, every value is left at zero.

    Raises
    ------
    KeyError
        If a segment lacks one of the entries listed above.
    """
    timbre_list = [0] * 12
    pitches_list = [0] * 12
    loudness_start = 0
    loudness_max = 0
    loudness_max_time = 0
    loudness_end = 0
    count = 0

    # Accumulate element-wise sums for the vectors and plain sums for the scalars.
    for segment in segments:
        timbre_list = [total + value
                       for total, value in zip(timbre_list, segment['timbre'])]
        pitches_list = [total + value
                        for total, value in zip(pitches_list, segment['pitches'])]
        loudness_start += segment['loudness_start']
        loudness_max += segment['loudness_max']
        loudness_max_time += segment['loudness_max_time']
        loudness_end += segment['loudness_end']
        count += 1

    # Turn the sums into means; skipped for an empty input to avoid dividing by zero.
    if count != 0:
        timbre_list = [total / count for total in timbre_list]
        pitches_list = [total / count for total in pitches_list]
        loudness_start /= count
        loudness_max /= count
        loudness_max_time /= count
        loudness_end /= count

    return timbre_list, pitches_list, loudness_start, loudness_max, loudness_max_time, loudness_end


# Alias that describes what the function actually does; code that calls
# keep_average_of_segments resolves to the same function.
keep_average_of_segments = keep_segments_with_great_confidence


In [None]:
features = {}
# Unique track ids taken from the 'index' column of the high-level feature table.
all_track_ids = tracks_high_level_analysis['index'].unique().tolist()

- For each song, we append its values to a Dataframe.

In [None]:
# Fetch the audio analysis of each track (one request per track) and keep the averaged
# segment values. Rows are buffered in plain lists and converted to DataFrames once at the
# end, so a failure on the very first track cannot leave the frames undefined, and the
# removed-in-pandas-2.0 DataFrame.append is not needed.
keys = []
timbre_rows = []
pitch_rows = []
loudness_start_rows = []
loudness_max_rows = []
loudness_max_time_rows = []
loudness_end_rows = []

for start, track_id in enumerate(all_track_ids):
    print(start)
    try:
        analysis = sp.audio_analysis(track_id)
    except spotipy.SpotifyException as error:
        # Typically a 404 when Spotify has no analysis for the track: skip it, but say
        # which one. Any other kind of error is allowed to surface instead of being hidden.
        print(f'No audio analysis for {track_id}: {error}')
        continue

    (timbre, pitch, loudness_start, loudness_max,
     loudness_max_time, loudness_end) = keep_segments_with_great_confidence(analysis['segments'])

    # keys only receives ids of tracks that produced a row, so it stays aligned
    # position-by-position with the value lists.
    keys.append(track_id)
    timbre_rows.append(timbre)
    pitch_rows.append(pitch)
    loudness_start_rows.append(loudness_start)
    loudness_max_rows.append(loudness_max)
    loudness_max_time_rows.append(loudness_max_time)
    loudness_end_rows.append(loudness_end)

# Columns keep their default integer labels (0..11 for the vectors, 0 for the scalars).
df_timbre = pd.DataFrame(timbre_rows)
df_pitch = pd.DataFrame(pitch_rows)
df_loudness_start = pd.DataFrame(loudness_start_rows)
df_loudness_max = pd.DataFrame(loudness_max_rows)
df_loudness_max_time = pd.DataFrame(loudness_max_time_rows)
df_loudness_end = pd.DataFrame(loudness_end_rows)


0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

HTTP Error for GET to https://api.spotify.com/v1/audio-analysis/4LaGu95Ui2s4vprSQYWUAZ with Params: {} returned 404 due to analysis not found


800
Expection
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037


- Apparently, one song did not have Audio Analysis.
- Since it is only one, it will not have a big influence on the testing results.

In [None]:
# One row per id stored in keys, in the order the ids were collected.
tracks_keys = pd.DataFrame({'songs_id': keys})
tracks_keys

Unnamed: 0,songs_id
0,7lPN2DXiMsVn7XUKtOW1CS
1,5QO79kh1waicV47BqGRL3g
2,0VjIjW4GlUZAMYd2vXMi3b
3,4MzXwWMhyBbmu6hOcLVD49
4,5Kskr9LcNYa0tpt5f0ZEJx
...,...
1156,4lUmnwRybYH7mMzf16xB0y
1157,1fzf9Aad4y1RWrmwosAK5y
1158,3E3pb3qH11iny6TFDJvsg5
1159,3yTkoTuiKRGL2VAlQd7xsC


- Then we rename our dataframes.

In [None]:
# Each loudness frame has a single column labelled 0; give it a descriptive name.
loudness_frames = (
    (df_loudness_start, 'loudness_start'),
    (df_loudness_max, 'loudness_max'),
    (df_loudness_max_time, 'loudness_max_time'),
    (df_loudness_end, 'loudness_end'),
)
for frame, column_name in loudness_frames:
    frame.rename(columns={0: column_name}, inplace=True)

- Reset their Indexes.

In [None]:
# Renumber the rows 0..n-1 in place so the frames can be joined by position.
for frame in (df_loudness_start, df_loudness_max, df_loudness_max_time, df_loudness_end):
    frame.reset_index(inplace=True, drop=True)

- And merge them.

In [None]:
# Join the four single-column loudness frames side by side on their row index.
loudness_low_level = df_loudness_start.merge(df_loudness_max, left_index=True, right_index=True)
for frame in (df_loudness_max_time, df_loudness_end):
    loudness_low_level = loudness_low_level.merge(frame, left_index=True, right_index=True)

- The final result is the loudness dataframe.

In [None]:
# Display the combined loudness table.
loudness_low_level

Unnamed: 0,loudness_start,loudness_max,loudness_max_time,loudness_end
0,-20.136485,-11.846137,0.071430,-0.076142
1,-12.366643,-5.266731,0.059288,-0.060194
2,-13.010345,-6.141568,0.059692,-0.074730
3,-21.296216,-12.379307,0.059611,-0.074534
4,-23.091459,-10.516077,0.055798,-0.077121
...,...,...,...,...
1156,-16.965315,-6.245689,0.058101,-0.048662
1157,-14.089263,-7.443791,0.049178,-0.088235
1158,-19.211222,-8.765307,0.061354,-0.064865
1159,-12.448574,-5.485316,0.062063,-0.077922


- We follow the same procedure for `timbre` and `pitch`.

In [None]:
# Renumber the rows 0..n-1 in place so ids, pitches and timbres can be joined by position.
for frame in (tracks_keys, df_pitch, df_timbre):
    frame.reset_index(inplace=True, drop=True)

In [None]:
# Name the twelve positional pitch columns (0-11), C through B.
pitch_columns = ['C_key_per', 'C_Sharp_key_per', 'D_key_per',
                 'D_Sharp_key_per', 'E_key_per', 'F_key_per',
                 'F_Sharp_key_per', 'G_key_per', 'G_Sharp_key_per',
                 'A_key_per', 'A_Sharp_key_per', 'B_key_per']
df_pitch.rename(columns=dict(enumerate(pitch_columns)), inplace=True)

In [None]:
# Name the twelve positional timbre columns (0-11). The spellings are kept exactly as they
# are because these names end up as column headers of the exported dataset.
timbre_columns = ['timbre_frist_dim', 'timbre_second_dim', 'timbre_third_dim',
                  'timbre_fourth_dim', 'timbre_fifth_dim', 'timbre_sixth_dim',
                  'timbre_seventh_dim', 'timbre_eighth_dim', 'timbre_ninenth_dim',
                  'timbre_tenth_dim', 'timbre_eleventh_dim', 'timbre_twelveth_dim']
df_timbre.rename(columns=dict(enumerate(timbre_columns)), inplace=True)

In [None]:
# Put the track ids, pitch means and timbre means side by side, matched on row index.
tracks_low_level = (
    tracks_keys
    .merge(df_pitch, left_index=True, right_index=True)
    .merge(df_timbre, left_index=True, right_index=True)
)

In [None]:
# Add the loudness means, again matched on row index.
tracks_low_level = tracks_low_level.merge(
    loudness_low_level, left_index=True, right_index=True
)

# Final Merge

Finally, we merge our 3 separate datasets into one, based on the `songs_id`

In [None]:
# Attach the low-level values to the high-level features by matching track ids
# ('index' on the left, 'songs_id' on the right).
complete_analysis = tracks_high_level_analysis.merge(
    tracks_low_level, left_on='index', right_on='songs_id'
)

In [None]:
# Add year, explicit flag and popularity, matched on the shared 'index' id column.
complete_analysis = complete_analysis.merge(details_df, on='index')

In [None]:
# Display the fully merged dataset.
complete_analysis

Unnamed: 0,index,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,...,timbre_tenth_dim,timbre_eleventh_dim,timbre_twelveth_dim,loudness_start,loudness_max,loudness_max_time,loudness_end,year,explicit,popularity
0,7lPN2DXiMsVn7XUKtOW1CS,0.585,0.436,10,-8.761,1,0.0601,0.72100,0.000013,0.1050,...,-2.546181,-9.382599,3.806793,-20.136485,-11.846137,0.071430,-0.076142,2021,1,33
1,5QO79kh1waicV47BqGRL3g,0.680,0.826,0,-5.487,1,0.0309,0.02120,0.000012,0.5430,...,-3.543406,-9.379946,-2.194406,-12.366643,-5.266731,0.059288,-0.060194,2020,1,90
2,0VjIjW4GlUZAMYd2vXMi3b,0.514,0.730,1,-5.934,1,0.0598,0.00146,0.000095,0.0897,...,2.743308,-12.995984,-0.989638,-13.010345,-6.141568,0.059692,-0.074730,2020,0,94
3,4MzXwWMhyBbmu6hOcLVD49,0.731,0.573,4,-10.059,0,0.0544,0.40100,0.000052,0.1130,...,0.537412,-13.412574,-7.004807,-21.296216,-12.379307,0.059611,-0.074534,2020,1,87
4,5Kskr9LcNYa0tpt5f0ZEJx,0.907,0.393,4,-7.636,0,0.0539,0.45100,0.000001,0.1350,...,2.807862,-21.408289,4.122929,-23.091459,-10.516077,0.055798,-0.077121,2021,1,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1156,4lUmnwRybYH7mMzf16xB0y,0.596,0.650,9,-5.167,1,0.3370,0.13800,0.000000,0.1400,...,6.263427,-21.499195,-6.566107,-16.965315,-6.245689,0.058101,-0.048662,2021,1,77
1157,1fzf9Aad4y1RWrmwosAK5y,0.588,0.850,4,-6.431,1,0.0318,0.16800,0.002020,0.0465,...,-1.181860,-5.934610,0.560949,-14.089263,-7.443791,0.049178,-0.088235,2021,0,11
1158,3E3pb3qH11iny6TFDJvsg5,0.754,0.660,0,-6.811,1,0.2670,0.17900,0.000000,0.1940,...,1.242939,-20.744944,-4.064045,-19.211222,-8.765307,0.061354,-0.064865,2021,1,64
1159,3yTkoTuiKRGL2VAlQd7xsC,0.584,0.836,0,-4.925,1,0.0790,0.05580,0.000000,0.0663,...,5.259177,-13.858022,-6.516408,-12.448574,-5.485316,0.062063,-0.077922,2021,0,0


In [None]:
# Write the merged dataset to CSV; index=False leaves the row labels out of the file.
complete_analysis.to_csv('Data/test_set.csv', index=False)