In [151]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier


In [152]:
# Read train_resampled.csv (relative path, resolved against the working directory) into `data`.
data=pd.read_csv("train_resampled.csv")

In [153]:
# Remove the identifier and free-text columns from `data` in place.
data.drop(columns=['Id', 'Artist Name', 'Track Name'], inplace=True)


def clean_data(df):
    """Fill missing values in ``Popularity``, ``key`` and ``instrumentalness``.

    ``Popularity`` and ``instrumentalness`` are filled with their column
    mean; ``key`` is filled with its column mean truncated to an integer.

    Each filled column is assigned back to ``df`` instead of calling
    ``fillna(..., inplace=True)`` on ``df[col]``. That chained in-place form
    is deprecated in pandas 2.x and leaves ``df`` unchanged once
    Copy-on-Write is active.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the three columns above. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    for column in ('Popularity', 'instrumentalness'):
        df[column] = df[column].fillna(float(df[column].mean()))

    key_mean = df['key'].mean()
    # An all-missing column has a NaN mean, which int() cannot convert.
    # There is nothing to impute from in that case, so the column is left alone.
    if pd.notna(key_mean):
        df['key'] = df['key'].fillna(int(key_mean))
    return df

In [154]:
# Impute the missing values; clean_data returns the frame it was given.
data = clean_data(data)

In [155]:
# Show the cleaned frame as this cell's output.
data

Unnamed: 0,Popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_in min/ms,time_signature,Class
0,37.000000,0.334,0.536,9.0,-6.649,0,0.0381,0.378000,0.216007,0.1060,0.235,152.429,204947.000000,4,9
1,67.000000,0.725,0.747,11.0,-5.545,1,0.0876,0.027200,0.046800,0.1040,0.380,132.921,191956.000000,4,6
2,42.925307,0.584,0.804,7.0,-6.094,1,0.0619,0.000968,0.635000,0.2840,0.635,159.953,161037.000000,4,10
3,12.000000,0.515,0.308,5.0,-14.711,1,0.0312,0.907000,0.021300,0.3000,0.501,172.472,298093.000000,3,2
4,48.000000,0.565,0.777,6.0,-5.096,0,0.2490,0.183000,0.216007,0.2110,0.619,88.311,254145.000000,4,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43544,85.000000,0.641,0.324,11.0,-5.851,1,0.0299,0.698000,0.216007,0.3280,0.273,110.184,185487.000000,4,9
43545,49.000000,0.957,0.507,5.0,-6.206,1,0.0539,0.045100,0.216007,0.0543,0.817,123.998,2.881783,4,9
43546,32.000000,0.516,0.698,5.0,-8.008,1,0.0310,0.007410,0.008700,0.5750,0.227,84.014,181397.000000,4,9
43547,72.000000,0.674,0.882,3.0,-2.771,0,0.0477,0.000441,0.000065,0.0680,0.682,123.016,205733.000000,4,9


Teacher Model

In [156]:
def split_dataframe(df, n_splits):
    """Split ``df`` row-wise into ``n_splits`` consecutive chunks.

    Chunk sizes follow the ``numpy.array_split`` rule: when the row count is
    not divisible by ``n_splits``, the first ``len(df) % n_splits`` chunks
    receive one extra row. Slicing is done with ``iloc`` so every chunk is a
    DataFrame that keeps the original index and columns, without routing the
    frame through ``numpy.array_split`` (which relies on the deprecated
    ``DataFrame.swapaxes``).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to split.
    n_splits : int
        Number of chunks to produce; must be positive.

    Returns
    -------
    list of pandas.DataFrame
        ``n_splits`` chunks in row order. Trailing chunks are empty when
        ``n_splits`` exceeds the number of rows.

    Raises
    ------
    ValueError
        If ``n_splits`` is not positive.
    """
    n_splits = int(n_splits)
    if n_splits <= 0:
        raise ValueError("number sections must be larger than 0.")

    base, extra = divmod(len(df), n_splits)
    splits = []
    start = 0
    for position in range(n_splits):
        # The first `extra` chunks absorb the remainder, one row each.
        stop = start + base + (1 if position < extra else 0)
        splits.append(df.iloc[start:stop])
        start = stop
    return splits

In [157]:
def make_dataframe(split):
    """Turn one data chunk into a ``(features, target)`` pair.

    The chunk is wrapped in a DataFrame laid out with the columns of the
    module-level ``data`` frame. The ``Class`` column becomes the target and
    every other column is kept as a feature.

    Returns
    -------
    tuple
        ``(X, y)`` where ``X`` is the frame without ``Class`` and ``y`` is the
        ``Class`` column.
    """
    frame = pd.DataFrame(split, columns=data.columns)
    target = frame['Class']
    features = frame.drop(columns='Class')
    return features, target

In [158]:
from sklearn.model_selection import train_test_split
def split_train_test(X, y, test_size=0.3, random_state=None):
    """Split features and target into train and test partitions.

    Thin wrapper around ``train_test_split``. The defaults reproduce the
    previous hard-coded behaviour (30% test, unseeded shuffle); pass
    ``random_state`` to make the split repeatable between runs.

    Parameters
    ----------
    X, y
        Features and target, forwarded unchanged to ``train_test_split``.
    test_size : float or int, default 0.3
        Forwarded as ``test_size``.
    random_state : int, RandomState instance or None, default None
        Forwarded as ``random_state``.

    Returns
    -------
    tuple
        ``(X_train, X_test, y_train, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    return X_train, X_test, y_train, y_test

In [159]:
def add_noise(X_test, laplace_scale=0.1, rng=None):
    """Return ``X_test`` plus element-wise zero-centred Laplace noise.

    One independent noise value is drawn for every cell. The same scale is
    used for every column, whatever that column's numeric range is.

    Parameters
    ----------
    X_test : pandas.DataFrame or numpy.ndarray
        Numeric data to perturb. It is not modified; a new object is returned.
    laplace_scale : float, default 0.1
        Scale of the Laplace distribution.
    rng : numpy.random.Generator or numpy.random.RandomState, optional
        Source of randomness. When omitted, the global ``numpy.random`` state
        is used, exactly as before.

    Returns
    -------
    Same type as ``X_test + ndarray``
        The perturbed data, with the same shape as ``X_test``.
    """
    sampler = np.random if rng is None else rng
    laplace_noise = sampler.laplace(loc=0, scale=laplace_scale, size=X_test.shape)
    return X_test + laplace_noise

In [160]:
def model(X_train, y_train):
    """Fit a 1000-tree random forest on the given data and return it.

    The forest uses a fixed ``random_state`` of 2022, all available cores
    (``n_jobs=-1``) and ``verbose=1``.
    """
    forest = RandomForestClassifier(
        n_estimators=1000,
        random_state=2022,
        n_jobs=-1,
        verbose=1,
    )
    forest.fit(X_train, y_train)
    return forest

In [161]:
def predict(model, X_test):
    """Return the labels ``model`` predicts for ``X_test``.

    ``model`` is any object exposing a ``predict`` method. Inside this
    function the parameter hides the module-level ``model`` function.
    """
    return model.predict(X_test)

In [162]:
from sklearn.metrics import accuracy_score
def cal_accuracy(y_test, y_pred):
    """Return ``accuracy_score(y_test, y_pred)`` for the true and predicted labels."""
    return accuracy_score(y_test, y_pred)

In [163]:
# Number of consecutive row chunks; the training loop fits one teacher per chunk.
# NOTE(review): the recorded `accuracy` output further down lists 10 values, so that
# run presumably used a different split count — re-run before relying on those numbers.
num_splits = 5
result_splits = split_dataframe(data, num_splits)

In [164]:
# Train one teacher per chunk and record its accuracy on that chunk's held-out part.
# Both lists are reset here, so re-running this cell starts from scratch.
accuracy=[]
teacher_model=[]
for i in result_splits:
  # Features and target for this chunk only.
  X,y=make_dataframe(i)
  # Train/test split inside the chunk.
  X_train, X_test, y_train, y_test=split_train_test(X,y)
  teacher=model(X_train,y_train)
  # Kept at module level: aggregate_teacher iterates over this list.
  teacher_model.append(teacher)
  y_pred=predict(teacher,X_test)
  acc=cal_accuracy(y_test,y_pred)
  accuracy.append(acc)

[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elapsed:    0.4s
[Parallel(n_jobs=-1)]: Done 418 tasks      | elapsed:    1.1s
[Parallel(n_jobs=-1)]: Done 768 tasks      | elapsed:    2.0s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    2.7s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:    0.1s
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:    0.2s
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:    0.3s finished
[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=-1)]: Done 168 tasks      | elaps

In [165]:
# Per-teacher held-out accuracies, in chunk order.
accuracy

[0.521805661820964,
 0.49808722264728383,
 0.5110941086457537,
 0.8355011476664116,
 0.9495026778882938,
 1.0,
 0.9961744452945677,
 0.9671002295332823,
 0.9846977811782709,
 0.9709257842387146]

In [166]:
from collections import Counter
def aggregate_teacher(noised_dp):
    """Label each row of ``noised_dp`` by majority vote of the teachers.

    Every model in the module-level ``teacher_model`` list predicts the whole
    frame in one batch call, rather than one call per row per teacher. The
    most frequent label per row wins, and ties go to the label that appears
    first in ``teacher_model`` order.

    Parameters
    ----------
    noised_dp : pandas.DataFrame
        Feature rows to label. Values are taken positionally and relabelled
        with the columns of the module-level ``data`` frame, excluding its
        last column.

    Returns
    -------
    list
        One winning label per input row, in row order. Empty input gives an
        empty list.

    Raises
    ------
    ValueError
        If there are rows to label but ``teacher_model`` is empty.
    """
    if len(noised_dp) == 0:
        return []
    if not teacher_model:
        raise ValueError("teacher_model is empty; train the teachers first")

    features = pd.DataFrame(noised_dp.to_numpy(), columns=data.columns[:-1])
    # One predict() call per teacher; each result holds one label per row.
    votes_by_teacher = [teacher.predict(features) for teacher in teacher_model]

    prediction = []
    for row_votes in zip(*votes_by_teacher):
        tally = Counter(row_votes)
        # max() returns the first key with the top count and Counter keeps
        # insertion order, so a tie resolves to the earliest teacher's label.
        prediction.append(max(tally, key=tally.get))
    return prediction

Student Model

In [167]:
import pickle

In [168]:
def student(data1):
    """Train a student forest on teacher-voted labels and return its accuracy.

    Steps:

    1. Split ``data1`` into features and the ``Class`` target, then into
       train and test partitions with ``split_train_test``.
    2. Add Laplace noise to the training features and label the noised rows
       with ``aggregate_teacher``. The true training labels are not used.
    3. Fit a random forest on the un-noised training features against those
       teacher labels.
    4. Score the forest on the test partition against the true labels.

    Parameters
    ----------
    data1 : pandas.DataFrame
        Frame with a ``Class`` column plus the feature columns the teachers
        were trained on.

    Returns
    -------
    float
        Accuracy of the student on the test partition.
    """
    y = data1['Class']
    X = data1.drop('Class', axis=1)
    X_train, X_test, y_train, y_test = split_train_test(X, y)

    # Named student_model so the module-level model() function is not shadowed.
    student_model = RandomForestClassifier(
        random_state=2022, n_jobs=-1, n_estimators=1000, verbose=1
    )

    # Teachers only ever see the noised copy; the student is fit on X_train itself.
    noised = add_noise(X_train)
    # Keep the labels 1-D: a one-column DataFrame is a column-vector y, which
    # scikit-learn flags with a DataConversionWarning.
    teacher_labels = np.asarray(aggregate_teacher(noised))

    student_model.fit(X_train, teacher_labels)
    y_pred = student_model.predict(X_test)
    # NOTE(review): in this notebook data1 is a slice of the frame the teachers
    # were trained on, so this score is presumably optimistic — confirm the
    # intended evaluation set.
    return accuracy_score(y_test, y_pred)

In [169]:
# Take the first 500 rows of `data` as the student's data set and display them.
data1=data[:500]
data1

Unnamed: 0,Popularity,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,duration_in min/ms,time_signature,Class
0,37.000000,0.334,0.536,9.0,-6.649,0,0.0381,0.378000,0.216007,0.1060,0.235,152.429,204947.0,4,9
1,67.000000,0.725,0.747,11.0,-5.545,1,0.0876,0.027200,0.046800,0.1040,0.380,132.921,191956.0,4,6
2,42.925307,0.584,0.804,7.0,-6.094,1,0.0619,0.000968,0.635000,0.2840,0.635,159.953,161037.0,4,10
3,12.000000,0.515,0.308,5.0,-14.711,1,0.0312,0.907000,0.021300,0.3000,0.501,172.472,298093.0,3,2
4,48.000000,0.565,0.777,6.0,-5.096,0,0.2490,0.183000,0.216007,0.2110,0.619,88.311,254145.0,4,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,11.000000,0.375,0.744,5.0,-6.666,0,0.0481,0.001070,0.000109,0.3290,0.687,102.718,162039.0,4,10
496,52.000000,0.220,0.830,6.0,-3.584,0,0.0801,0.004190,0.000031,0.6540,0.457,169.765,263773.0,3,10
497,31.000000,0.302,0.510,3.0,-10.496,0,0.0373,0.164000,0.000003,0.1190,0.497,182.091,215453.0,3,2
498,54.000000,0.670,0.864,5.0,-5.200,0,0.0526,0.090100,0.000026,0.1190,0.732,132.010,268747.0,4,8


In [170]:
import warnings
# Silence every warning from here on; this also hides library deprecation and data-conversion notices.
warnings.filterwarnings('ignore')

In [171]:
# Train the student on data1 and print the accuracy it returns.
print(student(data1))

[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:    0.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:    0.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 168 tasks      | elaps

[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:    0.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:    0.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 16 concurrent workers.
[Parallel(n_jobs=16)]: Done  18 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 168 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 418 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 768 tasks      | elapsed:    0.0s
[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:    0.0s finished
[Parallel(n_jobs=16)]: Using backend ThreadingBackend with 

0.48


[Parallel(n_jobs=16)]: Done 1000 out of 1000 | elapsed:    0.1s finished
