In [None]:
# Install the Kaggle CLI. %pip installs into the environment of the running
# kernel (unlike `!pip`), and -q keeps the install log out of the notebook.
%pip install -q kaggle


Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Put the Kaggle API token stored on Drive into ~/.kaggle/kaggle.json.
# -p makes the cell re-runnable: a plain `mkdir` errors once ~/.kaggle exists.
! mkdir -p ~/.kaggle
# Copy directly to the final location instead of copying to ~ and moving.
! cp /content/drive/MyDrive/kaggle.json ~/.kaggle/kaggle.json
# Restrict the credentials file to owner read/write.
! chmod 600 ~/.kaggle/kaggle.json

Mounted at /content/drive
mkdir: cannot create directory ‘/root/.kaggle’: File exists


In [None]:
# Fetch the eldarsultanow/ml-bsd-dataset archive with the Kaggle CLI.
! kaggle datasets download -d eldarsultanow/ml-bsd-dataset

Downloading ml-bsd-dataset.zip to /content
100% 1.50G/1.51G [00:13<00:00, 126MB/s]
100% 1.51G/1.51G [00:13<00:00, 123MB/s]


In [None]:
# -o overwrites already-extracted files without prompting, so re-running the
# cell does not stall on unzip's interactive "replace?" question.
! unzip -o ml-bsd-dataset.zip

Archive:  ml-bsd-dataset.zip
  inflating: rank-vs-ap.csv          


In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.metrics import mean_squared_error
from keras.models import Sequential
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Conv1D, MaxPooling1D, Flatten
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import SGD
from keras.callbacks import EarlyStopping


In [None]:
# Load the extracted CSV; index_col=False tells pandas not to use the first
# column as the index.
df = pd.read_csv(
    filepath_or_buffer="rank-vs-ap.csv",
    index_col=False,
)


In [None]:
# Features are every column whose name contains 'ap_'; the target is 'rank'.
X = df.filter(regex='ap_')
y = df['rank']

n_clusters = 7
# Fix the seed: KMeans initialisation is random, so without random_state the
# cluster labels (and everything derived from them) change between runs.
kmeans = KMeans(n_clusters=n_clusters, random_state=42)
# Fit and store each row's cluster id as an extra column on df.
df['cluster_label'] = kmeans.fit_predict(X)



In [None]:
# Fit a random forest on X purely to rank the features by importance.
# random_state pins the forest so the importance ranking (and the top-n
# feature selection built on it) is identical on every run.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X, y)
rf_importance = rf_model.feature_importances_

In [None]:
# One row per feature column, paired with its random-forest importance.
feature_importance = pd.DataFrame(
    data={
        'Features': X.columns,
        'RF Importance': rf_importance,
    }
)


In [None]:
# Order the table from the most to the least important feature.
feature_importance = feature_importance.sort_values('RF Importance', ascending=False)


In [None]:
n = 10
# Names of the first n rows of the importance table, as a NumPy array.
top_features = feature_importance['Features'].head(n).values
print(f'Top {n} important features: {top_features}')

Top 10 important features: ['ap_2' 'ap_3' 'ap_1' 'ap_4' 'ap_5' 'ap_6' 'ap_8' 'ap_7' 'ap_11' 'ap_9']


In [None]:
# Model inputs: the cluster id followed by the selected top features.
X_reduced = df.loc[:, ['cluster_label', *top_features]]
# Regression target.
y_reduced = df.loc[:, 'rank']

In [None]:
def create_model(kernel_size=3, filters=64, dropout=0.2):
    """Build and compile the 1-D CNN regressor used by the grid search.

    Args:
        kernel_size: Width of the kernel in both convolution layers.
        filters: Number of filters in each convolution layer.
        dropout: Dropout rate applied to the flattened features before the
            output layer.

    Returns:
        A compiled Sequential model (mean-squared-error loss, Adam optimizer)
        ending in a single linear output unit.

    Note:
        The input length is read from the notebook-level ``X_reduced``.
    """
    model = Sequential()
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu', input_shape=(X_reduced.shape[1], 1)))
    model.add(Conv1D(filters=filters, kernel_size=kernel_size, activation='relu'))
    model.add(Flatten())
    # `dropout` was previously accepted but never used, which made tuning it
    # in the grid search a no-op. Apply it before the output layer.
    model.add(Dropout(dropout))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    return model

In [None]:
# Candidate hyper-parameters passed to create_model by the grid search.
# NOTE(review): the notebook never defined param_grid (NameError on a fresh
# kernel). These values are a reconstruction covering the three tuned
# parameters -- confirm against the intended search space.
param_grid = {
    'kernel_size': [3, 5],
    'filters': [32, 64],
    'dropout': [0.2, 0.5],
}

model = KerasRegressor(build_fn=create_model, epochs=10, batch_size=32, verbose=0)
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_mean_squared_error', cv=5)
# Reshape to (samples, features, 1) to match the input_shape declared in create_model.
grid_search.fit(X_reduced.values.reshape((X_reduced.shape[0], X_reduced.shape[1], 1)), y_reduced)

  model = KerasRegressor(build_fn=create_model, epochs=10, batch_size=32, verbose=0)


In [None]:
# The search was scored with 'neg_mean_squared_error', so negate the best
# score to report it as a plain MSE.
best_params = grid_search.best_params_
best_mse = -grid_search.best_score_
print("Best parameters: ", best_params)
print("Best MSE score: ", best_mse)


Best parameters:  {'dropout': 0.5, 'filters': 64, 'kernel_size': 3}
Best MSE score:  0.7501420120711112


In [None]:
# Reuse the computed feature ranking instead of a hand-copied column list,
# which goes stale whenever the importance ranking changes between runs.
# NOTE(review): no later cell visible here reads X / y (the train/test split
# uses X_reduced / y_reduced) -- confirm whether this cell is still needed.
X = df[list(top_features)]
y = df['rank']

In [None]:
# Split first, then scale. The original cell order scaled X_train before the
# split had created it, and used a `scaler` that was never instantiated.
X_train, X_test, y_train, y_test = train_test_split(X_reduced, y_reduced, test_size=0.2, random_state=42)

# Fit the scaler on the training split only and reuse it for the test split.
# Both results are NumPy arrays, which the later .reshape() calls rely on.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [None]:
def create_model(filters=64, kernel_size=3, dropout=0.5):
    """Assemble and compile a small 1-D CNN for regression.

    Layer stack: Conv1D -> MaxPooling1D -> Flatten -> Dense(128, relu) ->
    Dropout -> Dense(1), compiled with MSE loss and the Adam optimizer.

    Args:
        filters: Number of convolution filters.
        kernel_size: Width of the convolution kernel.
        dropout: Rate of the Dropout layer placed before the output unit.

    Returns:
        The compiled Sequential model. Its input length is taken from the
        notebook-level ``X_train``.
    """
    layers = [
        Conv1D(filters=filters, kernel_size=kernel_size, activation='relu',
               input_shape=(X_train.shape[1], 1)),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(128, activation='relu'),
        Dropout(dropout),
        Dense(1),
    ]
    network = Sequential(layers)
    network.compile(loss='mse', optimizer='adam')
    return network

In [None]:
# Build the CNN with explicit hyper-parameters.
model = create_model(kernel_size=3, filters=64, dropout=0.5)


In [None]:
# Re-compile so MAE and MSE are tracked as metrics alongside the MSE loss.
model.compile(loss='mse', optimizer='adam', metrics=['mae', 'mse'])

In [None]:
# Append a trailing length-1 axis so the array matches the model's
# (features, 1) input shape.
model.fit(
    X_train.reshape(X_train.shape + (1,)),
    y_train,
    epochs=10,
    batch_size=32,
    verbose=0,
)

<keras.callbacks.History at 0x7f686ede8dc0>

In [None]:
# Predict on the held-out split (trailing length-1 axis added to match the
# model's input shape).
y_pred = model.predict(X_test.reshape(X_test.shape + (1,)))
mse = mean_squared_error(y_test, y_pred)
# 1 - MSE / Var(y): one minus the ratio of the prediction error to the
# variance of the test targets.
accuracy = 1 - mse / np.var(y_test)



In [None]:
# `accuracy` holds 1 - MSE/Var(y), i.e. an R^2 score for a regression model,
# not a classification accuracy -- label the output accordingly.
print(f"R^2 (1 - MSE/Var(y)): {accuracy}")


Accuracy: 0.4369534605732711


In [None]:
# `lr` is a deprecated alias that triggers a warning; `learning_rate` is the
# supported argument name.
sgd = SGD(learning_rate=0.01, momentum=0.9)

# NOTE(review): re-compiling swaps the optimizer but keeps the weights that
# were already trained with Adam, so the next fit continues from that state
# rather than from scratch -- confirm this is intended.
model.compile(optimizer=sgd,
              loss='mse',
              metrics=['mae', 'mse'])

  super().__init__(name, **kwargs)


In [None]:
# Early stopping must not monitor the test set: that tunes the stopping point
# on the very data used for the final score. Hold out the last 20% of the
# training data for validation instead.
history = model.fit(X_train.reshape((X_train.shape[0], X_train.shape[1], 1)), np.asarray(y_train),
                    epochs=50,
                    batch_size=32,
                    verbose=0,
                    validation_split=0.2,
                    # Roll back to the best epoch instead of keeping the
                    # weights from `patience` epochs after it.
                    callbacks=[EarlyStopping(monitor='val_loss', patience=10,
                                             restore_best_weights=True)])

In [None]:
# evaluate() returns the loss followed by the compiled metrics in order:
# [loss (MSE), MAE, MSE]. The third value is MSE, not an accuracy.
loss, mae, mse = model.evaluate(X_test.reshape((X_test.shape[0], X_test.shape[1], 1)), y_test, verbose=0)

# Report the same 1 - MSE/Var(y) score as the earlier evaluation cell so the
# two training runs are directly comparable.
accuracy = 1 - mse / np.var(y_test)

print("MSE:", mse)
print("MAE:", mae)
print("R^2 (1 - MSE/Var(y)):", accuracy)

Accuracy: 0.5308513641357422
