In [24]:
import joblib
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, r2_score
from sklearn.model_selection import GridSearchCV


cleaned_dataset_copy = pd.read_csv('cleaned_dataset_copy.csv')

model_lnn = joblib.load('model_lnn.pkl')


X = cleaned_dataset_copy[['has_nfc', 'processor_speed', 'os_android', 'processor_brand_bionic']]
y = cleaned_dataset_copy['price']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

predictions_lnn = model_lnn.predict(X_test)

r_squared = r2_score(y_test, predictions_lnn)

print(f'R-squared LNN: {r_squared:.2f}')


R-squared LNN: 0.53


In [25]:
model_knr = joblib.load('model_knn.pkl')

predictions_knr = model_knr.predict(X_test)

r_squared = r2_score(y_test, predictions_knr)

print(f'R-squared KNR: {r_squared:.2f}')

R-squared KNR: 0.58


# Hyperparameter Tuning
# In order to make my model more accurate I can make use of hyperparameters. Hyperparameters are parameters that are not directly learnt within estimators. In scikit-learn they are passed as arguments to the constructor of the estimator classes. I've decided use GridSearchCV to find the best hyperparameters for my model. The goal is to find the best combination of hyperparameters that will give me the best accuracy score.

In [26]:
print(model_lnn.get_params())

{'copy_X': True, 'fit_intercept': True, 'n_jobs': None, 'positive': False}


In [27]:
print(model_knr.get_params())

{'algorithm': 'auto', 'leaf_size': 30, 'metric': 'minkowski', 'metric_params': None, 'n_jobs': None, 'n_neighbors': 5, 'p': 2, 'weights': 'uniform'}


In [28]:
hyperparameters = {
    'copy_X': [True, False],
    'fit_intercept': [True, False],
    'n_jobs': [None, 1, 2, 3, 4, 5],
    'positive': [True, False]
}

gridsearch = GridSearchCV(model_lnn, hyperparameters, cv=5)

gridsearch.fit(X_train, y_train)

print(gridsearch.best_params_)

{'copy_X': True, 'fit_intercept': False, 'n_jobs': None, 'positive': False}


In [29]:
hyperparameters = {
    'algorithm': ['auto', 'ball_tree', 'kd_tree', 'brute'],
    'leaf_size': [10, 20, 30, 40, 50],
    'n_jobs': [None, 1, 2, 3, 4, 5],
    'p': [1, 2]
}

gridsearchKnr = GridSearchCV(model_knr, hyperparameters, cv=5)

gridsearchKnr.fit(X_train, y_train)

print(gridsearchKnr.best_params_)

{'algorithm': 'auto', 'leaf_size': 10, 'n_jobs': None, 'p': 1}


# After running GridSearchCV on my Linear Regression model I was able to find the best hyperparameters to use. I then used those hyperparameters to create a new model and then tested it on my test data. Alas, after tuning the parameters instead of improving the score decreased.

In [30]:
predictKnr = gridsearchKnr.predict(X_test)

scoreKnr = r2_score(y_test, predictKnr)

print(f'Accuracy Score KNR: {scoreKnr}')

Accuracy Score KNR: 0.5644039268357153


In [31]:
predict = gridsearch.predict(X_test)
# print(predict)

score = r2_score(y_test, predict)
print(f'Accuracy Score Random Forest: {score}')

Accuracy Score Random Forest: 0.5331884219086778


# After tuning the hyperparameters for my Linear Regression model I was able to improve the accuracy ever so slightly. But not with enough to make a big difference.  

# Conclusion
# In conclusion tuning the hyperparameters for me wasn't that beneficial which doesn't mean it can't be. I think it depends on the model and the data you are using. I think it's important to try and tune the hyperparameters to see if it improves the accuracy of your model. If it doesn't then you can always go back to the original model.