Skip to content

Commit

Permalink
Apply black, usort and flake8, and create linting job (#420)
Browse files: browse the repository at this point in the history
  • Loading branch information
NicolasHug committed Aug 14, 2022
1 parent 2e6ae0e commit 9750b88
Show file tree
Hide file tree
Showing 68 changed files with 1,823 additions and 1,539 deletions.
37 changes: 37 additions & 0 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# This workflow will run lint.sh

name: Lint

on:
push:
branches: [ "master" ]
pull_request:
branches: [ "master" ]

jobs:
build:

runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
python-version: ["3.9"]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install "black==22.6.0" "flake8==5.0.4" "usort==1.0.4"
pip install -r requirements_dev.txt
- name: Install Surprise
run: |
python -m pip install -e . # May not be strictly needed for linting
- name: Run linters and formatters
run: |
chmod +x lint.sh
./lint.sh
2 changes: 2 additions & 0 deletions doc/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,8 @@
# Add any paths that contain templates here, relative to this directory.
templates_path = ['.templates']

bibtex_bibfiles = ['refs.bib']

# The suffix(es) of source filenames.
# You can specify multiple suffixes as a list of strings:
# source_suffix = ['.rst', '.md']
Expand Down
34 changes: 15 additions & 19 deletions examples/baselines_conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,41 +3,37 @@
computation.
"""

from surprise import BaselineOnly
from surprise import KNNBasic
from surprise import Dataset
from surprise import BaselineOnly, Dataset, KNNBasic
from surprise.model_selection import cross_validate


# Load the movielens-100k dataset.
data = Dataset.load_builtin('ml-100k')
data = Dataset.load_builtin("ml-100k")

# Example using ALS
print('Using ALS')
bsl_options = {'method': 'als',
'n_epochs': 5,
'reg_u': 12,
'reg_i': 5
}
print("Using ALS")
bsl_options = {"method": "als", "n_epochs": 5, "reg_u": 12, "reg_i": 5}
algo = BaselineOnly(bsl_options=bsl_options)

cross_validate(algo, data, verbose=True)

# Example using SGD
print('Using SGD')
bsl_options = {'method': 'sgd',
'learning_rate': .00005,
}
print("Using SGD")
bsl_options = {
"method": "sgd",
"learning_rate": 0.00005,
}
algo = BaselineOnly(bsl_options=bsl_options)

cross_validate(algo, data, verbose=True)

# Some similarity measures may use baselines. It works just the same.
print('Using ALS with pearson_baseline similarity')
bsl_options = {'method': 'als',
'n_epochs': 20,
}
sim_options = {'name': 'pearson_baseline'}
print("Using ALS with pearson_baseline similarity")
bsl_options = {
"method": "als",
"n_epochs": 20,
}
sim_options = {"name": "pearson_baseline"}
algo = KNNBasic(bsl_options=bsl_options, sim_options=sim_options)

cross_validate(algo, data, verbose=True)
9 changes: 3 additions & 6 deletions examples/basic_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,19 +3,16 @@
algorithm, (down)load a dataset and run a cross-validation procedure.
"""

from __future__ import (absolute_import, division, print_function,
unicode_literals)

from surprise import SVD
from surprise import Dataset
from surprise import Dataset, SVD
from surprise.model_selection import cross_validate


# Load the movielens-100k dataset (download it if needed).
data = Dataset.load_builtin('ml-100k')
data = Dataset.load_builtin("ml-100k")

# We'll use the famous SVD algorithm.
algo = SVD()

# Run 5-fold cross-validation and print results
cross_validate(algo, data, measures=['RMSE', 'MAE'], cv=5, verbose=True)
cross_validate(algo, data, measures=["RMSE", "MAE"], cv=5, verbose=True)
154 changes: 90 additions & 64 deletions examples/benchmark.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,96 +2,122 @@
the movielens datasets, and reports average RMSE, MAE, and total computation
time. It is used for making tables in the README.md file"""

from __future__ import (absolute_import, division, print_function,
unicode_literals)
import time
# flake8: noqa

import datetime
import random
import time

import numpy as np
from tabulate import tabulate

from surprise import Dataset
from surprise.model_selection import cross_validate
from surprise.model_selection import KFold
from surprise import NormalPredictor
from surprise import BaselineOnly
from surprise import KNNBasic
from surprise import KNNWithMeans
from surprise import KNNBaseline
from surprise import SVD
from surprise import SVDpp
from surprise import NMF
from surprise import SlopeOne
from surprise import CoClustering
from surprise import (
BaselineOnly,
CoClustering,
Dataset,
KNNBaseline,
KNNBasic,
KNNWithMeans,
NMF,
NormalPredictor,
SlopeOne,
SVD,
SVDpp,
)
from surprise.model_selection import cross_validate, KFold
from tabulate import tabulate

# The algorithms to cross-validate
classes = (SVD, SVDpp, NMF, SlopeOne, KNNBasic, KNNWithMeans, KNNBaseline,
CoClustering, BaselineOnly, NormalPredictor)
classes = (
SVD,
SVDpp,
NMF,
SlopeOne,
KNNBasic,
KNNWithMeans,
KNNBaseline,
CoClustering,
BaselineOnly,
NormalPredictor,
)

# ugly dict to map algo names and datasets to their markdown links in the table
stable = 'http://surprise.readthedocs.io/en/stable/'
LINK = {'SVD': '[{}]({})'.format('SVD',
stable +
'matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVD'),
'SVDpp': '[{}]({})'.format('SVD++',
stable +
'matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVDpp'),
'NMF': '[{}]({})'.format('NMF',
stable +
'matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.NMF'),
'SlopeOne': '[{}]({})'.format('Slope One',
stable +
'slope_one.html#surprise.prediction_algorithms.slope_one.SlopeOne'),
'KNNBasic': '[{}]({})'.format('k-NN',
stable +
'knn_inspired.html#surprise.prediction_algorithms.knns.KNNBasic'),
'KNNWithMeans': '[{}]({})'.format('Centered k-NN',
stable +
'knn_inspired.html#surprise.prediction_algorithms.knns.KNNWithMeans'),
'KNNBaseline': '[{}]({})'.format('k-NN Baseline',
stable +
'knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline'),
'CoClustering': '[{}]({})'.format('Co-Clustering',
stable +
'co_clustering.html#surprise.prediction_algorithms.co_clustering.CoClustering'),
'BaselineOnly': '[{}]({})'.format('Baseline',
stable +
'basic_algorithms.html#surprise.prediction_algorithms.baseline_only.BaselineOnly'),
'NormalPredictor': '[{}]({})'.format('Random',
stable +
'basic_algorithms.html#surprise.prediction_algorithms.random_pred.NormalPredictor'),
'ml-100k': '[{}]({})'.format('Movielens 100k',
'http://grouplens.org/datasets/movielens/100k'),
'ml-1m': '[{}]({})'.format('Movielens 1M',
'http://grouplens.org/datasets/movielens/1m'),
}
stable = "http://surprise.readthedocs.io/en/stable/"
LINK = {
"SVD": "[{}]({})".format(
"SVD",
stable
+ "matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVD",
),
"SVDpp": "[{}]({})".format(
"SVD++",
stable
+ "matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.SVDpp",
),
"NMF": "[{}]({})".format(
"NMF",
stable
+ "matrix_factorization.html#surprise.prediction_algorithms.matrix_factorization.NMF",
),
"SlopeOne": "[{}]({})".format(
"Slope One",
stable + "slope_one.html#surprise.prediction_algorithms.slope_one.SlopeOne",
),
"KNNBasic": "[{}]({})".format(
"k-NN",
stable + "knn_inspired.html#surprise.prediction_algorithms.knns.KNNBasic",
),
"KNNWithMeans": "[{}]({})".format(
"Centered k-NN",
stable + "knn_inspired.html#surprise.prediction_algorithms.knns.KNNWithMeans",
),
"KNNBaseline": "[{}]({})".format(
"k-NN Baseline",
stable + "knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline",
),
"CoClustering": "[{}]({})".format(
"Co-Clustering",
stable
+ "co_clustering.html#surprise.prediction_algorithms.co_clustering.CoClustering",
),
"BaselineOnly": "[{}]({})".format(
"Baseline",
stable
+ "basic_algorithms.html#surprise.prediction_algorithms.baseline_only.BaselineOnly",
),
"NormalPredictor": "[{}]({})".format(
"Random",
stable
+ "basic_algorithms.html#surprise.prediction_algorithms.random_pred.NormalPredictor",
),
"ml-100k": "[{}]({})".format(
"Movielens 100k", "http://grouplens.org/datasets/movielens/100k"
),
"ml-1m": "[{}]({})".format(
"Movielens 1M", "http://grouplens.org/datasets/movielens/1m"
),
}


# set RNG
np.random.seed(0)
random.seed(0)

dataset = 'ml-1m'
dataset = "ml-1m"
data = Dataset.load_builtin(dataset)
kf = KFold(random_state=0) # folds will be the same for all algorithms.

table = []
for klass in classes:
start = time.time()
out = cross_validate(klass(), data, ['rmse', 'mae'], kf)
out = cross_validate(klass(), data, ["rmse", "mae"], kf)
cv_time = str(datetime.timedelta(seconds=int(time.time() - start)))
link = LINK[klass.__name__]
mean_rmse = '{:.3f}'.format(np.mean(out['test_rmse']))
mean_mae = '{:.3f}'.format(np.mean(out['test_mae']))
mean_rmse = "{:.3f}".format(np.mean(out["test_rmse"]))
mean_mae = "{:.3f}".format(np.mean(out["test_mae"]))

new_line = [link, mean_rmse, mean_mae, cv_time]
print(tabulate([new_line], tablefmt="pipe")) # print current algo perf
table.append(new_line)

header = [LINK[dataset],
'RMSE',
'MAE',
'Time'
]
header = [LINK[dataset], "RMSE", "MAE", "Time"]
print(tabulate(table, header, tablefmt="pipe"))
8 changes: 2 additions & 6 deletions examples/building_custom_algorithms/mean_rating_user_item.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,14 @@
to User Guide for more insight.
"""

from __future__ import (absolute_import, division, print_function,
unicode_literals)

import numpy as np

from surprise import AlgoBase
from surprise import Dataset
from surprise import AlgoBase, Dataset
from surprise.model_selection import cross_validate


class MyOwnAlgorithm(AlgoBase):

def __init__(self):

# Always call base method before doing anything.
Expand All @@ -35,7 +31,7 @@ def estimate(self, u, i):
return sum_means / div


data = Dataset.load_builtin('ml-100k')
data = Dataset.load_builtin("ml-100k")
algo = MyOwnAlgorithm()

cross_validate(algo, data, verbose=True)
8 changes: 2 additions & 6 deletions examples/building_custom_algorithms/most_basic_algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,12 @@
to User Guide for more insight.
"""

from __future__ import (absolute_import, division, print_function,
unicode_literals)

from surprise import AlgoBase
from surprise import Dataset
from surprise import AlgoBase, Dataset
from surprise.model_selection import cross_validate


class MyOwnAlgorithm(AlgoBase):

def __init__(self):

# Always call base method before doing anything.
Expand All @@ -23,7 +19,7 @@ def estimate(self, u, i):
return 3


data = Dataset.load_builtin('ml-100k')
data = Dataset.load_builtin("ml-100k")
algo = MyOwnAlgorithm()

cross_validate(algo, data, verbose=True)

0 comments on commit 9750b88

Please sign in to comment.