From e581149847df322a63d704b567b6483ff2a0e4ff Mon Sep 17 00:00:00 2001 From: samaid Date: Sun, 8 Dec 2019 09:53:31 -0600 Subject: [PATCH] Refactored Examples. Old examples are moved under old_examples folder (will be reworked or removed by Beta release). These do not need to be included in CI. Added new examples for illustration purposes on how these examples are used and referenced in the documentation. Note that I am still unable to get SDC working out of the box with @njit. All these examples will fail in default settings of SDC. SDC must be fixed, not examples --- examples/basic_workflow.py | 48 ++++++++++++++++ examples/basic_workflow_parallel.py | 48 ++++++++++++++++ examples/employees.csv | 11 ++++ .../{ => old_examples}/Basic_DataFrame.py | 0 examples/{ => old_examples}/accel_example.py | 0 examples/{ => old_examples}/d4p_kmeans.py | 0 examples/{ => old_examples}/d4p_linreg.py | 0 .../{ => old_examples}/hiframes_concat.py | 0 .../{ => old_examples}/hiframes_cumsum.py | 0 .../{ => old_examples}/hiframes_filter.py | 0 examples/{ => old_examples}/hiframes_merge.py | 0 examples/{ => old_examples}/hiframes_pivot.py | 0 .../{ => old_examples}/hiframes_rolling.py | 0 examples/{ => old_examples}/hiframes_shift.py | 0 examples/{ => old_examples}/hiframes_sort.py | 0 .../{ => old_examples}/intraday_mean_rand.py | 0 .../kernel_density_estimation_pq.py | 0 .../kernel_density_estimation_pq_hdfs.py | 0 .../logistic_regression_rand.py | 0 examples/{ => old_examples}/pi.py | 0 examples/{ => old_examples}/ros_example1.py | 0 examples/{ => old_examples}/series_basic.py | 56 +++++++++---------- examples/series_at.py | 37 ++++++++++++ examples/series_getitem.py | 46 +++++++++++++++ examples/series_nlargest.py | 37 ++++++++++++ examples/series_nsmallest.py | 37 ++++++++++++ 26 files changed, 292 insertions(+), 28 deletions(-) create mode 100644 examples/basic_workflow.py create mode 100644 examples/basic_workflow_parallel.py create mode 100644 examples/employees.csv rename
examples/{ => old_examples}/Basic_DataFrame.py (100%) rename examples/{ => old_examples}/accel_example.py (100%) rename examples/{ => old_examples}/d4p_kmeans.py (100%) rename examples/{ => old_examples}/d4p_linreg.py (100%) rename examples/{ => old_examples}/hiframes_concat.py (100%) rename examples/{ => old_examples}/hiframes_cumsum.py (100%) rename examples/{ => old_examples}/hiframes_filter.py (100%) rename examples/{ => old_examples}/hiframes_merge.py (100%) rename examples/{ => old_examples}/hiframes_pivot.py (100%) rename examples/{ => old_examples}/hiframes_rolling.py (100%) rename examples/{ => old_examples}/hiframes_shift.py (100%) rename examples/{ => old_examples}/hiframes_sort.py (100%) rename examples/{ => old_examples}/intraday_mean_rand.py (100%) rename examples/{ => old_examples}/kernel_density_estimation_pq.py (100%) rename examples/{ => old_examples}/kernel_density_estimation_pq_hdfs.py (100%) rename examples/{ => old_examples}/logistic_regression_rand.py (100%) rename examples/{ => old_examples}/pi.py (100%) rename examples/{ => old_examples}/ros_example1.py (100%) rename examples/{ => old_examples}/series_basic.py (98%) create mode 100644 examples/series_at.py create mode 100644 examples/series_getitem.py create mode 100644 examples/series_nlargest.py create mode 100644 examples/series_nsmallest.py diff --git a/examples/basic_workflow.py b/examples/basic_workflow.py new file mode 100644 index 000000000..f09c0b05e --- /dev/null +++ b/examples/basic_workflow.py @@ -0,0 +1,48 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. 
+# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import pandas as pd +from numba import njit + +# Dataset for analysis +FNAME = "employees.csv" + + +# This function gets compiled by Numba* +@njit +def get_analyzed_data(): + df = pd.read_csv(FNAME) + s_bonus = pd.Series(df['Bonus %']) + s_first_name = pd.Series(df['First Name']) + m = s_bonus.mean() + names = s_first_name.sort_values() + return m, names + + +# Printing names and their average bonus percent +mean_bonus, sorted_first_names = get_analyzed_data() +print(sorted_first_names) +print('Average Bonus %:', mean_bonus) diff --git a/examples/basic_workflow_parallel.py b/examples/basic_workflow_parallel.py new file mode 100644 index 000000000..a2a2e764d --- /dev/null +++ b/examples/basic_workflow_parallel.py @@ -0,0 +1,48 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# ***************************************************************************** + +import pandas as pd +from numba import njit + +# Dataset for analysis +FNAME = "employees.csv" + + +# This function gets compiled by Numba* and multi-threaded +@njit(parallel=True) +def get_analyzed_data(): + df = pd.read_csv(FNAME) + s_bonus = pd.Series(df['Bonus %']) + s_first_name = pd.Series(df['First Name']) + m = s_bonus.mean() + names = s_first_name.sort_values() + return m, names + + +# Printing names and their average bonus percent +mean_bonus, sorted_first_names = get_analyzed_data() +print(sorted_first_names) +print('Average Bonus %:', mean_bonus) diff --git a/examples/employees.csv b/examples/employees.csv new file mode 100644 index 000000000..93da22b53 --- /dev/null +++ b/examples/employees.csv @@ -0,0 +1,11 @@ +,Gender,First Name,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team +0,Female,EMILY,2015-12-04,0 days 21:41:41.000000000,39582,1.306,,Business Development +1,Male,NOAH,2011-02-02,0 days 00:04:19.000000000,124742,5.881,False,Client Services +2,Male,ISAAC,1994-10-25,0 days 04:35:46.000000000,138462,14.995,True,Sales +3,Male,,1998-12-24,0 days 18:35:33.000000000,60661,3.477,False,Business Development +4,,CHRISTOPHER,2002-03-05,0 days 12:26:32.000000000,97764,15.65,False, +5,Female,MIA,2005-01-20,0 days 17:47:44.000000000,42104,12.595,False,Product +6,Female,OLIVIA,1994-07-13,0 days 00:43:25.000000000,46646,1.284,False,Client Services +7,Male,ALEXANDER,2019-09-03,0 days 04:15:00.000000000,123807,17.849,True,Legal +8,Male,JOSEPH,2008-09-15,0 days 01:16:05.000000000,139681,19.904,False,Engineering +9,Male,JOSEPH,2009-06-11,0 days 20:27:06.000000000,71359,19.103,False,Client Services diff --git a/examples/Basic_DataFrame.py b/examples/old_examples/Basic_DataFrame.py similarity index 100% rename from examples/Basic_DataFrame.py rename to examples/old_examples/Basic_DataFrame.py diff --git a/examples/accel_example.py 
b/examples/old_examples/accel_example.py similarity index 100% rename from examples/accel_example.py rename to examples/old_examples/accel_example.py diff --git a/examples/d4p_kmeans.py b/examples/old_examples/d4p_kmeans.py similarity index 100% rename from examples/d4p_kmeans.py rename to examples/old_examples/d4p_kmeans.py diff --git a/examples/d4p_linreg.py b/examples/old_examples/d4p_linreg.py similarity index 100% rename from examples/d4p_linreg.py rename to examples/old_examples/d4p_linreg.py diff --git a/examples/hiframes_concat.py b/examples/old_examples/hiframes_concat.py similarity index 100% rename from examples/hiframes_concat.py rename to examples/old_examples/hiframes_concat.py diff --git a/examples/hiframes_cumsum.py b/examples/old_examples/hiframes_cumsum.py similarity index 100% rename from examples/hiframes_cumsum.py rename to examples/old_examples/hiframes_cumsum.py diff --git a/examples/hiframes_filter.py b/examples/old_examples/hiframes_filter.py similarity index 100% rename from examples/hiframes_filter.py rename to examples/old_examples/hiframes_filter.py diff --git a/examples/hiframes_merge.py b/examples/old_examples/hiframes_merge.py similarity index 100% rename from examples/hiframes_merge.py rename to examples/old_examples/hiframes_merge.py diff --git a/examples/hiframes_pivot.py b/examples/old_examples/hiframes_pivot.py similarity index 100% rename from examples/hiframes_pivot.py rename to examples/old_examples/hiframes_pivot.py diff --git a/examples/hiframes_rolling.py b/examples/old_examples/hiframes_rolling.py similarity index 100% rename from examples/hiframes_rolling.py rename to examples/old_examples/hiframes_rolling.py diff --git a/examples/hiframes_shift.py b/examples/old_examples/hiframes_shift.py similarity index 100% rename from examples/hiframes_shift.py rename to examples/old_examples/hiframes_shift.py diff --git a/examples/hiframes_sort.py b/examples/old_examples/hiframes_sort.py similarity index 100% rename from 
examples/hiframes_sort.py rename to examples/old_examples/hiframes_sort.py diff --git a/examples/intraday_mean_rand.py b/examples/old_examples/intraday_mean_rand.py similarity index 100% rename from examples/intraday_mean_rand.py rename to examples/old_examples/intraday_mean_rand.py diff --git a/examples/kernel_density_estimation_pq.py b/examples/old_examples/kernel_density_estimation_pq.py similarity index 100% rename from examples/kernel_density_estimation_pq.py rename to examples/old_examples/kernel_density_estimation_pq.py diff --git a/examples/kernel_density_estimation_pq_hdfs.py b/examples/old_examples/kernel_density_estimation_pq_hdfs.py similarity index 100% rename from examples/kernel_density_estimation_pq_hdfs.py rename to examples/old_examples/kernel_density_estimation_pq_hdfs.py diff --git a/examples/logistic_regression_rand.py b/examples/old_examples/logistic_regression_rand.py similarity index 100% rename from examples/logistic_regression_rand.py rename to examples/old_examples/logistic_regression_rand.py diff --git a/examples/pi.py b/examples/old_examples/pi.py similarity index 100% rename from examples/pi.py rename to examples/old_examples/pi.py diff --git a/examples/ros_example1.py b/examples/old_examples/ros_example1.py similarity index 100% rename from examples/ros_example1.py rename to examples/old_examples/ros_example1.py diff --git a/examples/series_basic.py b/examples/old_examples/series_basic.py similarity index 98% rename from examples/series_basic.py rename to examples/old_examples/series_basic.py index 94f317d76..fee05414f 100644 --- a/examples/series_basic.py +++ b/examples/old_examples/series_basic.py @@ -25,31 +25,31 @@ # ***************************************************************************** -import pandas as pd -import numpy as np -import sdc - - -@sdc.jit -def get_mean(df): - ser = pd.Series(df['Bonus %']) - m = ser.mean() - return m - - -@sdc.jit -def sort_name(df): - ser = pd.Series(df['First Name']) - m = ser.sort_values() 
- return m - - -file = "employees.csv" -df = pd.read_csv(file) - - -# find mean of one column -print(get_mean(df)) - -# Sort the names in ascending order -print(sort_name(df)) +import pandas as pd +import numpy as np +import sdc + + +@sdc.jit +def get_mean(df): + ser = pd.Series(df['Bonus %']) + m = ser.mean() + return m + + +@sdc.jit +def sort_name(df): + ser = pd.Series(df['First Name']) + m = ser.sort_values() + return m + + +file = "employees.csv" +df = pd.read_csv(file) + + +# find mean of one column +print(get_mean(df)) + +# Sort the names in ascending order +print(sort_name(df)) diff --git a/examples/series_at.py b/examples/series_at.py new file mode 100644 index 000000000..dc7109bc0 --- /dev/null +++ b/examples/series_at.py @@ -0,0 +1,37 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import pandas as pd +from numba import njit + + +@njit +def series_at(i): + series = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8]) + return series.at[i] # Access by index label (not position), e.g. label 4 -> 3 + + +print(series_at(4)) diff --git a/examples/series_getitem.py b/examples/series_getitem.py new file mode 100644 index 000000000..e6ed1c2cf --- /dev/null +++ b/examples/series_getitem.py @@ -0,0 +1,47 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import numpy as np +import pandas as pd +from numba import njit + + +@njit +def series_getitem(): + series = pd.Series(np.arange(10, 0, -1)) # Series of 10, 9, ..., 1 + + s = series[0] # Accessing series by scalar index + for x in series[2:6]: # Accessing series by a slice + s += x + + indices = pd.Series(np.asarray([1, 6, 7, 8, 9])) + for x in series[indices]: # Accessing series by another series + s += x + + return s # Expect sum of arithmetic progression == 55 + + +print(series_getitem()) diff --git a/examples/series_nlargest.py b/examples/series_nlargest.py new file mode 100644 index 000000000..4296ace42 --- /dev/null +++ b/examples/series_nlargest.py @@ -0,0 +1,38 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import numpy as np +import pandas as pd +from numba import njit + + +@njit +def series_nlargest(): + series = pd.Series(np.arange(10)) # Series of 0, 1, ..., 9 + return series.nlargest(4) # Expect the values 9, 8, 7, 6 + + +print(series_nlargest()) diff --git a/examples/series_nsmallest.py b/examples/series_nsmallest.py new file mode 100644 index 000000000..3fd9020e3 --- /dev/null +++ b/examples/series_nsmallest.py @@ -0,0 +1,38 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution.
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import numpy as np +import pandas as pd +from numba import njit + + +@njit +def series_nsmallest(): + series = pd.Series(np.arange(10)) # Series of 0, 1, ..., 9 + return series.nsmallest(4) # Expect the values 0, 1, 2, 3 + + +print(series_nsmallest())