diff --git a/examples/basic_workflow.py b/examples/basic_workflow.py new file mode 100644 index 000000000..f09c0b05e --- /dev/null +++ b/examples/basic_workflow.py @@ -0,0 +1,48 @@ +# ***************************************************************************** +# Copyright (c) 2019, Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# *****************************************************************************
+
+import pandas as pd
+from numba import njit
+
+# Dataset for analysis (relative path: run from the directory holding employees.csv)
+FNAME = "employees.csv"
+
+
+# This function gets compiled by Numba*; returns (mean of 'Bonus %', sorted 'First Name' values)
+@njit
+def get_analyzed_data():
+    df = pd.read_csv(FNAME)
+    s_bonus = pd.Series(df['Bonus %'])
+    s_first_name = pd.Series(df['First Name'])
+    m = s_bonus.mean()
+    names = s_first_name.sort_values()
+    return m, names
+
+
+# Print the sorted first names, then the mean bonus percent over all rows
+mean_bonus, sorted_first_names = get_analyzed_data()
+print(sorted_first_names)
+print('Average Bonus %:', mean_bonus)
diff --git a/examples/basic_workflow_parallel.py b/examples/basic_workflow_parallel.py
new file mode 100644
index 000000000..a2a2e764d
--- /dev/null
+++ b/examples/basic_workflow_parallel.py
@@ -0,0 +1,48 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import pandas as pd
+from numba import njit
+
+# Dataset for analysis (relative path: run from the directory holding employees.csv)
+FNAME = "employees.csv"
+
+
+# Compiled by Numba* and multi-threaded; returns (mean of 'Bonus %', sorted 'First Name' values)
+@njit(parallel=True)
+def get_analyzed_data():
+    df = pd.read_csv(FNAME)
+    s_bonus = pd.Series(df['Bonus %'])
+    s_first_name = pd.Series(df['First Name'])
+    m = s_bonus.mean()
+    names = s_first_name.sort_values()
+    return m, names
+
+
+# Print the sorted first names, then the mean bonus percent over all rows
+mean_bonus, sorted_first_names = get_analyzed_data()
+print(sorted_first_names)
+print('Average Bonus %:', mean_bonus)
diff --git a/examples/employees.csv b/examples/employees.csv
new file mode 100644
index 000000000..93da22b53
--- /dev/null
+++ b/examples/employees.csv
@@ -0,0 +1,11 @@
+,Gender,First Name,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
+0,Female,EMILY,2015-12-04,0 days 21:41:41.000000000,39582,1.306,,Business Development
+1,Male,NOAH,2011-02-02,0 days 00:04:19.000000000,124742,5.881,False,Client Services
+2,Male,ISAAC,1994-10-25,0 days 04:35:46.000000000,138462,14.995,True,Sales
+3,Male,,1998-12-24,0 days 18:35:33.000000000,60661,3.477,False,Business Development
+4,,CHRISTOPHER,2002-03-05,0 days 12:26:32.000000000,97764,15.65,False,
+5,Female,MIA,2005-01-20,0 days 17:47:44.000000000,42104,12.595,False,Product
+6,Female,OLIVIA,1994-07-13,0 days
00:43:25.000000000,46646,1.284,False,Client Services +7,Male,ALEXANDER,2019-09-03,0 days 04:15:00.000000000,123807,17.849,True,Legal +8,Male,JOSEPH,2008-09-15,0 days 01:16:05.000000000,139681,19.904,False,Engineering +9,Male,JOSEPH,2009-06-11,0 days 20:27:06.000000000,71359,19.103,False,Client Services diff --git a/examples/Basic_DataFrame.py b/examples/old_examples/Basic_DataFrame.py similarity index 100% rename from examples/Basic_DataFrame.py rename to examples/old_examples/Basic_DataFrame.py diff --git a/examples/accel_example.py b/examples/old_examples/accel_example.py similarity index 100% rename from examples/accel_example.py rename to examples/old_examples/accel_example.py diff --git a/examples/d4p_kmeans.py b/examples/old_examples/d4p_kmeans.py similarity index 100% rename from examples/d4p_kmeans.py rename to examples/old_examples/d4p_kmeans.py diff --git a/examples/d4p_linreg.py b/examples/old_examples/d4p_linreg.py similarity index 100% rename from examples/d4p_linreg.py rename to examples/old_examples/d4p_linreg.py diff --git a/examples/hiframes_concat.py b/examples/old_examples/hiframes_concat.py similarity index 100% rename from examples/hiframes_concat.py rename to examples/old_examples/hiframes_concat.py diff --git a/examples/hiframes_cumsum.py b/examples/old_examples/hiframes_cumsum.py similarity index 100% rename from examples/hiframes_cumsum.py rename to examples/old_examples/hiframes_cumsum.py diff --git a/examples/hiframes_filter.py b/examples/old_examples/hiframes_filter.py similarity index 100% rename from examples/hiframes_filter.py rename to examples/old_examples/hiframes_filter.py diff --git a/examples/hiframes_merge.py b/examples/old_examples/hiframes_merge.py similarity index 100% rename from examples/hiframes_merge.py rename to examples/old_examples/hiframes_merge.py diff --git a/examples/hiframes_pivot.py b/examples/old_examples/hiframes_pivot.py similarity index 100% rename from examples/hiframes_pivot.py rename to 
examples/old_examples/hiframes_pivot.py diff --git a/examples/hiframes_rolling.py b/examples/old_examples/hiframes_rolling.py similarity index 100% rename from examples/hiframes_rolling.py rename to examples/old_examples/hiframes_rolling.py diff --git a/examples/hiframes_shift.py b/examples/old_examples/hiframes_shift.py similarity index 100% rename from examples/hiframes_shift.py rename to examples/old_examples/hiframes_shift.py diff --git a/examples/hiframes_sort.py b/examples/old_examples/hiframes_sort.py similarity index 100% rename from examples/hiframes_sort.py rename to examples/old_examples/hiframes_sort.py diff --git a/examples/intraday_mean_rand.py b/examples/old_examples/intraday_mean_rand.py similarity index 100% rename from examples/intraday_mean_rand.py rename to examples/old_examples/intraday_mean_rand.py diff --git a/examples/kernel_density_estimation_pq.py b/examples/old_examples/kernel_density_estimation_pq.py similarity index 100% rename from examples/kernel_density_estimation_pq.py rename to examples/old_examples/kernel_density_estimation_pq.py diff --git a/examples/kernel_density_estimation_pq_hdfs.py b/examples/old_examples/kernel_density_estimation_pq_hdfs.py similarity index 100% rename from examples/kernel_density_estimation_pq_hdfs.py rename to examples/old_examples/kernel_density_estimation_pq_hdfs.py diff --git a/examples/logistic_regression_rand.py b/examples/old_examples/logistic_regression_rand.py similarity index 100% rename from examples/logistic_regression_rand.py rename to examples/old_examples/logistic_regression_rand.py diff --git a/examples/pi.py b/examples/old_examples/pi.py similarity index 100% rename from examples/pi.py rename to examples/old_examples/pi.py diff --git a/examples/ros_example1.py b/examples/old_examples/ros_example1.py similarity index 100% rename from examples/ros_example1.py rename to examples/old_examples/ros_example1.py diff --git a/examples/series_basic.py b/examples/old_examples/series_basic.py 
similarity index 98%
rename from examples/series_basic.py
rename to examples/old_examples/series_basic.py
index 94f317d76..fee05414f 100644
--- a/examples/series_basic.py
+++ b/examples/old_examples/series_basic.py
@@ -25,31 +25,31 @@
 # *****************************************************************************
 
 
-import pandas as pd
-import numpy as np
-import sdc
-
-
-@sdc.jit
-def get_mean(df):
-    ser = pd.Series(df['Bonus %'])
-    m = ser.mean()
-    return m
-
-
-@sdc.jit
-def sort_name(df):
-    ser = pd.Series(df['First Name'])
-    m = ser.sort_values()
-    return m
-
-
-file = "employees.csv"
-df = pd.read_csv(file)
-
-
-# find mean of one column
-print(get_mean(df))
-
-# Sort the names in ascending order
-print(sort_name(df))
+import pandas as pd
+import numpy as np
+import sdc
+
+
+@sdc.jit
+def get_mean(df):
+    ser = pd.Series(df['Bonus %'])
+    m = ser.mean()
+    return m
+
+
+@sdc.jit
+def sort_name(df):
+    ser = pd.Series(df['First Name'])
+    m = ser.sort_values()
+    return m
+
+
+file = "employees.csv"
+df = pd.read_csv(file)
+
+
+# find mean of one column
+print(get_mean(df))
+
+# Sort the names in ascending order
+print(sort_name(df))
diff --git a/examples/series_at.py b/examples/series_at.py
new file mode 100644
index 000000000..dc7109bc0
--- /dev/null
+++ b/examples/series_at.py
@@ -0,0 +1,39 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numpy as np
+import pandas as pd
+from numba import njit
+
+
+@njit
+def series_at(i):
+    """Return what a sample series stores under the index label ``i``."""
+    series = pd.Series([5, 4, 3, 2, 1], index=[0, 2, 4, 6, 8])
+    return series.at[i]
+
+
+print(series_at(4))  # Label 4 is paired with the value 3
diff --git a/examples/series_getitem.py b/examples/series_getitem.py
new file mode 100644
index 000000000..e6ed1c2cf
--- /dev/null
+++ b/examples/series_getitem.py
@@ -0,0 +1,48 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numpy as np
+import pandas as pd
+from numba import njit
+
+
+@njit
+def series_getitem():
+    """Sum the series 10, 9, ..., 1 using scalar, slice and series indexing."""
+    series = pd.Series(np.arange(10, 0, -1))  # Series of 10, 9, ..., 1
+
+    s = series[0]  # Accessing series by scalar index
+    for x in series[2:6]:  # Accessing series by a slice
+        s += x
+
+    indices = pd.Series(np.asarray([1, 6, 7, 8, 9]))
+    for x in series[indices]:  # Accessing series by another series
+        s += x
+
+    return s  # Expect sum of arithmetic progression == 55
+
+
+print(series_getitem())
diff --git a/examples/series_nlargest.py b/examples/series_nlargest.py
new file mode 100644
index 000000000..4296ace42
--- /dev/null
+++ b/examples/series_nlargest.py
@@ -0,0 +1,39 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numpy as np
+import pandas as pd
+from numba import njit
+
+
+@njit
+def series_nlargest():
+    """Return the four largest elements (9, 8, 7, 6) of the series 0..9."""
+    series = pd.Series(np.arange(10))
+    return series.nlargest(4)
+
+
+print(series_nlargest())
diff --git a/examples/series_nsmallest.py b/examples/series_nsmallest.py
new file mode 100644
index 000000000..3fd9020e3
--- /dev/null
+++ b/examples/series_nsmallest.py
@@ -0,0 +1,39 @@
+# *****************************************************************************
+# Copyright (c) 2019, Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+#
+# Redistributions of source code must retain the above copyright notice,
+# this list of conditions and the following disclaimer.
+#
+# Redistributions in binary form must reproduce the above copyright notice,
+# this list of conditions and the following disclaimer in the documentation
+# and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+# OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+# WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+# EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# *****************************************************************************
+
+import numpy as np
+import pandas as pd
+from numba import njit
+
+
+@njit
+def series_nsmallest():
+    """Return the four smallest elements (0, 1, 2, 3) of the series 0..9."""
+    series = pd.Series(np.arange(10))
+    return series.nsmallest(4)
+
+
+print(series_nsmallest())