diff --git a/examples/accel_example.py b/examples/accel_example.py
index 0aef4963d..8b230eaf0 100644
--- a/examples/accel_example.py
+++ b/examples/accel_example.py
@@ -15,15 +15,16 @@ def accel_infer(n):
     df = pd.DataFrame({'X': X, 'Y': Y, 'Z': Z})
 
     g = 9.81
-    df['accel'] = np.sqrt(df.X**2 + df.Y**2 + (df.Z-g)**2)
+    df['accel'] = np.sqrt(df.X**2 + df.Y**2 + (df.Z - g)**2)
     threshold = df.accel.mean() + 5 * df.accel.std()
     df['is_brake'] = (df.rolling(10)['accel'].mean() > threshold)
     df.is_brake.fillna(False, inplace=True)
 
     checksum = df.is_brake.sum()
     t2 = time.time()
-    print("exec time:", t2-t1)
+    print("exec time:", t2 - t1)
     return checksum
 
+
 n = 10**8
 accel_infer(n)
diff --git a/examples/d4p_kmeans.py b/examples/d4p_kmeans.py
index 94b258081..492487462 100644
--- a/examples/d4p_kmeans.py
+++ b/examples/d4p_kmeans.py
@@ -3,14 +3,16 @@
 import hpat
 import numpy as np
 
+
 @hpat.jit(nopython=True)
 def kmeans(N, D, nClusters, maxit):
-    a = np.random.ranf((N,D)) # doesn't make much sense, but ok for now
+    a = np.random.ranf((N, D))  # doesn't make much sense, but ok for now
     kmi = daal4py.kmeans_init(nClusters, method='plusPlusDense')
     km = daal4py.kmeans(nClusters, maxit)
     kmr = km.compute(a, kmi.compute(a).centroids)
     return (kmr.centroids, kmr.assignments, kmr.objectiveFunction,
            kmr.goalFunction, kmr.nIterations)
 
+
 print(kmeans(10000, 20, 2, 30))
 hpat.distribution_report()
diff --git a/examples/d4p_linreg.py b/examples/d4p_linreg.py
index 2f27c5604..b866caeb1 100644
--- a/examples/d4p_linreg.py
+++ b/examples/d4p_linreg.py
@@ -3,17 +3,20 @@
 import hpat
 import numpy as np
 
+
 @hpat.jit
 def lr_predict(N, D, model):
-    data = np.random.ranf((N/2,D))
+    data = np.random.ranf((N // 2, D))
     return daal4py.linear_regression_prediction().compute(data, model)
 
+
 @hpat.jit
 def lr_train(N, D):
-    data = np.random.ranf((N,D))
-    gt = np.random.ranf((N,2))
+    data = np.random.ranf((N, D))
+    gt = np.random.ranf((N, 2))
     return daal4py.linear_regression_training(interceptFlag=True,
                                              method='qrDense').compute(data, gt)
 
+
 t_res = lr_train(1000, 10)
 p_res = lr_predict(1000, 10, t_res.model)
diff --git a/examples/hiframes_concat.py b/examples/hiframes_concat.py
index 378484560..b9df34c84 100644
--- a/examples/hiframes_concat.py
+++ b/examples/hiframes_concat.py
@@ -2,13 +2,15 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit
 def concat_df(n):
-    df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n)+1.0})
-    df2 = pd.DataFrame({'key2': n-np.arange(n), 'A': n+np.arange(n)+1.0})
+    df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n) + 1.0})
+    df2 = pd.DataFrame({'key2': n - np.arange(n), 'A': n + np.arange(n) + 1.0})
     df3 = pd.concat([df1, df2])
     return df3.key2.sum()
 
+
 n = 10
 print(concat_df(n))
-#hpat.distribution_report()
+# hpat.distribution_report()
diff --git a/examples/hiframes_cumsum.py b/examples/hiframes_cumsum.py
index 991a84575..21d24771d 100644
--- a/examples/hiframes_cumsum.py
+++ b/examples/hiframes_cumsum.py
@@ -2,12 +2,14 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit
 def cumsum_df(n):
-    df = pd.DataFrame({'A': np.arange(n)+1.0, 'B': np.random.ranf(n)})
+    df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.random.ranf(n)})
     Ac = df.A.cumsum()
     return Ac.sum()
 
+
 n = 10
 print(cumsum_df(n))
 hpat.distribution_report()
diff --git a/examples/hiframes_filter.py b/examples/hiframes_filter.py
index efc6bd75e..c5f281f2b 100644
--- a/examples/hiframes_filter.py
+++ b/examples/hiframes_filter.py
@@ -2,11 +2,13 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit
 def filter_df(n):
     df = pd.DataFrame({'A': np.random.ranf(n), 'B': np.random.ranf(n)})
     df1 = df[df.A > .5]
     return np.sum(df1.B)
 
+
 n = 10
 print(filter_df(n))
diff --git a/examples/hiframes_merge.py b/examples/hiframes_merge.py
index 8cd222723..7de5d2bd8 100644
--- a/examples/hiframes_merge.py
+++ b/examples/hiframes_merge.py
@@ -2,12 +2,14 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit
 def merge_df(n):
-    df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n)+1.0})
-    df2 = pd.DataFrame({'key2': n-np.arange(n), 'B': n+np.arange(n)+1.0})
+    df1 = pd.DataFrame({'key1': np.arange(n), 'A': np.arange(n) + 1.0})
+    df2 = pd.DataFrame({'key2': n - np.arange(n), 'B': n + np.arange(n) + 1.0})
     df3 = pd.merge(df1, df2, left_on='key1', right_on='key2')
     return df3.B.sum()
 
+
 n = 10
 print(merge_df(n))
diff --git a/examples/hiframes_pivot.py b/examples/hiframes_pivot.py
index 77b407ef5..8445a930e 100644
--- a/examples/hiframes_pivot.py
+++ b/examples/hiframes_pivot.py
@@ -2,6 +2,7 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit(pivots={'pt': ['small', 'large']})
 def df_pivot(df):
     pt = df.pivot_table(index='A', columns='C', values='D', aggfunc='sum')
@@ -11,12 +12,12 @@ def df_pivot(df):
 
 
 df = pd.DataFrame({"A": ["foo", "foo", "foo", "foo", "foo",
-                            "bar", "bar", "bar", "bar"],
-                    "B": ["one", "one", "one", "two", "two",
-                            "one", "one", "two", "two"],
-                    "C": ["small", "large", "large", "small",
-                            "small", "large", "small", "small",
-                            "large"],
-                    "D": [1, 2, 2, 6, 3, 4, 5, 6, 9]})
+                         "bar", "bar", "bar", "bar"],
+                   "B": ["one", "one", "one", "two", "two",
+                         "one", "one", "two", "two"],
+                   "C": ["small", "large", "large", "small",
+                         "small", "large", "small", "small",
+                         "large"],
+                   "D": [1, 2, 2, 6, 3, 4, 5, 6, 9]})
 
-df_pivot(df)
\ No newline at end of file
+df_pivot(df)
diff --git a/examples/hiframes_rolling.py b/examples/hiframes_rolling.py
index a0ce68c09..cd1c64209 100644
--- a/examples/hiframes_rolling.py
+++ b/examples/hiframes_rolling.py
@@ -2,24 +2,28 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit
 def rolling_df1(n):
     df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
     Ac = df.A.rolling(5).sum()
     return Ac.sum()
 
+
 @hpat.jit
 def rolling_df2(n):
     df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
     df['moving average'] = df.A.rolling(window=5, center=True).mean()
     return df['moving average'].sum()
 
+
 @hpat.jit
 def rolling_df3(n):
     df = pd.DataFrame({'A': np.arange(n), 'B': np.random.ranf(n)})
-    Ac = df.A.rolling(3, center=True).apply(lambda a: a[0]+2*a[1]+a[2])
+    Ac = df.A.rolling(3, center=True).apply(lambda a: a[0] + 2 * a[1] + a[2])
     return Ac.sum()
 
+
 n = 10
 print("sum left window:")
 print(rolling_df1(n))
diff --git a/examples/hiframes_shift.py b/examples/hiframes_shift.py
index aad76ccdd..1f9147266 100644
--- a/examples/hiframes_shift.py
+++ b/examples/hiframes_shift.py
@@ -2,18 +2,21 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit
 def shift_df1(n):
-    df = pd.DataFrame({'A': np.arange(n)+1.0, 'B': np.random.ranf(n)})
+    df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.random.ranf(n)})
     Ac = df.A.shift(1)
     return Ac.sum()
 
+
 @hpat.jit
 def shift_df2(n):
-    df = pd.DataFrame({'A': np.arange(n)+1.0, 'B': np.random.ranf(n)})
+    df = pd.DataFrame({'A': np.arange(n) + 1.0, 'B': np.random.ranf(n)})
     Ac = df.A.pct_change()
     return Ac
 
+
 n = 10
 print("shift 1:")
 print(shift_df1(n))
diff --git a/examples/hiframes_sort.py b/examples/hiframes_sort.py
index 27f6116a4..723279ab1 100644
--- a/examples/hiframes_sort.py
+++ b/examples/hiframes_sort.py
@@ -2,12 +2,14 @@
 import numpy as np
 import hpat
 
+
 @hpat.jit
 def df_sort(df):
     df2 = df.sort_values('A')
     print(df2.A.values)
     print(df2.B.values)
 
+
 n = 11
 df = pd.DataFrame({'A': np.random.ranf(n), 'B': np.arange(n), 'C': np.random.ranf(n)})
 # computation is sequential since df is passed in
diff --git a/examples/intraday_mean.py b/examples/intraday_mean.py
index 1b4c55bb1..f843afea8 100644
--- a/examples/intraday_mean.py
+++ b/examples/intraday_mean.py
@@ -8,9 +8,10 @@
 # adopted from:
 # http://www.pythonforfinance.net/2017/02/20/intraday-stock-mean-reversion-trading-backtest-in-python/
 
+
 @hpat.jit(locals={'s_open': hpat.float64[:], 's_high': hpat.float64[:],
-                    's_low': hpat.float64[:], 's_close': hpat.float64[:],
-                    's_vol': hpat.float64[:]})
+                  's_low': hpat.float64[:], 's_close': hpat.float64[:],
+                  's_vol': hpat.float64[:]})
 def intraday_mean_revert():
     file_name = "stock_data_all_google.hdf5"
     f = h5py.File(file_name, "r")
@@ -23,35 +24,35 @@ def intraday_mean_revert():
 
     for i in prange(nsyms):
         symbol = sym_list[i]
-        s_open = f[symbol+'/Open'][:]
-        s_high = f[symbol+'/High'][:]
-        s_low = f[symbol+'/Low'][:]
-        s_close = f[symbol+'/Close'][:]
-        s_vol = f[symbol+'/Volume'][:]
+        s_open = f[symbol + '/Open'][:]
+        s_high = f[symbol + '/High'][:]
+        s_low = f[symbol + '/Low'][:]
+        s_close = f[symbol + '/Close'][:]
+        s_vol = f[symbol + '/Volume'][:]
         df = pd.DataFrame({'Open': s_open, 'High': s_high, 'Low': s_low,
-                            'Close': s_close, 'Volume': s_vol,})
+                           'Close': s_close, 'Volume': s_vol, })
 
-        #create column to hold our 90 day rolling standard deviation
+        # create column to hold our 90 day rolling standard deviation
         df['Stdev'] = df['Close'].rolling(window=90).std()
 
-        #create a column to hold our 20 day moving average
+        # create a column to hold our 20 day moving average
         df['Moving Average'] = df['Close'].rolling(window=20).mean()
 
-        #create a column which holds a TRUE value if the gap down from previous day's low to next
-        #day's open is larger than the 90 day rolling standard deviation
+        # create a column which holds a TRUE value if the gap down from previous day's low to next
+        # day's open is larger than the 90 day rolling standard deviation
         df['Criteria1'] = (df['Open'] - df['Low'].shift(1)) < -df['Stdev']
 
-        #create a column which holds a TRUE value if the opening price of the stock is above the 20 day moving average
+        # create a column which holds a TRUE value if the opening price of the stock is above the 20 day moving average
         df['Criteria2'] = df['Open'] > df['Moving Average']
 
-        #create a column that holds a TRUE value if both above criteria are also TRUE
+        # create a column that holds a TRUE value if both above criteria are also TRUE
         df['BUY'] = df['Criteria1'] & df['Criteria2']
 
-        #calculate daily % return series for stock
+        # calculate daily % return series for stock
         df['Pct Change'] = (df['Close'] - df['Open']) / df['Open']
 
-        #create a strategy return series by using the daily stock returns where the trade criteria above are met
-        df['Rets'] = df['Pct Change'][df['BUY'] == True]
+        # create a strategy return series by using the daily stock returns where the trade criteria above are met
+        df['Rets'] = df['Pct Change'][df['BUY']]
 
         n_days = len(df['Rets'])
         res = np.zeros(max_num_days)
@@ -61,6 +62,7 @@ def intraday_mean_revert():
 
     f.close()
     print(all_res.mean())
-    print("execution time:", time.time()-t1)
+    print("execution time:", time.time() - t1)
 
+
 intraday_mean_revert()
diff --git a/examples/intraday_mean_rand.py b/examples/intraday_mean_rand.py
index 359b6df79..261755c71 100644
--- a/examples/intraday_mean_rand.py
+++ b/examples/intraday_mean_rand.py
@@ -7,6 +7,7 @@
 # adopted from:
 # http://www.pythonforfinance.net/2017/02/20/intraday-stock-mean-reversion-trading-backtest-in-python/
 
+
 @hpat.jit
 def intraday_mean_revert():
     nsyms = 1000
@@ -15,38 +16,39 @@ def intraday_mean_revert():
     t1 = time.time()
 
     for i in prange(nsyms):
-        #np.random.seed(0)
+        # np.random.seed(0)
         s_open = 20 * np.random.randn(max_num_days)
         s_low = 18 * np.random.randn(max_num_days)
         s_close = 19 * np.random.randn(max_num_days)
         df = pd.DataFrame({'Open': s_open, 'Low': s_low,
-                            'Close': s_close})
+                           'Close': s_close})
 
-        #create column to hold our 90 day rolling standard deviation
+        # create column to hold our 90 day rolling standard deviation
         df['Stdev'] = df['Close'].rolling(window=90).std()
 
-        #create a column to hold our 20 day moving average
+        # create a column to hold our 20 day moving average
         df['Moving Average'] = df['Close'].rolling(window=20).mean()
 
-        #create a column which holds a TRUE value if the gap down from previous day's low to next
-        #day's open is larger than the 90 day rolling standard deviation
+        # create a column which holds a TRUE value if the gap down from previous day's low to next
+        # day's open is larger than the 90 day rolling standard deviation
         df['Criteria1'] = (df['Open'] - df['Low'].shift(1)) < -df['Stdev']
 
-        #create a column which holds a TRUE value if the opening price of the stock is above the 20 day moving average
+        # create a column which holds a TRUE value if the opening price of the stock is above the 20 day moving average
         df['Criteria2'] = df['Open'] > df['Moving Average']
 
-        #create a column that holds a TRUE value if both above criteria are also TRUE
+        # create a column that holds a TRUE value if both above criteria are also TRUE
         df['BUY'] = df['Criteria1'] & df['Criteria2']
 
-        #calculate daily % return series for stock
+        # calculate daily % return series for stock
         df['Pct Change'] = (df['Close'] - df['Open']) / df['Open']
 
-        #create a strategy return series by using the daily stock returns where the trade criteria above are met
-        df['Rets'] = df['Pct Change'][df['BUY'] == True]
+        # create a strategy return series by using the daily stock returns where the trade criteria above are met
+        df['Rets'] = df['Pct Change'][df['BUY']]
 
         all_res += df['Rets'].mean()
 
     print(all_res)
-    print("execution time:", time.time()-t1)
+    print("execution time:", time.time() - t1)
 
+
 intraday_mean_revert()
diff --git a/examples/k-means.py b/examples/k-means.py
index ec19c94f0..7c8b1295f 100644
--- a/examples/k-means.py
+++ b/examples/k-means.py
@@ -5,6 +5,7 @@
 import h5py
 import hpat
 
+
 @hpat.jit
 def kmeans(numCenter, numIter):
     f = h5py.File("lr.hdf5", "r")
@@ -16,17 +17,18 @@ def kmeans(numCenter, numIter):
     t1 = time.time()
 
     for l in range(numIter):
-        dist = np.array([[sqrt(np.sum((A[i,:]-centroids[j,:])**2))
-                                for j in range(numCenter)] for i in range(N)])
-        labels = np.array([dist[i,:].argmin() for i in range(N)])
+        dist = np.array([[sqrt(np.sum((A[i, :] - centroids[j, :])**2))
+                          for j in range(numCenter)] for i in range(N)])
+        labels = np.array([dist[i, :].argmin() for i in range(N)])
 
-        centroids = np.array([[np.sum(A[labels==i, j])/np.sum(labels==i)
-                                for j in range(D)] for i in range(numCenter)])
+        centroids = np.array([[np.sum(A[labels == i, j]) / np.sum(labels == i)
+                               for j in range(D)] for i in range(numCenter)])
 
     t2 = time.time()
-    print("Execution time:", t2-t1, "\nresult:", centroids)
+    print("Execution time:", t2 - t1, "\nresult:", centroids)
     return centroids
 
+
 def main():
     parser = argparse.ArgumentParser(description='K-Means')
     # parser.add_argument('--file', dest='file', type=str, default="lr.hdf5")
@@ -37,9 +39,10 @@ def main():
     iterations = args.iterations
 
     #D = 10
-    #np.random.seed(0)
+    # np.random.seed(0)
     #init_centroids = np.random.ranf((centers, D))
     res = kmeans(centers, iterations)
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/kernel_density_estimation.py b/examples/kernel_density_estimation.py
index 601d12118..d568b33f6 100644
--- a/examples/kernel_density_estimation.py
+++ b/examples/kernel_density_estimation.py
@@ -5,6 +5,7 @@
 import argparse
 import time
 
+
 @hpat.jit
 def kde():
     f = h5py.File("kde.hdf5", "r")
@@ -18,13 +19,14 @@ def kde():
     t1 = time.time()
     for i in prange(n):
         p = X[i]
-        d = (-(p-points)**2)/(2*b**2)
+        d = (-(p - points)**2) / (2 * b**2)
         m = np.min(d)
-        exps += m-np.log(b*N)+np.log(np.sum(np.exp(d-m)))
-    t = time.time()-t1
-    print("Execution time:", t,"\nresult:", exps)
+        exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
+    t = time.time() - t1
+    print("Execution time:", t, "\nresult:", exps)
     return exps
 
+
 def main():
     parser = argparse.ArgumentParser(description='Kernel-Density')
     parser.add_argument('--file', dest='file', type=str, default="lr.hdf5")
@@ -33,5 +35,6 @@ def main():
 
     res = kde()
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/kernel_density_estimation_pq.py b/examples/kernel_density_estimation_pq.py
index f77412997..d629935c9 100644
--- a/examples/kernel_density_estimation_pq.py
+++ b/examples/kernel_density_estimation_pq.py
@@ -5,6 +5,7 @@
 import argparse
 import time
 
+
 @hpat.jit
 def kde():
     t = pq.read_table('kde.parquet')
@@ -18,18 +19,20 @@ def kde():
     t1 = time.time()
     for i in prange(n):
         p = X[i]
-        d = (-(p-points)**2)/(2*b**2)
+        d = (-(p - points)**2) / (2 * b**2)
         m = np.min(d)
-        exps += m-np.log(b*N)+np.log(np.sum(np.exp(d-m)))
-    t = time.time()-t1
-    print("Execution time:", t,"\nresult:", exps)
+        exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
+    t = time.time() - t1
+    print("Execution time:", t, "\nresult:", exps)
     return exps
 
+
 def main():
     parser = argparse.ArgumentParser(description='Kernel-Density')
     args = parser.parse_args()
 
     res = kde()
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/kernel_density_estimation_pq_hdfs.py b/examples/kernel_density_estimation_pq_hdfs.py
index c452e7e49..265e051da 100644
--- a/examples/kernel_density_estimation_pq_hdfs.py
+++ b/examples/kernel_density_estimation_pq_hdfs.py
@@ -5,6 +5,7 @@
 import argparse
 import time
 
+
 @hpat.jit
 def kde():
     t = pq.read_table('hdfs://localhost:9016/user/etotoni/kde.parquet')
@@ -18,18 +19,20 @@ def kde():
     t1 = time.time()
     for i in prange(n):
         p = X[i]
-        d = (-(p-points)**2)/(2*b**2)
+        d = (-(p - points)**2) / (2 * b**2)
         m = np.min(d)
-        exps += m-np.log(b*N)+np.log(np.sum(np.exp(d-m)))
-    t = time.time()-t1
-    print("Execution time:", t,"\nresult:", exps)
+        exps += m - np.log(b * N) + np.log(np.sum(np.exp(d - m)))
+    t = time.time() - t1
+    print("Execution time:", t, "\nresult:", exps)
     return exps
 
+
 def main():
     parser = argparse.ArgumentParser(description='Kernel-Density')
     args = parser.parse_args()
 
     res = kde()
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/linear_regression.py b/examples/linear_regression.py
index e2cb23eb5..ba6898bd6 100644
--- a/examples/linear_regression.py
+++ b/examples/linear_regression.py
@@ -4,23 +4,25 @@
 import argparse
 import time
 
+
 @hpat.jit
 def linear_regression(iterations):
     f = h5py.File("lir.hdf5", "r")
     X = f['points'][:]
     Y = f['responses'][:]
     f.close()
-    N,D = X.shape
+    N, D = X.shape
     p = Y.shape[1]
-    alphaN = 0.01/N
-    w = np.zeros((D,p))
+    alphaN = 0.01 / N
+    w = np.zeros((D, p))
     t1 = time.time()
     for i in range(iterations):
-        w -= alphaN * np.dot(X.T, np.dot(X,w)-Y)
+        w -= alphaN * np.dot(X.T, np.dot(X, w) - Y)
     t2 = time.time()
-    print("Execution time:", t2-t1, "\nresult:", w)
+    print("Execution time:", t2 - t1, "\nresult:", w)
     return w
 
+
 def main():
     parser = argparse.ArgumentParser(description='Linear Regression.')
     parser.add_argument('--file', dest='file', type=str, default="lr.hdf5")
@@ -32,5 +34,6 @@ def main():
 
     w = linear_regression(iterations)
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/logistic_regression.py b/examples/logistic_regression.py
index 26da04b16..bdc146ce3 100644
--- a/examples/logistic_regression.py
+++ b/examples/logistic_regression.py
@@ -4,6 +4,7 @@
 import argparse
 import time
 
+
 @hpat.jit
 def logistic_regression(iterations):
     f = h5py.File("lr.hdf5", "r")
@@ -11,14 +12,15 @@ def logistic_regression(iterations):
     Y = f['responses'][:]
     f.close()
     D = X.shape[1]
-    w = np.ones(D)-0.5
+    w = np.ones(D) - 0.5
     t1 = time.time()
     for i in range(iterations):
-        w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X,w))) - 1.0) * Y),X)
+        w -= np.dot(((1.0 / (1.0 + np.exp(-Y * np.dot(X, w))) - 1.0) * Y), X)
     t2 = time.time()
-    print("Execution time:", t2-t1, "\nresult:", w)
+    print("Execution time:", t2 - t1, "\nresult:", w)
     return w
 
+
 def main():
     parser = argparse.ArgumentParser(description='Logistic Regression.')
     parser.add_argument('--file', dest='file', type=str, default="lr.hdf5")
@@ -30,5 +32,6 @@ def main():
 
     w = logistic_regression(iterations)
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/logistic_regression_rand.py b/examples/logistic_regression_rand.py
index 90f585007..a5d87e229 100644
--- a/examples/logistic_regression_rand.py
+++ b/examples/logistic_regression_rand.py
@@ -2,6 +2,7 @@
 import hpat
 import time
 
+
 @hpat.jit
 def logistic_regression(iterations):
     t1 = time.time()
@@ -18,7 +19,8 @@ def logistic_regression(iterations):
 
     accuracy = np.sum(R == Y) / N
     print(accuracy, w)
-    print("Execution time:", time.time()-t1)
+    print("Execution time:", time.time() - t1)
     return w
 
+
 w = logistic_regression(20)
diff --git a/examples/ml_mnb.py b/examples/ml_mnb.py
index 706616080..dd4682752 100644
--- a/examples/ml_mnb.py
+++ b/examples/ml_mnb.py
@@ -6,21 +6,23 @@
 
 #hpat.multithread_mode = True
 
+
 @hpat.jit
 def f(N, D, M):
     X = np.random.randint(0, 5, size=(N, D)).astype(np.int32)
     y = np.empty(N, dtype=np.int32)
     for i in prange(N):
-        y[i] = i%4
+        y[i] = i % 4
     p = np.random.randint(0, 5, size=(M, D)).astype(np.int32)
 
     clf = hpat.ml.MultinomialNB(n_classes=4)
     t1 = time.time()
     clf.train(X, y)
     res = clf.predict(p)
-    print("Exec time:", time.time()-t1)
+    print("Exec time:", time.time() - t1)
     return res.sum()
 
-N = 1024*128
+
+N = 1024 * 128
 D = 20
 M = 128
diff --git a/examples/ml_svc.py b/examples/ml_svc.py
index d02f3f3e2..ee75bab8c 100644
--- a/examples/ml_svc.py
+++ b/examples/ml_svc.py
@@ -6,21 +6,23 @@
 
 hpat.multithread_mode = True
 
+
 @hpat.jit
 def f(N, D, M):
     X = np.random.ranf((N, D))
     y = np.empty(N)
     for i in prange(N):
-        y[i] = i%4
+        y[i] = i % 4
     p = np.random.ranf((M, D))
 
     clf = hpat.ml.SVC(n_classes=4)
     t1 = time.time()
     clf.train(X, y)
     res = clf.predict(p)
-    print("Exec time:", time.time()-t1)
+    print("Exec time:", time.time() - t1)
     return res.sum()
 
-N = 1024*16
+
+N = 1024 * 16
 D = 20
 M = 128
diff --git a/examples/pi.py b/examples/pi.py
index 26ba242f1..c257592d6 100644
--- a/examples/pi.py
+++ b/examples/pi.py
@@ -3,15 +3,17 @@
 import argparse
 import time
 
+
 @hpat.jit
 def calc_pi(n):
     t1 = time.time()
     x = 2 * np.random.ranf(n) - 1
     y = 2 * np.random.ranf(n) - 1
     pi = 4 * np.sum(x**2 + y**2 < 1) / n
-    print("Execution time:", time.time()-t1, "\nresult:", pi)
+    print("Execution time:", time.time() - t1, "\nresult:", pi)
     return pi
 
+
 def main():
     parser = argparse.ArgumentParser(description='Monte Carlo Pi Calculation.')
     parser.add_argument('--points', dest='points', type=int, default=200000000)
@@ -19,5 +21,6 @@ def main():
     points = args.points
     calc_pi(points)
 
+
 if __name__ == '__main__':
     main()
diff --git a/examples/ros_example1.py b/examples/ros_example1.py
index ce7f7f0fc..366364453 100644
--- a/examples/ros_example1.py
+++ b/examples/ros_example1.py
@@ -5,20 +5,22 @@
 from hpat import prange, stencil
 import time
 
+
 @stencil
 def gaussian_blur(a):
-    return (a[-2,-2] * 0.003 + a[-1,-2] * 0.0133 + a[0,-2] * 0.0219 + a[1,-2] * 0.0133 + a[2,-2] * 0.0030 +
-            a[-2,-1] * 0.0133 + a[-1,-1] * 0.0596 + a[0,-1] * 0.0983 + a[1,-1] * 0.0596 + a[2,-1] * 0.0133 +
-            a[-2, 0] * 0.0219 + a[-1, 0] * 0.0983 + a[0, 0] * 0.1621 + a[1, 0] * 0.0983 + a[2, 0] * 0.0219 +
-            a[-2, 1] * 0.0133 + a[-1, 1] * 0.0596 + a[0, 1] * 0.0983 + a[1, 1] * 0.0596 + a[2, 1] * 0.0133 +
-            a[-2, 2] * 0.003 + a[-1, 2] * 0.0133 + a[0, 2] * 0.0219 + a[1, 2] * 0.0133 + a[2, 2] * 0.0030)
+    return (a[-2, -2] * 0.003 + a[-1, -2] * 0.0133 + a[0, -2] * 0.0219 + a[1, -2] * 0.0133 + a[2, -2] * 0.0030 +
+            a[-2, -1] * 0.0133 + a[-1, -1] * 0.0596 + a[0, -1] * 0.0983 + a[1, -1] * 0.0596 + a[2, -1] * 0.0133 +
+            a[-2, 0] * 0.0219 + a[-1, 0] * 0.0983 + a[0, 0] * 0.1621 + a[1, 0] * 0.0983 + a[2, 0] * 0.0219 +
+            a[-2, 1] * 0.0133 + a[-1, 1] * 0.0596 + a[0, 1] * 0.0983 + a[1, 1] * 0.0596 + a[2, 1] * 0.0133 +
+            a[-2, 2] * 0.003 + a[-1, 2] * 0.0133 + a[0, 2] * 0.0219 + a[1, 2] * 0.0133 + a[2, 2] * 0.0030)
+
 
 @hpat.jit
 def read_example():
     t1 = time.time()
     A = hpat.ros.read_ros_images("image_test.bag")
     # crop out dashboard
-    B = A[:,:-50,:,:]
+    B = A[:, :-50, :, :]
     # intensity threshold
     threshold = B.mean() + .004 * B.std()
     n = B.shape[0]
@@ -29,20 +31,20 @@ def read_example():
     C = B[mask]
     D = np.empty_like(C)
     for i in prange(len(C)):
-        D[i,:,:,0] = gaussian_blur(C[i,:,:,0])
-        D[i,:,:,1] = gaussian_blur(C[i,:,:,1])
-        D[i,:,:,2] = gaussian_blur(C[i,:,:,2])
+        D[i, :, :, 0] = gaussian_blur(C[i, :, :, 0])
+        D[i, :, :, 1] = gaussian_blur(C[i, :, :, 1])
+        D[i, :, :, 2] = gaussian_blur(C[i, :, :, 2])
 
     # K-means model
     numCenter = 4
     numIter = 10
     dn, dh, dw, dc = D.shape
     centroids = np.random.randint(0, 255, (numCenter, dh, dw, dc)).astype(np.uint8)
     for l in range(numIter):
-        dist = np.array([[sqrt(np.sum((D[i]-centroids[j])**2))
-                                for j in range(numCenter)] for i in range(dn)])
+        dist = np.array([[sqrt(np.sum((D[i] - centroids[j])**2))
+                          for j in range(numCenter)] for i in range(dn)])
         labels = np.array([dist[i].argmin() for i in range(dn)])
         for i in range(numCenter):
-            mask2 = (labels==i)
+            mask2 = (labels == i)
             num_points = np.sum(mask2)
             if num_points != 0:
                 centroids[i] = np.sum(D[mask2], 0) / num_points
@@ -50,8 +52,9 @@ def read_example():
                 centroids[i] = np.random.randint(0, 255, (dh, dw, dc)).astype(np.uint8)
 
     t2 = time.time()
-    print("Exec time: ", t2-t1)
+    print("Exec time: ", t2 - t1)
     return centroids
 
+
 print(read_example().sum())
-#hpat.distribution_report()
+# hpat.distribution_report()
diff --git a/setup.py b/setup.py
index da4e1d076..e3e45b7ae 100644
--- a/setup.py
+++ b/setup.py
@@ -41,6 +41,7 @@
 
 is_win = platform.system() == 'Windows'
 
+
 def readme():
     with open('README.rst') as f:
         return f.read()
@@ -54,7 +55,7 @@ def readme():
     HDF5_DIR = os.environ['HDF5_DIR']
 
 #PANDAS_DIR = ""
-#if 'PANDAS_DIR' in os.environ:
+# if 'PANDAS_DIR' in os.environ:
 #    PANDAS_DIR = os.environ['PANDAS_DIR']
 
 # package environment variable is PREFIX during build time
@@ -64,7 +65,7 @@ def readme():
     PREFIX_DIR = os.environ['CONDA_PREFIX']
     # C libraries are in \Library on Windows
     if is_win:
-        PREFIX_DIR += '\Library'
+        PREFIX_DIR += r'\Library'
 
 
 try:
@@ -99,13 +100,13 @@ def readme():
     _has_xenon = False
 
 
-if 'HPAT_XE_SUPPORT' in os.environ and os.environ['HPAT_XE_SUPPORT'] != "0": 
+if 'HPAT_XE_SUPPORT' in os.environ and os.environ['HPAT_XE_SUPPORT'] != "0":
    _has_xenon = True
 
-ind = [PREFIX_DIR+'/include',]
-lid = [PREFIX_DIR+'/lib',]
-eca = ['-std=c++11',]  # '-g', '-O0']
-ela = ['-std=c++11',]
+ind = [PREFIX_DIR + '/include', ]
+lid = [PREFIX_DIR + '/lib', ]
+eca = ['-std=c++11', ]  # '-g', '-O0']
+ela = ['-std=c++11', ]
 
 MPI_LIBS = ['mpi']
 H5_CPP_FLAGS = []
@@ -113,11 +114,11 @@ def readme():
 use_impi = False
 if use_impi:
     MPI_ROOT = os.environ['I_MPI_ROOT']
-    MPI_INC = MPI_ROOT+'/include64/'
-    MPI_LIBDIR = MPI_ROOT+'/lib64/'
-    MPI_LIBS = [ 'mpifort', 'mpi' , 'mpigi']
-    ind = [PREFIX_DIR+'/include', MPI_INC]
-    lid = [PREFIX_DIR+'/lib', MPI_LIBDIR]
+    MPI_INC = MPI_ROOT + '/include64/'
+    MPI_LIBDIR = MPI_ROOT + '/lib64/'
+    MPI_LIBS = ['mpifort', 'mpi', 'mpigi']
+    ind = [PREFIX_DIR + '/include', MPI_INC]
+    lid = [PREFIX_DIR + '/lib', MPI_LIBDIR]
 
 if is_win:
     # use Intel MPI on Windows
@@ -135,63 +136,63 @@ def readme():
                    depends=["hpat/_hpat_common.h", "hpat/_distributed.h",
                             "hpat/_import_py.h", "hpat/io/_csv.h",
                             "hpat/_datetime_ext.h"],
-                   libraries = io_libs,
-                   include_dirs = ind + np_compile_args['include_dirs'],
-                   library_dirs = lid,
-                   define_macros = H5_CPP_FLAGS,
-                   extra_compile_args = eca,
-                   extra_link_args = ela,
+                   libraries=io_libs,
+                   include_dirs=ind + np_compile_args['include_dirs'],
+                   library_dirs=lid,
+                   define_macros=H5_CPP_FLAGS,
+                   extra_compile_args=eca,
+                   extra_link_args=ela,
                    language="c++"
-)
+                   )
 
 ext_hdf5 = Extension(name="hpat.io._hdf5",
-                    sources=["hpat/io/_hdf5.cpp"],
-                    depends=[],
-                    libraries = hdf5_libs,
-                    include_dirs = [HDF5_DIR+'/include',] + ind,
-                    library_dirs = [HDF5_DIR+'/lib',] + lid,
-                    define_macros = H5_CPP_FLAGS,
-                    extra_compile_args = eca,
-                    extra_link_args = ela,
-                    language="c++"
-)
+                     sources=["hpat/io/_hdf5.cpp"],
+                     depends=[],
+                     libraries=hdf5_libs,
+                     include_dirs=[HDF5_DIR + '/include', ] + ind,
+                     library_dirs=[HDF5_DIR + '/lib', ] + lid,
+                     define_macros=H5_CPP_FLAGS,
+                     extra_compile_args=eca,
+                     extra_link_args=ela,
+                     language="c++"
+                     )
 
 ext_hdist = Extension(name="hpat.hdist",
                       sources=["hpat/_distributed.cpp"],
                       depends=["hpat/_hpat_common.h"],
-                      libraries = MPI_LIBS,
-                      extra_compile_args = eca,
-                      extra_link_args = ela,
-                      include_dirs = ind,
-                      library_dirs = lid,
-)
+                      libraries=MPI_LIBS,
+                      extra_compile_args=eca,
+                      extra_link_args=ela,
+                      include_dirs=ind,
+                      library_dirs=lid,
+                      )
 
 ext_chiframes = Extension(name="hpat.chiframes",
                           sources=["hpat/_hiframes.cpp"],
-                          libraries = MPI_LIBS,
+                          libraries=MPI_LIBS,
                           depends=["hpat/_hpat_sort.h"],
-                          extra_compile_args = eca,
-                          extra_link_args = ela,
-                          include_dirs = ind,
-                          library_dirs = lid,
-)
+                          extra_compile_args=eca,
+                          extra_link_args=ela,
+                          include_dirs=ind,
+                          library_dirs=lid,
+                          )
 
 ext_dict = Extension(name="hpat.hdict_ext",
                      sources=["hpat/_dict_ext.cpp"],
-                     extra_compile_args = eca,
-                     extra_link_args = ela,
-                     include_dirs = ind,
-                     library_dirs = lid,
-)
+                     extra_compile_args=eca,
+                     extra_link_args=ela,
+                     include_dirs=ind,
+                     library_dirs=lid,
+                     )
 
 ext_set = Extension(name="hpat.hset_ext",
-                     sources=["hpat/_set_ext.cpp"],
-                     extra_compile_args = eca,
-                     extra_link_args = ela,
-                     include_dirs = ind,
-                     library_dirs = lid,
-)
+                    sources=["hpat/_set_ext.cpp"],
+                    extra_compile_args=eca,
+                    extra_link_args=ela,
+                    include_dirs=ind,
+                    library_dirs=lid,
+                    )
 
 
 str_libs = np_compile_args['libraries']
@@ -201,12 +202,12 @@ def readme():
 ext_str = Extension(name="hpat.hstr_ext",
                     sources=["hpat/_str_ext.cpp"],
                     libraries=str_libs,
-                    define_macros = np_compile_args['define_macros'] + [('USE_BOOST_REGEX', None)],
-                    extra_compile_args = eca,
-                    extra_link_args = ela,
-                    include_dirs = np_compile_args['include_dirs'] + ind,
-                    library_dirs = np_compile_args['library_dirs'] + lid,
-)
+                    define_macros=np_compile_args['define_macros'] + [('USE_BOOST_REGEX', None)],
+                    extra_compile_args=eca,
+                    extra_link_args=ela,
+                    include_dirs=np_compile_args['include_dirs'] + ind,
+                    library_dirs=np_compile_args['library_dirs'] + lid,
+                    )
 
 #dt_args = copy.copy(np_compile_args)
 #dt_args['include_dirs'] = dt_args['include_dirs'] + [PANDAS_DIR+'/_libs/src/datetime/']
@@ -216,23 +217,23 @@ def readme():
 ext_dt = Extension(name="hpat.hdatetime_ext",
                    sources=["hpat/_datetime_ext.cpp"],
                    libraries=np_compile_args['libraries'],
-                   define_macros = np_compile_args['define_macros'],
+                   define_macros=np_compile_args['define_macros'],
                    extra_compile_args=['-std=c++11'],
                    extra_link_args=['-std=c++11'],
-                   include_dirs = np_compile_args['include_dirs'],
-                   library_dirs = np_compile_args['library_dirs'],
+                   include_dirs=np_compile_args['include_dirs'],
+                   library_dirs=np_compile_args['library_dirs'],
                    language="c++"
-)
+                   )
 
 ext_quantile = Extension(name="hpat.quantile_alg",
                          sources=["hpat/_quantile_alg.cpp"],
                          depends=["hpat/_hpat_common.h"],
-                         libraries = MPI_LIBS,
-                         extra_compile_args = eca,
-                         extra_link_args = ela,
-                         include_dirs = ind,
-                         library_dirs = lid,
-)
+                         libraries=MPI_LIBS,
+                         extra_compile_args=eca,
+                         extra_link_args=ela,
+                         include_dirs=ind,
+                         library_dirs=lid,
+                         )
 
 
 # pq_libs = MPI_LIBS + ['boost_filesystem', 'arrow', 'parquet']
@@ -253,15 +254,15 @@ def readme():
 
 ext_parquet = Extension(name="hpat.parquet_cpp",
                         sources=["hpat/io/_parquet.cpp"],
-                        libraries = pq_libs,
-                        include_dirs = ['.'] + ind,
-                        define_macros = [('BUILTIN_PARQUET_READER', None)],
-                        extra_compile_args = eca,
-                        extra_link_args = ela,
-                        library_dirs = lid,
-)
-
-#ext_daal_wrapper = Extension(name="hpat.daal_wrapper",
+                        libraries=pq_libs,
+                        include_dirs=['.'] + ind,
+                        define_macros=[('BUILTIN_PARQUET_READER', None)],
+                        extra_compile_args=eca,
+                        extra_link_args=ela,
+                        library_dirs=lid,
+                        )
+
+# ext_daal_wrapper = Extension(name="hpat.daal_wrapper",
 #                   include_dirs = [DAALROOT+'/include'],
 #                   libraries = ['daal_core', 'daal_thread']+MPI_LIBS,
 #                   sources=["hpat/_daal.cpp"]
@@ -269,35 +270,61 @@ def readme():
 
 ext_ros = Extension(name="hpat.ros_cpp",
                     sources=["hpat/_ros.cpp"],
-                    include_dirs = ['/opt/ros/lunar/include', '/opt/ros/lunar/include/xmlrpcpp', PREFIX_DIR+'/include/', './ros_include'],
-                    extra_compile_args = eca,
-                    extra_link_args = ela + '-rdynamic /opt/ros/lunar/lib/librosbag.so /opt/ros/lunar/lib/librosbag_storage.so -lboost_program_options /opt/ros/lunar/lib/libroslz4.so /opt/ros/lunar/lib/libtopic_tools.so /opt/ros/lunar/lib/libroscpp.so -lboost_filesystem -lboost_signals /opt/ros/lunar/lib/librosconsole.so /opt/ros/lunar/lib/librosconsole_log4cxx.so /opt/ros/lunar/lib/librosconsole_backend_interface.so -lboost_regex /opt/ros/lunar/lib/libroscpp_serialization.so /opt/ros/lunar/lib/librostime.so /opt/ros/lunar/lib/libxmlrpcpp.so /opt/ros/lunar/lib/libcpp_common.so -lboost_system -lboost_thread -lboost_chrono -lboost_date_time -lboost_atomic -lpthread -Wl,-rpath,/opt/ros/lunar/lib'.split(),
-                    library_dirs = lid,
-)
+                    include_dirs=['/opt/ros/lunar/include',
+                                  '/opt/ros/lunar/include/xmlrpcpp',
+                                  PREFIX_DIR + '/include/',
+                                  './ros_include'],
+                    extra_compile_args=eca,
+                    extra_link_args=ela + ['-rdynamic',
+                                           '/opt/ros/lunar/lib/librosbag.so',
+                                           '/opt/ros/lunar/lib/librosbag_storage.so',
+                                           '-lboost_program_options',
+                                           '/opt/ros/lunar/lib/libroslz4.so',
+                                           '/opt/ros/lunar/lib/libtopic_tools.so',
+                                           '/opt/ros/lunar/lib/libroscpp.so',
+                                           '-lboost_filesystem',
+                                           '-lboost_signals',
+                                           '/opt/ros/lunar/lib/librosconsole.so',
+                                           '/opt/ros/lunar/lib/librosconsole_log4cxx.so',
+                                           '/opt/ros/lunar/lib/librosconsole_backend_interface.so',
+                                           '-lboost_regex',
+                                           '/opt/ros/lunar/lib/libroscpp_serialization.so',
+                                           '/opt/ros/lunar/lib/librostime.so',
+                                           '/opt/ros/lunar/lib/libxmlrpcpp.so',
+                                           '/opt/ros/lunar/lib/libcpp_common.so',
+                                           '-lboost_system',
+                                           '-lboost_thread',
+                                           '-lboost_chrono',
+                                           '-lboost_date_time',
+                                           '-lboost_atomic',
+                                           '-lpthread',
+                                           '-Wl,-rpath,/opt/ros/lunar/lib'],
+                    library_dirs=lid,
+                    )
 
 cv_libs = ['opencv_core', 'opencv_imgproc', 'opencv_imgcodecs', 'opencv_highgui']
 # XXX cv lib file name needs version on Windows
 if is_win:
-    cv_libs = [l+'331' for l in cv_libs]
+    cv_libs = [l + '331' for l in cv_libs]
 
 ext_cv_wrapper = Extension(name="hpat.cv_wrapper",
                            sources=["hpat/_cv.cpp"],
-                           include_dirs = [OPENCV_DIR+'/include'] + ind,
-                           library_dirs = [os.path.join(OPENCV_DIR,'lib')] + lid,
-                           libraries = cv_libs,
+                           include_dirs=[OPENCV_DIR + '/include'] + ind,
+                           library_dirs=[os.path.join(OPENCV_DIR, 'lib')] + lid,
+                           libraries=cv_libs,
                            #extra_link_args = cv_link_args,
                            language="c++",
-)
+                           )
 
 ext_xenon_wrapper = Extension(name="hpat.hxe_ext",
                               sources=["hpat/io/_xe_wrapper.cpp"],
                               #include_dirs = ['/usr/include'],
-                              include_dirs = ['.'] + ind,
-                              library_dirs = ['.'] + lid,
-                              libraries = ['xe'],
-                              extra_compile_args = eca,
-                              extra_link_args = ela,
-)
+                              include_dirs=['.'] + ind,
+                              library_dirs=['.'] + lid,
+                              libraries=['xe'],
+                              extra_compile_args=eca,
+                              extra_link_args=ela,
+                              )
 
 _ext_mods = [ext_hdist, ext_chiframes, ext_dict, ext_set, ext_str, ext_quantile, ext_dt, ext_io]
@@ -305,7 +332,7 @@ def readme():
     _ext_mods.append(ext_hdf5)
 if _has_pyarrow:
     _ext_mods.append(ext_parquet)
-#if _has_daal:
+# if _has_daal:
 #    _ext_mods.append(ext_daal_wrapper)
 if _has_ros:
     _ext_mods.append(ext_ros)
@@ -352,7 +379,7 @@ class style(Command):
 
     def _get_file_list(self, path, search_extentions):
         """ Return file list to be adjusted or checked
-        
+
         path - is the project base path
         search_extentions - list of strings with files extension to search recurcivly
         """
@@ -446,20 +473,20 @@ def run(self):
       description='compiling Python code for clusters',
       long_description=readme(),
      classifiers=[
-        "Development Status :: 2 - Pre-Alpha",
-        "Intended Audience :: Developers",
-        "Operating System :: POSIX :: Linux",
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3.6",
-        "Topic :: Software Development :: Compilers",
-        "Topic :: System :: Distributed Computing",
+          "Development Status :: 2 - Pre-Alpha",
+          "Intended Audience :: Developers",
+          "Operating System :: POSIX :: Linux",
+          "Programming Language :: Python",
+          "Programming Language :: Python :: 3.6",
+          "Topic :: Software Development :: Compilers",
+          "Topic :: System :: Distributed Computing",
       ],
       keywords='data analytics cluster',
      url='https://github.com/IntelLabs/hpat',
      author='Intel',
      packages=find_packages(),
-      package_data={'hpat.tests': ['*.bz2'],},
+      package_data={'hpat.tests': ['*.bz2'], },
      install_requires=['numba'],
      extras_require={'HDF5': ["h5py"], 'Parquet': ["pyarrow"]},
      cmdclass=hpat_build_commands,
-      ext_modules = _ext_mods)
+      ext_modules=_ext_mods)
diff --git a/tutorial/intraday_mean.py b/tutorial/intraday_mean.py
index 1b4c55bb1..f843afea8 100644
--- a/tutorial/intraday_mean.py
+++ b/tutorial/intraday_mean.py
@@ -8,9 +8,10 @@
 # adopted from:
 # http://www.pythonforfinance.net/2017/02/20/intraday-stock-mean-reversion-trading-backtest-in-python/
 
+
 @hpat.jit(locals={'s_open': hpat.float64[:], 's_high': hpat.float64[:],
-                    's_low': hpat.float64[:], 's_close': hpat.float64[:],
-                    's_vol': hpat.float64[:]})
+                  's_low': hpat.float64[:], 's_close': hpat.float64[:],
+                  's_vol': hpat.float64[:]})
 def intraday_mean_revert():
     file_name = "stock_data_all_google.hdf5"
     f = h5py.File(file_name, "r")
@@ -23,35 +24,35 @@ def intraday_mean_revert():
 
     for i in prange(nsyms):
         symbol = sym_list[i]
-        s_open = f[symbol+'/Open'][:]
-        s_high = f[symbol+'/High'][:]
-        s_low = f[symbol+'/Low'][:]
-        s_close = f[symbol+'/Close'][:]
-        s_vol = f[symbol+'/Volume'][:]
+        s_open = f[symbol + '/Open'][:]
+        s_high = f[symbol + '/High'][:]
+        s_low = f[symbol + '/Low'][:]
+        s_close = f[symbol + '/Close'][:]
+        s_vol = f[symbol + '/Volume'][:]
         df = pd.DataFrame({'Open': s_open, 'High': s_high, 'Low': s_low,
-                            'Close': s_close, 'Volume': s_vol,})
+                           'Close': s_close, 'Volume': s_vol, })
 
-        #create column to hold our 90 day rolling standard deviation
+        # create column to hold our 90 day rolling standard deviation
         df['Stdev'] = df['Close'].rolling(window=90).std()
 
-        #create a column to hold our 20 day moving average
+        # create a column to hold our 20 day moving average
         df['Moving Average'] = df['Close'].rolling(window=20).mean()
 
-        #create a column which holds a TRUE value if the gap down from previous day's low to next
-        #day's open is larger than the 90 day rolling standard deviation
+        # create a column which holds a TRUE value if the gap down from previous day's low to next
+        # day's open is larger than the 90 day rolling standard deviation
         df['Criteria1'] = (df['Open'] - df['Low'].shift(1)) < -df['Stdev']
 
-        #create a column which holds a TRUE value if the opening price of the stock is above the 20 day moving average
+        # create a column which holds a TRUE value if the opening price of the stock is above the 20 day moving average
         df['Criteria2'] = df['Open'] > df['Moving Average']
 
-        #create a column that holds a TRUE value if both above criteria are also TRUE
+        # create a column that holds a TRUE value if both above criteria are also TRUE
         df['BUY'] = df['Criteria1'] & df['Criteria2']
 
-        #calculate daily % return series for stock
+        # calculate daily % return series for stock
         df['Pct Change'] = (df['Close'] - df['Open']) / df['Open']
 
-        #create a strategy return series by using the daily stock returns where the trade criteria above are met
-        df['Rets'] = df['Pct Change'][df['BUY'] == True]
+        # create a strategy return series by using the daily stock returns where the trade criteria above are met
+        df['Rets'] = df['Pct Change'][df['BUY']]
 
         n_days = len(df['Rets'])
         res = np.zeros(max_num_days)
@@ -61,6 +62,7 @@ def intraday_mean_revert():
 
     f.close()
     print(all_res.mean())
-    print("execution time:", time.time()-t1)
+    print("execution time:", time.time() - t1)
 
+
 intraday_mean_revert()