From 69b3a4f63f41299410595d4490c361da4ad5cd7a Mon Sep 17 00:00:00 2001 From: Kathryn Hempstalk Date: Fri, 2 Mar 2018 15:57:08 +1300 Subject: [PATCH 1/2] Updated module to be installable, rearranged files. --- .DS_Store | Bin 0 -> 6148 bytes MANIFEST.in | 1 + README.md | 28 ++++++++++++++++-- __init__.py | 1 + data/.DS_Store | Bin 0 -> 6148 bytes data/stoplists/.DS_Store | Bin 0 -> 6148 bytes .../stoplists/FoxStoplist.txt | 0 .../stoplists/FrenchStoplist.txt | 0 .../stoplists/SmartStoplist.txt | 0 .../stoplists/SpanishStoplist.txt | 0 rake.py | 10 +++---- rake_tutorial.py | 2 +- setup.py | 15 ++++++++++ 13 files changed, 49 insertions(+), 8 deletions(-) create mode 100644 .DS_Store create mode 100644 MANIFEST.in create mode 100644 __init__.py create mode 100644 data/.DS_Store create mode 100644 data/stoplists/.DS_Store rename FoxStoplist.txt => data/stoplists/FoxStoplist.txt (100%) rename FrenchStoplist.txt => data/stoplists/FrenchStoplist.txt (100%) rename SmartStoplist.txt => data/stoplists/SmartStoplist.txt (100%) rename SpanishStoplist.txt => data/stoplists/SpanishStoplist.txt (100%) create mode 100644 setup.py diff --git a/.DS_Store b/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..66d21f7f2956eaf43865532a3ef230b4a9759552 GIT binary patch literal 6148 zcmeHKJxc>Y5PcI*Nl-AQU~{F7g0;;hrU=hIP^PqUR+ z>-e7v=-F)}!yLEBQ9VC>CC=zaU(Vd}BaAUbfnnvGHSVJpw~q_-F~Bj-IqnD(+~a}e z3Av9bYV1$4Un{e;aETNh^f*dN&Ru<7@=V5ih0%1Q7u>~+@iRQtX6P{ftrEY-bxg=9 zaQ(BRbv!vF@1??H)65hw1x$erP=GmGt-0&bN>jiTFa;J0==UL}D<%O;kM7gK!d(G~ z5xbqSu0Mpzi3279OOJd)6Hg_2s)Q?scsl#z5SIijJ$gDMTs|bsEa8SCZg!3zyKqS2 z(MnUm6j)YZ%QxGa|I^>^|H~w6nF6N3Mkyds@2c13mO^e_*_`Iun0`Z7)40;(g2IYR g#f;Tdd_Z@`{#Yx-Bw*>0Jv93fa57k73jC=8-yRuWCjbBd literal 0 HcmV?d00001 diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..e0f334d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1 @@ +include data/stoplists/* diff --git a/README.md b/README.md index f9e103a..803d7af 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,30 @@ -RAKE -==== +# RAKE +--- A Python implementation of the Rapid Automatic Keyword Extraction (RAKE) algorithm as described in: Rose, S., Engel, D., Cramer, N., & Cowley, W. (2010). Automatic Keyword Extraction from Individual Documents. In M. W. Berry & J. Kogan (Eds.), Text Mining: Theory and Applications: John Wiley & Sons. The source code is released under the MIT License. + +## Installing rake + +To install rake as a package, run: + +`python setup.py install` + +## Example use + +```python +from nlp_rake import rake + +stoppath = 'data/stoplists/SmartStoplist.txt' + +rake_object = rake.Rake(stoppath, 5, 3, 4) + +sample_file = open("data/docs/fao_test/w2167e.txt", 'r', encoding="iso-8859-1") +text = sample_file.read() + +keywords = rake_object.run(text) + +# 3. print results +print("Keywords:", keywords) +``` diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..d333644 --- /dev/null +++ b/__init__.py @@ -0,0 +1 @@ +import rake diff --git a/data/.DS_Store b/data/.DS_Store new file mode 100644 index 0000000000000000000000000000000000000000..40be868e1b40aa70e1c9b22a55b164770e1fd8a1 GIT binary patch literal 6148 zcmeHKJ8AX}IHYlzQdjT|gxQ?HjY$>Qg(To*7i?EH=kc3Q!Y)>kB7_iT zKzbw1N7@G~X^4n7r*2KOBBCXlAd6BFnXZ~{+<5_H&9SF9I?$00`uo#q<7CSfuYPEn?sRO4KHl5&?x~)uAG&XNjjvZLP8JU}7zhS}fnXpQ_%Q}> zXNxTF4WkbRf`MS*ivc+w5}IIkEQWe?pwkinD9>mW=u%5aOmfVQ#Sj(ZwWB$3m`Jfo0mmpGP24>#w zW@eIo8!{OHY5M+r3lsn*RKd|H%{L;~MSC*xl`%1Cjs`V0sIbDW=N${j$bg*PIqrFG zd(83m{2JVFzDM&=)yqXyFX&l(bVN6F$>=R!u*MRv19Mz3?hQ}O(+tdT#kdVKto7X+ z`W(l2Lh($&Krj#t{5}J$*&>rO!?3|XFc1tJ7?AHnrV5sht)YH8XtV_&%5S6!$I?qk zOm-|CTSH!C$% Date: Fri, 2 Mar 2018 16:14:44 +1300 Subject: [PATCH 2/2] Cleaned up files. --- .DS_Store | Bin 6148 -> 0 bytes .gitignore | 1 + data/.DS_Store | Bin 6148 -> 0 bytes 3 files changed, 1 insertion(+) delete mode 100644 .DS_Store delete mode 100644 data/.DS_Store diff --git a/.DS_Store b/.DS_Store deleted file mode 100644 index 66d21f7f2956eaf43865532a3ef230b4a9759552..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJxc>Y5PcI*Nl-AQU~{F7g0;;hrU=hIP^PqUR+ z>-e7v=-F)}!yLEBQ9VC>CC=zaU(Vd}BaAUbfnnvGHSVJpw~q_-F~Bj-IqnD(+~a}e z3Av9bYV1$4Un{e;aETNh^f*dN&Ru<7@=V5ih0%1Q7u>~+@iRQtX6P{ftrEY-bxg=9 zaQ(BRbv!vF@1??H)65hw1x$erP=GmGt-0&bN>jiTFa;J0==UL}D<%O;kM7gK!d(G~ z5xbqSu0Mpzi3279OOJd)6Hg_2s)Q?scsl#z5SIijJ$gDMTs|bsEa8SCZg!3zyKqS2 z(MnUm6j)YZ%QxGa|I^>^|H~w6nF6N3Mkyds@2c13mO^e_*_`Iun0`Z7)40;(g2IYR g#f;Tdd_Z@`{#Yx-Bw*>0Jv93fa57k73jC=8-yRuWCjbBd diff --git a/.gitignore b/.gitignore index a2dd459..50bf877 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ __pycache__/ # Distribution / packaging .Python +.DS_STORE env/ bin/ build/ diff --git a/data/.DS_Store b/data/.DS_Store deleted file mode 100644 index 40be868e1b40aa70e1c9b22a55b164770e1fd8a1..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 6148 zcmeHKJ8AX}IHYlzQdjT|gxQ?HjY$>Qg(To*7i?EH=kc3Q!Y)>kB7_iT zKzbw1N7@G~X^4n7r*2KOBBCXlAd6BFnXZ~{+<5_H&9SF9I?$00`uo#q<7CSfuYPEn?sRO4KHl5&?x~)uAG&XNjjvZLP8JU}7zhS}fnXpQ_%Q}> zXNxTF4WkbRf`MS*ivc+w5}IIkEQWe?pwkinD9>mW=u%5aOmfVQ#Sj(