Skip to content
This repository was archived by the owner on May 25, 2022. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions projects/web page summation/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
venv/
env.bak/
venv.bak/
env/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/

__pycache__
.vscode
settings.json

# Dependency directories
node_modules/
jspm_packages/

# Optional npm cache directory
.npm
.DS_Store
.DS_Store
datasets
datasets/
new_datasets/
node_modules
yarn.lock
app
__pycache__/
dist
build
mlclassification-darwin-x64
release-builds
Classifi
app
dist
build
Summarize.spec
__pycache__
applog.log
csv/
beneficiary.csv
.DS_Store
applog.log
84 changes: 84 additions & 0 deletions projects/web page summation/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Website Summarization API

This project builds a machine learning model for summarizing a web page from its URL.

## Getting Started

These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.


### Prerequisites

Python distribution

```
Anaconda
```

### Installing

Install Anaconda python distribution on your system

Create a virtual environment called app.

```
python -m venv app
```

Activate the virtual environment

```
LINUX/Mac: source app/bin/activate

Windows: app\Scripts\activate
```

Upgrade to the latest pip

```
pip install --upgrade pip
```

Install dependencies using requirements file

```
pip install -r requirements.txt
```
**Note: Your virtual environment must always be activated before running any command**

## Deployment

Start the app (make sure to enter the URL of a valid, existing website)


Example of valid commands

```
python app.py simple --url https://facebook.com --sentence 1 --language english
python app.py simple --url https://facebook.com
python app.py simple --url https://korapay.com
python app.py bulk --path ./csv/valid_websites.csv
```


### APIs

These are the command options in full:

```
A command line utility for website Summarization.
-----------------------------------------------
These are common commands for this app.

positional arguments:
  action               This has to be 'simple' or 'bulk'

optional arguments:
  -h, --help           show this help message and exit
  --url URL            url of the website to be summarised
  --sentence SENTENCE  number of sentences in the summary
  --language LANGUAGE  language of the summary
  --path PATH          path to a csv file of websites (bulk mode)
```

## License

This project is licensed under the MIT License - see the [LICENSE](LICENSE.md) file for details

146 changes: 146 additions & 0 deletions projects/web page summation/app.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
#!/usr/bin/python
from tempfile import NamedTemporaryFile
from utils.summarize import summarize
import csv
import json
import shutil
import os
import textwrap
import logging
import signal
import argparse
import sys
import getopt


def parse_args(argv):
    """Parse the command line arguments.

    Args:
        argv: full argument vector; argv[0] (the program name) is skipped.

    Returns:
        argparse.Namespace with attributes: action, url, sentence,
        language and path.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            A command line utility for website summarization.
            -----------------------------------------------
            These are common commands for this app.'''))
    parser.add_argument(
        'action',
        # main() only dispatches on 'simple' and 'bulk'; the old help text
        # ("summarize") described an action that does not exist.
        help="This action should be 'simple' or 'bulk'")
    parser.add_argument(
        '--url',
        help='A link to the website url'
    )
    parser.add_argument(
        '--sentence',
        help='Argument to define number of sentence for the summary',
        type=int,
        default=2)
    parser.add_argument(
        '--language',
        help='Argument to define language of the summary',
        default='English')
    parser.add_argument(
        '--path',
        help='path to csv file')

    return parser.parse_args(argv[1:])


def readCsv(path):
    """Read a CSV file into a list of rows (each row a list of strings).

    Tries the platform default encoding first and falls back to the
    Windows ANSI code page ('mbcs') on a decode failure.

    Args:
        path: filesystem path of the csv file to read.

    Returns:
        list of rows; empty list for an empty file.

    Raises:
        OSError: if the file cannot be opened at all.
    """
    print('\n\n Processing Csv file \n\n')
    sys.stdout.flush()
    try:
        with open(path, 'r') as userFile:
            # Materialize inside the 'with' so the file is closed promptly.
            data = list(csv.reader(userFile))
    except UnicodeDecodeError:
        # Retry with the Windows ANSI code page. Building a fresh list here
        # (instead of appending to a shared one) avoids duplicating rows that
        # were already read before the first attempt failed mid-file.
        # NOTE(review): 'mbcs' only exists on Windows — the fallback will
        # raise LookupError elsewhere; confirm the intended platforms.
        with open(path, 'r', encoding="mbcs") as userFile:
            data = list(csv.reader(userFile))
    return data


def writeCsv(data, LANGUAGE, SENTENCES_COUNT):
    """Summarize the 'website' column of each data row and write the result
    to 'beneficiary.csv' with an extra 'summary' column appended.

    Args:
        data: list of csv rows; data[0] is the header and must contain
            a 'website' column.
        LANGUAGE: language passed through to summarize().
        SENTENCES_COUNT: sentence count passed through to summarize().
    """
    print('\n\n Updating Csv file \n\n')
    sys.stdout.flush()
    # newline='' is required by the csv module to avoid blank rows on Windows;
    # 'with' guarantees the file is flushed and closed even on error.
    with open('beneficiary.csv', 'w', newline='') as newFile:
        newFileWriter = csv.writer(newFile)
        position = data[0].index('website')
        # Write the header once, unconditionally (the old code compared the
        # loop index with 'is 1' — an identity check on an int — and never
        # wrote the header when the file had no data rows).
        header = data[0]
        header.append("summary")
        newFileWriter.writerow(header)
        for row in data[1:]:
            try:
                summary = summarize(row[position], LANGUAGE, SENTENCES_COUNT)
                row.append(summary)
                newFileWriter.writerow(row)
            except Exception:
                # Best-effort: skip rows whose website cannot be summarized
                # instead of aborting the whole batch.
                print('\n\n Error Skipping line \n\n')
                sys.stdout.flush()


def processCsv(path, LANGUAGE, SENTENCES_COUNT):
    """Read the csv at *path* and write summaries for every row.

    Delegates to readCsv() / writeCsv(); any failure is reported to the
    user rather than propagated.

    Args:
        path: filesystem path of the input csv file.
        LANGUAGE: language passed through to summarize().
        SENTENCES_COUNT: sentence count passed through to summarize().
    """
    try:
        print('\n\n Processing Started \n\n')
        sys.stdout.flush()
        data = readCsv(path)
        writeCsv(data, LANGUAGE, SENTENCES_COUNT)
    except Exception:
        # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit
        # still propagate.
        print('\n\n Invalid file in file path \n\n')
        sys.stdout.flush()


def main(argv=sys.argv):
    """CLI entry point: dispatch to bulk (csv) or simple (single url) mode.

    Args:
        argv: argument vector, defaulting to sys.argv.
    """
    # Log to a file so the console stays free for user-facing messages.
    logging.basicConfig(filename='applog.log',
                        filemode='w',
                        level=logging.INFO,
                        format='%(levelname)s:%(message)s')
    args = parse_args(argv)
    action = args.action
    url = args.url
    path = args.path
    # Defensive fallbacks only — argparse already supplies defaults for both.
    LANGUAGE = "english" if args.language is None else args.language
    SENTENCES_COUNT = 2 if args.sentence is None else args.sentence
    if action == 'bulk':
        if path is None:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()
            return
        # guide against errors
        try:
            processCsv(path, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()
            # Bug fix: previously fell through, printed 'Completed' and
            # moved the (possibly partial) output over the input file.
            return
        print('Completed')
        sys.stdout.flush()
        # processCsv writes to 'beneficiary.csv'; replace the input with it.
        if os.path.isfile('beneficiary.csv'):
            return shutil.move('beneficiary.csv', path)
        return
    elif action == 'simple':
        # guide against errors
        try:
            summarize(url, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid web link \n\n')
            sys.stdout.flush()
            # Bug fix: previously still printed 'Completed' after a failure.
            return
        print('Completed')
        sys.stdout.flush()
    else:
        print(
            '\nAction command is not supported\n for help: run python3 app.py -h'
        )
        sys.stdout.flush()
        return


# Run the CLI only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
5 changes: 5 additions & 0 deletions projects/web page summation/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
sumy

nltk
numpy
argparse
Empty file.
Loading