Skip to content
This repository was archived by the owner on May 25, 2022. It is now read-only.

Commit 6453b77

Browse files
authored
Merge pull request #68 from believeohiozua/master
web page summation
2 parents 6f13540 + 8292b59 commit 6453b77

File tree

12 files changed

+951
-0
lines changed

12 files changed

+951
-0
lines changed
Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
*.egg-info/
24+
.installed.cfg
25+
*.egg
26+
MANIFEST
27+
28+
# PyInstaller
29+
# Usually these files are written by a python script from a template
30+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
31+
*.manifest
32+
*.spec
33+
34+
# Installer logs
35+
pip-log.txt
36+
pip-delete-this-directory.txt
37+
38+
# Unit test / coverage reports
39+
htmlcov/
40+
.tox/
41+
.coverage
42+
.coverage.*
43+
.cache
44+
nosetests.xml
45+
coverage.xml
46+
*.cover
47+
.hypothesis/
48+
.pytest_cache/
49+
50+
# Translations
51+
*.mo
52+
*.pot
53+
54+
# Django stuff:
55+
*.log
56+
local_settings.py
57+
db.sqlite3
58+
59+
# Flask stuff:
60+
instance/
61+
.webassets-cache
62+
63+
# Scrapy stuff:
64+
.scrapy
65+
66+
# Sphinx documentation
67+
docs/_build/
68+
69+
# PyBuilder
70+
target/
71+
72+
# Jupyter Notebook
73+
.ipynb_checkpoints
74+
75+
# pyenv
76+
.python-version
77+
78+
# celery beat schedule file
79+
celerybeat-schedule
80+
81+
# SageMath parsed files
82+
*.sage.py
83+
84+
# Environments
85+
.env
86+
.venv
87+
venv/
88+
env.bak/
89+
venv.bak/
90+
env/
91+
92+
# Spyder project settings
93+
.spyderproject
94+
.spyproject
95+
96+
# Rope project settings
97+
.ropeproject
98+
99+
# mkdocs documentation
100+
/site
101+
102+
# mypy
103+
.mypy_cache/
104+
105+
__pycache__
106+
.vscode
107+
settings.json
108+
109+
# Dependency directories
110+
node_modules/
111+
jspm_packages/
112+
113+
# Optional npm cache directory
114+
.npm
115+
.DS_Store
116+
.DS_Store
117+
datasets
118+
datasets/
119+
new_datasets/
120+
node_modules
121+
yarn.lock
122+
app
123+
__pycache__/
124+
dist
125+
build
126+
mlclassification-darwin-x64
127+
release-builds
128+
Classifi
129+
app
130+
dist
131+
build
132+
Summarize.spec
133+
__pycache__
134+
applog.log
135+
csv/
136+
beneficiary.csv
137+
.DS_Store
138+
applog.log
Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
# Website Summarization API
2+
3+
This project builds a machine learning model for summarising a website from its URL.
4+
5+
## Getting Started
6+
7+
These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
8+
9+
10+
### Prerequisites
11+
12+
Python distribution
13+
14+
```
15+
Anaconda
16+
```
17+
18+
### Installing
19+
20+
Install Anaconda python distribution on your system
21+
22+
Create a virtual environment called app.
23+
24+
```
25+
python -m venv app
26+
```
27+
28+
Activate the virtual environment
29+
30+
```
31+
LINUX/Mac: source app/bin/activate
32+
33+
Windows: app\Scripts\activate
34+
```
35+
36+
Upgrade to the latest pip
37+
38+
```
39+
pip install --upgrade pip
40+
```
41+
42+
Install dependencies using requirements file
43+
44+
```
45+
pip install -r requirements.txt
46+
```
47+
**Note: Your virtual environment must always be activated before running any command**
48+
49+
## Deployment
50+
51+
Start the app (make sure to enter a valid URL of an existing website)
52+
53+
54+
Example of valid commands
55+
56+
```
57+
python app.py simple --url https://facebook.com --sentence 1 --language english
58+
python app.py simple --url https://facebook.com
59+
python app.py simple --url https://korapay.com
60+
python app.py bulk --path ./csv/valid_websites.csv
61+
```
62+
63+
64+
### APIs
65+
66+
These are the command options in full:
67+
68+
```
69+
A command line utility for website Summarization.
70+
-----------------------------------------------
71+
These are common commands for this app.
72+
73+
positional arguments:
74+
action This has to be 'simple' (summarise one url) or 'bulk' (summarise every website in a csv file)
75+
76+
optional arguments:
77+
-h, --help show this help message and exit
78+
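--url URL website url to be summarised (used by 'simple')
--sentence SENTENCE number of sentences in the summary (default: 2)
--language LANGUAGE language of the summary (default: English)
--path PATH path to the csv file (used by 'bulk')
```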
79+
80+
81+
## License
82+
83+
This project is licensed under the MIT License - see the [LICENSE](LICENSE.md) file for details
84+

projects/web page summation/app.py

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
#!/usr/bin/python
2+
from tempfile import NamedTemporaryFile
3+
from utils.summarize import summarize
4+
import csv
5+
import json
6+
import shutil
7+
import os
8+
import textwrap
9+
import logging
10+
import signal
11+
import argparse
12+
import sys
13+
import getopt
14+
15+
16+
def parse_args(argv):
    """Parse the command-line arguments of the summarization tool.

    Args:
        argv: Full argument vector including the program name at index 0
            (the same layout as ``sys.argv``); only ``argv[1:]`` is parsed.

    Returns:
        argparse.Namespace with the attributes ``action``, ``url``,
        ``sentence`` (int, default 2), ``language`` (default 'English')
        and ``path``.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description=textwrap.dedent('''\
            A command line utility for website summarization.
            -----------------------------------------------
            These are common commands for this app.'''))
    # The help text names the actions main() actually dispatches on; the
    # previous wording ("summarize") described an action that is rejected.
    parser.add_argument(
        'action',
        help="Action to run: 'simple' (summarize one --url) or "
             "'bulk' (summarize every website listed in the csv at --path)")
    parser.add_argument(
        '--url',
        help='A link to the website url'
    )
    parser.add_argument(
        '--sentence',
        help='Argument to define number of sentence for the summary',
        type=int,
        default=2)
    parser.add_argument(
        '--language',
        help='Argument to define language of the summary',
        default='English')
    parser.add_argument(
        '--path',
        help='path to csv file')

    # argv[0] is the program name and must not be parsed as the action.
    return parser.parse_args(argv[1:])
44+
45+
46+
def readCsv(path):
    """Read a CSV file and return all of its rows.

    The file is first read with the platform default encoding. If that
    fails to decode, it is re-read from scratch with the Windows ANSI code
    page ('mbcs'), falling back to 'latin-1' on platforms where 'mbcs'
    does not exist.

    Args:
        path: Path of the CSV file to read.

    Returns:
        A list of rows, each row being a list of strings.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the 'mbcs' fallback cannot decode the file.
    """
    print('\n\n Processing Csv file \n\n')
    sys.stdout.flush()

    def _load(**open_kwargs):
        # newline='' lets the csv module handle line endings itself, as the
        # csv documentation requires. Each attempt builds a fresh list so a
        # failed pass can never leave partial rows behind.
        with open(path, 'r', newline='', **open_kwargs) as userFile:
            return list(csv.reader(userFile))

    try:
        return _load()
    except UnicodeDecodeError:
        # Only a decoding problem justifies a retry with another encoding;
        # anything else (e.g. a missing file) propagates unchanged.
        pass
    try:
        return _load(encoding='mbcs')
    except LookupError:
        # 'mbcs' is a Windows-only codec; latin-1 maps every byte, so it
        # always decodes.
        return _load(encoding='latin-1')
61+
62+
63+
def writeCsv(data, LANGUAGE, SENTENCES_COUNT):
    """Write ``data`` plus a 'summary' column to 'beneficiary.csv'.

    The first row of ``data`` is the header and must contain a 'website'
    column. Every following row is summarized through ``summarize`` and
    written with the summary appended. Rows that fail are reported on
    stdout and skipped.

    Args:
        data: List of rows (lists of strings); ``data[0]`` is the header.
        LANGUAGE: Language passed through to ``summarize``.
        SENTENCES_COUNT: Sentence count passed through to ``summarize``.

    Raises:
        IndexError: If ``data`` is empty.
        ValueError: If the header has no 'website' column.
    """
    print('\n\n Updating Csv file \n\n')
    sys.stdout.flush()

    # Validate the header before the output file is opened, so that bad
    # input does not leave a truncated 'beneficiary.csv' behind.
    header = data[0]
    position = header.index('website')

    # newline='' stops the csv writer's '\r\n' from being translated again
    # (which produces blank lines between rows on Windows).
    with open('beneficiary.csv', 'w', newline='') as newFile:
        newFileWriter = csv.writer(newFile)
        # The header is written unconditionally, even with no data rows.
        newFileWriter.writerow(header + ['summary'])
        for row in data[1:]:
            try:
                summary = summarize(row[position], LANGUAGE, SENTENCES_COUNT)
                newFileWriter.writerow(row + [summary])
            except Exception:
                # Best effort: one bad row must not abort the whole file.
                print('\n\n Error Skipping line \n\n')
                sys.stdout.flush()
84+
85+
86+
def processCsv(path, LANGUAGE, SENTENCES_COUNT):
    """Read the CSV at ``path`` and write its summarized copy.

    Reads the rows with ``readCsv`` and hands them to ``writeCsv``. Any
    failure is reported on stdout and logged with its traceback; it is not
    re-raised.

    Args:
        path: Path of the input CSV file.
        LANGUAGE: Language passed through to ``writeCsv``.
        SENTENCES_COUNT: Sentence count passed through to ``writeCsv``.
    """
    print('\n\n Proessing Started \n\n')
    sys.stdout.flush()
    try:
        data = readCsv(path)
        writeCsv(data, LANGUAGE, SENTENCES_COUNT)
    except Exception:
        # `except Exception` (not a bare except) lets Ctrl-C and SystemExit
        # through; the traceback goes to the log so the cause is not lost.
        logging.exception('Failed to process csv file %r', path)
        print('\n\n Invalid file in file path \n\n')
        sys.stdout.flush()
95+
96+
97+
def main(argv=sys.argv):
    """Entry point of the command-line tool.

    Dispatches on the positional ``action`` argument:

    * ``bulk``   - summarize every website of the CSV given by ``--path``
      and replace that CSV with the result.
    * ``simple`` - summarize the single website given by ``--url``.

    Any other action prints a usage hint.

    Args:
        argv: Argument vector including the program name at index 0.

    Returns:
        The destination path returned by ``shutil.move`` when the bulk
        result replaced the input file, otherwise ``None``.
    """
    # Configure logging
    logging.basicConfig(filename='applog.log',
                        filemode='w',
                        level=logging.INFO,
                        format='%(levelname)s:%(message)s')
    args = parse_args(argv)
    action = args.action
    url = args.url
    path = args.path
    LANGUAGE = "english" if args.language is None else args.language
    SENTENCES_COUNT = 2 if args.sentence is None else args.sentence

    if action == 'bulk':
        if path is None:
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()
            return
        # guard against errors
        try:
            processCsv(path, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            logging.exception('Bulk summarization failed for %r', path)
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid file path \n\n')
            sys.stdout.flush()
        print('Completed')
        sys.stdout.flush()
        # Replace the input only with a non-empty result: an empty output
        # means processing failed, and moving it would wipe the user's csv.
        if (os.path.isfile('beneficiary.csv')
                and os.path.getsize('beneficiary.csv') > 0):
            return shutil.move('beneficiary.csv', path)
        return

    if action == 'simple':
        # guard against errors
        try:
            # NOTE(review): the return value is not used here; presumably
            # summarize() reports the summary itself - confirm in
            # utils/summarize.py.
            summarize(url, LANGUAGE, SENTENCES_COUNT)
        except Exception:
            logging.exception('Summarization failed for %r', url)
            print(
                '\n\n Invalid Entry!, please Ensure you enter a valid web link \n\n')
            sys.stdout.flush()
        print('Completed')
        sys.stdout.flush()
    else:
        print(
            '\nAction command is not supported\n for help: run python3 app.py -h'
        )
        sys.stdout.flush()
    return
143+
144+
145+
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main(argv=sys.argv)
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
sumy
2+
3+
nltk
4+
numpy
5+
argparse

projects/web page summation/utils/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)