diff --git a/projects/web page summation/.gitignore b/projects/web page summation/.gitignore
new file mode 100644
index 00000000..00c7b95b
--- /dev/null
+++ b/projects/web page summation/.gitignore
@@ -0,0 +1,138 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+venv/
+env.bak/
+venv.bak/
+env/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+
+# Editor settings
+.vscode
+settings.json
+
+# Dependency directories
+node_modules/
+jspm_packages/
+
+# Optional npm cache directory
+.npm
+
+# Project artifacts
+.DS_Store
+datasets/
+new_datasets/
+yarn.lock
+app
+mlclassification-darwin-x64
+release-builds
+Classifi
+Summarize.spec
+applog.log
+csv/
+beneficiary.csv
diff --git a/projects/web page summation/README.md b/projects/web page summation/README.md
new file mode 100644
index 00000000..dab9f8ee
--- /dev/null
+++ b/projects/web page summation/README.md
@@ -0,0 +1,84 @@
+# Website Summarization API
+
+This project builds a machine learning tool for summarising a website from its URL.
+
+## Getting Started
+
+These instructions will get you a copy of the project up and running on your local machine for development and testing purposes.
+
+
+### Prerequisites
+
+Python distribution
+
+```
+Anaconda
+```
+
+### Installing
+
+Install the Anaconda Python distribution on your system.
+
+Create a virtual environment called app.
+
+```
+python -m venv app
+```
+
+Activate the virtual environment
+
+```
+LINUX/Mac: source app/bin/activate
+
+Windows: app\Scripts\activate
+```
+
+Upgrade to the latest pip
+
+```
+pip install --upgrade pip
+```
+
+Install dependencies using the requirements file
+
+```
+pip install -r requirements.txt
+```
+**Note: Your virtual environment must always be activated before running any command**
+
+## Deployment
+
+Start the app (make sure to pass the URL of an existing website).
+
+Examples of valid commands:
+
+```
+python app.py simple --url https://facebook.com --sentence 1 --language english
+python app.py simple --url https://facebook.com
+python app.py simple --url https://korapay.com
+python app.py bulk --path ./csv/valid_websites.csv
+```
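+### Bulk CSV format
+
+A minimal sketch of the CSV layout that `bulk` mode appears to expect: the loader looks up a `website` column in the header row and appends a `summary` column to each processed row. The file name and URLs below are only illustrative.
+
+```
+name,website
+Facebook,https://facebook.com
+Korapay,https://korapay.com
+```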
+### APIs
+
+These are the command options in full:
+
+```
+A command line utility for website summarization.
+-----------------------------------------------
+These are common commands for this app.
+
+positional arguments:
+  action                This has to be 'simple' (summarize a single URL) or
+                        'bulk' (summarize every website in a CSV file)
+
+optional arguments:
+  -h, --help            show this help message and exit
+  --url URL             a link to the website to be summarised
+  --sentence SENTENCE   number of sentences in the summary (default: 2)
+  --language LANGUAGE   language of the summary (default: english)
+  --path PATH           path to a CSV file for bulk mode
+```
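+### Using the summarizer from Python
+
+The summarizer can also be called directly from Python through `utils/summarize.py`; a minimal sketch (the URL is only an example):
+
+```
+from utils.summarize import summarize
+
+summary = summarize(url='https://korapay.com', LANGUAGE='english', SENTENCES_COUNT=2)
+print(summary)
+```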
+## License
+
+This project is licensed under the MIT License - see the [LICENSE](LICENSE.md) file for details
diff --git a/projects/web page summation/app.py b/projects/web page summation/app.py
new file mode 100644
index 00000000..94023696
--- /dev/null
+++ b/projects/web page summation/app.py
@@ -0,0 +1,146 @@
+#!/usr/bin/python
+from utils.summarize import summarize
+import csv
+import shutil
+import os
+import textwrap
+import logging
+import argparse
+import sys
+
+
+def parse_args(argv):
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description=textwrap.dedent('''\
+            A command line utility for website summarization.
+            -----------------------------------------------
+            These are common commands for this app.'''))
+    parser.add_argument(
+        'action',
+        help="This has to be 'simple' or 'bulk'")
+    parser.add_argument(
+        '--url',
+        help='A link to the website to be summarised')
+    parser.add_argument(
+        '--sentence',
+        help='Number of sentences in the summary',
+        type=int,
+        default=2)
+    parser.add_argument(
+        '--language',
+        help='Language of the summary',
+        default='english')
+    parser.add_argument(
+        '--path',
+        help='Path to a csv file for bulk summarization')
+
+    return parser.parse_args(argv[1:])
+
+
+def readCsv(path):
+    print('\n\n Processing Csv file \n\n')
+    sys.stdout.flush()
+    data = []
+    try:
+        with open(path, 'r') as userFile:
+            userFileReader = csv.reader(userFile)
+            for row in userFileReader:
+                data.append(row)
+    except UnicodeDecodeError:
+        # Fall back to the Windows ANSI code page for files exported from Excel.
+        with open(path, 'r', encoding="mbcs") as userFile:
+            userFileReader = csv.reader(userFile)
+            for row in userFileReader:
+                data.append(row)
+    return data
+
+
+def writeCsv(data, LANGUAGE, SENTENCES_COUNT):
+    print('\n\n Updating Csv file \n\n')
+    sys.stdout.flush()
+    with open('beneficiary.csv', 'w', newline='') as newFile:
+        newFileWriter = csv.writer(newFile)
+        length = len(data)
+        position = data[0].index('website')
+        for i in range(1, length):
+            if i == 1:
+                _data = data[0]
+                _data.append("summary")
+                newFileWriter.writerow(_data)
+            try:
+                __data = data[i]
+                summary = summarize(
+                    (data[i][position]), LANGUAGE, SENTENCES_COUNT)
+                __data.append(summary)
+                newFileWriter.writerow(__data)
+            except Exception:
+                print('\n\n Error, skipping line \n\n')
+                sys.stdout.flush()
+
+
+def processCsv(path, LANGUAGE, SENTENCES_COUNT):
+    try:
+        print('\n\n Processing started \n\n')
+        sys.stdout.flush()
+        data = readCsv(path)
+        writeCsv(data, LANGUAGE, SENTENCES_COUNT)
+    except Exception:
+        print('\n\n Invalid file in file path \n\n')
+        sys.stdout.flush()
+
+
+def main(argv=sys.argv):
+    # Configure logging
+    logging.basicConfig(filename='applog.log',
+                        filemode='w',
+                        level=logging.INFO,
+                        format='%(levelname)s:%(message)s')
+    args = parse_args(argv)
+    action = args.action
+    url = args.url
+    path = args.path
+    LANGUAGE = "english" if args.language is None else args.language
+    SENTENCES_COUNT = 2 if args.sentence is None else args.sentence
+    if action == 'bulk':
+        if path is None:
+            print(
+                '\n\n Invalid entry! Please ensure you enter a valid file path \n\n')
+            sys.stdout.flush()
+            return
+        # guard against errors
+        try:
+            processCsv(path, LANGUAGE, SENTENCES_COUNT)
+        except Exception:
+            print(
+                '\n\n Invalid entry! Please ensure you enter a valid file path \n\n')
+            sys.stdout.flush()
+            return
+        print('Completed')
+        sys.stdout.flush()
+        if os.path.isfile('beneficiary.csv'):
+            return shutil.move('beneficiary.csv', path)
+        return
+    if action == 'simple':
+        # guard against errors
+        try:
+            summary = summarize(url, LANGUAGE, SENTENCES_COUNT)
+        except Exception:
+            print(
+                '\n\n Invalid entry! Please ensure you enter a valid web link \n\n')
+            sys.stdout.flush()
+            return
+        print('Completed')
+        sys.stdout.flush()
+    else:
+        print(
+            '\nAction command is not supported.\nFor help, run: python3 app.py -h')
+        sys.stdout.flush()
+        return
+
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/projects/web page summation/requirements.txt b/projects/web page summation/requirements.txt
new file mode 100644
index 00000000..dffb29c5
--- /dev/null
+++ b/projects/web page summation/requirements.txt
@@ -0,0 +1,5 @@
+sumy
+
+nltk
+numpy
+argparse
diff --git a/projects/web page summation/utils/__init__.py b/projects/web page summation/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/projects/web page summation/utils/comparison.py b/projects/web page summation/utils/comparison.py
new file mode 100644
index 00000000..322f9739
--- /dev/null
+++ b/projects/web page summation/utils/comparison.py
@@ -0,0 +1,50 @@
+
+# https://github.com/chakki-works/sumeval
+# https://github.com/Tian312/awesome-text-summarization
+
+from sumeval.metrics.rouge import RougeCalculator
+from sumeval.metrics.bleu import BLEUCalculator
+
+
+def eval_rouges(reference_summary, model_summary):
+    # reference_summary = "tokyo shares close up #.## percent"
+    # model_summary = "tokyo stocks close up # percent to fresh record high"
+
+    rouge = RougeCalculator(stopwords=True, lang="en")
+
+    rouge_1 = rouge.rouge_n(
+        summary=model_summary,
+        references=[reference_summary],
+        n=1)
+
+    rouge_2 = rouge.rouge_n(
+        summary=model_summary,
+        references=[reference_summary],
+        n=2)
+
+    rouge_l = rouge.rouge_l(
+        summary=model_summary,
+        references=[reference_summary])
+
+    # You need spaCy to calculate ROUGE-BE
+    rouge_be = rouge.rouge_be(
+        summary=model_summary,
+        references=[reference_summary])
+
+    bleu = BLEUCalculator()
+    bleu_score = bleu.bleu(summary=model_summary,
+                           references=[reference_summary])
+
+    return rouge_1, rouge_2, rouge_l, rouge_be, bleu_score
+
+
+# Example:
+# rouge_1, rouge_2, rouge_l, rouge_be, bleu_score = eval_rouges(
+#     "tokyo shares close up #.## percent",
+#     "tokyo stocks close up # percent to fresh record high")
+#
+# print("ROUGE-1: {}, ROUGE-2: {}, ROUGE-L: {}, ROUGE-BE: {}".format(
+#     rouge_1, rouge_2, rouge_l, rouge_be
+# ).replace(", ", "\n"))
diff --git a/projects/web page summation/utils/model.py b/projects/web page summation/utils/model.py
new file mode 100644
index 00000000..b3b7d508
--- /dev/null
+++ b/projects/web page summation/utils/model.py
@@ -0,0 +1,131 @@
+import tensorflow as tf
+from tensorflow.contrib import rnn
+# get_init_embedding is needed when args.glove is set; this assumes the script
+# is run from inside the utils/ folder (as the other scripts here do).
+from utils import get_init_embedding
+
+
+class Model(object):
+    def __init__(self, reversed_dict, article_max_len, summary_max_len, args, forward_only=False):
+        self.vocabulary_size = len(reversed_dict)
+        self.embedding_size = args.embedding_size
+        self.num_hidden = args.num_hidden
+        self.num_layers = args.num_layers
+        self.learning_rate = args.learning_rate
+        self.beam_width = args.beam_width
+        if not forward_only:
+            self.keep_prob = args.keep_prob
+        else:
+            self.keep_prob = 1.0
+        self.cell = tf.nn.rnn_cell.BasicLSTMCell
+        with tf.variable_scope("decoder/projection"):
+            self.projection_layer = tf.layers.Dense(
+                self.vocabulary_size, use_bias=False)
+
+        self.batch_size = tf.placeholder(tf.int32, (), name="batch_size")
+        self.X = tf.placeholder(tf.int32, [None, article_max_len])
+        self.X_len = tf.placeholder(tf.int32, [None])
+        self.decoder_input = tf.placeholder(tf.int32, [None, summary_max_len])
+        self.decoder_len = tf.placeholder(tf.int32, [None])
+        self.decoder_target = tf.placeholder(tf.int32, [None, summary_max_len])
+        self.global_step = tf.Variable(0, trainable=False)
+
+        with tf.name_scope("embedding"):
+            if not forward_only and args.glove:
+                init_embeddings = tf.constant(get_init_embedding(
+                    reversed_dict, self.embedding_size), dtype=tf.float32)
+            else:
+                init_embeddings = tf.random_uniform(
+                    [self.vocabulary_size, self.embedding_size], -1.0, 1.0)
+            self.embeddings = tf.get_variable(
+                "embeddings", initializer=init_embeddings)
+            self.encoder_emb_inp = tf.transpose(
+                tf.nn.embedding_lookup(self.embeddings, self.X), perm=[1, 0, 2])
+            self.decoder_emb_inp = tf.transpose(tf.nn.embedding_lookup(
+                self.embeddings, self.decoder_input), perm=[1, 0, 2])
+
+        with tf.name_scope("encoder"):
+            fw_cells = [self.cell(self.num_hidden)
+                        for _ in range(self.num_layers)]
+            bw_cells = [self.cell(self.num_hidden)
+                        for _ in range(self.num_layers)]
+            fw_cells = [rnn.DropoutWrapper(cell) for cell in fw_cells]
+            bw_cells = [rnn.DropoutWrapper(cell) for cell in bw_cells]
+
+            encoder_outputs, encoder_state_fw, encoder_state_bw = tf.contrib.rnn.stack_bidirectional_dynamic_rnn(
+                fw_cells, bw_cells, self.encoder_emb_inp,
+                sequence_length=self.X_len, time_major=True, dtype=tf.float32)
+            self.encoder_output = tf.concat(encoder_outputs, 2)
+            encoder_state_c = tf.concat(
+                (encoder_state_fw[0].c, encoder_state_bw[0].c), 1)
+            encoder_state_h = tf.concat(
+                (encoder_state_fw[0].h, encoder_state_bw[0].h), 1)
+            self.encoder_state = rnn.LSTMStateTuple(
+                c=encoder_state_c, h=encoder_state_h)
+
+        with tf.name_scope("decoder"), tf.variable_scope("decoder") as decoder_scope:
+            decoder_cell = self.cell(self.num_hidden * 2)
+
+            if not forward_only:
+                attention_states = tf.transpose(self.encoder_output, [1, 0, 2])
+                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
+                    self.num_hidden * 2, attention_states, memory_sequence_length=self.X_len, normalize=True)
+                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
+                                                                   attention_layer_size=self.num_hidden * 2)
+                initial_state = decoder_cell.zero_state(
+                    dtype=tf.float32, batch_size=self.batch_size)
+                initial_state = initial_state.clone(
+                    cell_state=self.encoder_state)
+                helper = tf.contrib.seq2seq.TrainingHelper(
+                    self.decoder_emb_inp, self.decoder_len, time_major=True)
+                decoder = tf.contrib.seq2seq.BasicDecoder(
+                    decoder_cell, helper, initial_state)
+                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
+                    decoder, output_time_major=True, scope=decoder_scope)
+                self.decoder_output = outputs.rnn_output
+                self.logits = tf.transpose(
+                    self.projection_layer(self.decoder_output), perm=[1, 0, 2])
+                self.logits_reshape = tf.concat(
+                    [self.logits, tf.zeros([self.batch_size, summary_max_len - tf.shape(self.logits)[1], self.vocabulary_size])], axis=1)
+            else:
+                tiled_encoder_output = tf.contrib.seq2seq.tile_batch(
+                    tf.transpose(self.encoder_output, perm=[1, 0, 2]), multiplier=self.beam_width)
+                tiled_encoder_final_state = tf.contrib.seq2seq.tile_batch(
+                    self.encoder_state, multiplier=self.beam_width)
+                tiled_seq_len = tf.contrib.seq2seq.tile_batch(
+                    self.X_len, multiplier=self.beam_width)
+                attention_mechanism = tf.contrib.seq2seq.BahdanauAttention(
+                    self.num_hidden * 2, tiled_encoder_output, memory_sequence_length=tiled_seq_len, normalize=True)
+                decoder_cell = tf.contrib.seq2seq.AttentionWrapper(decoder_cell, attention_mechanism,
+                                                                   attention_layer_size=self.num_hidden * 2)
+                initial_state = decoder_cell.zero_state(
+                    dtype=tf.float32, batch_size=self.batch_size * self.beam_width)
+                initial_state = initial_state.clone(
+                    cell_state=tiled_encoder_final_state)
+                decoder = tf.contrib.seq2seq.BeamSearchDecoder(
+                    cell=decoder_cell,
+                    embedding=self.embeddings,
+                    start_tokens=tf.fill([self.batch_size], tf.constant(2)),
+                    end_token=tf.constant(3),
+                    initial_state=initial_state,
+                    beam_width=self.beam_width,
+                    output_layer=self.projection_layer
+                )
+                outputs, _, _ = tf.contrib.seq2seq.dynamic_decode(
+                    decoder, output_time_major=True, maximum_iterations=summary_max_len, scope=decoder_scope)
+                self.prediction = tf.transpose(
+                    outputs.predicted_ids, perm=[1, 2, 0])
+
+        with tf.name_scope("loss"):
+            if not forward_only:
+                crossent = tf.nn.sparse_softmax_cross_entropy_with_logits(
+                    logits=self.logits_reshape, labels=self.decoder_target)
+                weights = tf.sequence_mask(
+                    self.decoder_len, summary_max_len, dtype=tf.float32)
+                self.loss = tf.reduce_sum(
+                    crossent * weights / tf.to_float(self.batch_size))
+
+                params = tf.trainable_variables()
+                gradients = tf.gradients(self.loss, params)
+                clipped_gradients, _ = tf.clip_by_global_norm(gradients, 5.0)
+                optimizer = tf.train.AdamOptimizer(self.learning_rate)
+                self.update = optimizer.apply_gradients(
+                    zip(clipped_gradients, params), global_step=self.global_step)
diff --git a/projects/web page summation/utils/prepare.py b/projects/web page summation/utils/prepare.py
new file mode 100644
index 00000000..12854503
--- /dev/null
+++ b/projects/web page summation/utils/prepare.py
@@ -0,0 +1,39 @@
+
+import wget
+import os
+import tarfile
+import gzip
+import zipfile
+import argparse
+
+
+#parser = argparse.ArgumentParser()
+#parser.add_argument("--glove", action="store_true")
+#args = parser.parse_args()
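+# Assumed usage (a sketch, not part of the original script): place the
+# compressed dataset files train.article.txt.gz and train.title.txt.gz under
+# sumdata/train/ next to this script, then run it from the utils/ folder:
+#
+#     python prepare.py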
+# Assumed project-relative data directory; adjust if the sumdata files live elsewhere.
+default_path = "./"
+
+# Extract data file
+#with tarfile.open(default_path + "sumdata/train/summary.tar.gz", "r:gz") as tar:
+#    tar.extractall()
+
+with gzip.open(default_path + "sumdata/train/train.article.txt.gz", "rb") as gz:
+    with open(default_path + "sumdata/train/train.article.txt", "wb") as out:
+        out.write(gz.read())
+
+with gzip.open(default_path + "sumdata/train/train.title.txt.gz", "rb") as gz:
+    with open(default_path + "sumdata/train/train.title.txt", "wb") as out:
+        out.write(gz.read())
+
+
+#if args.glove:
+#    glove_dir = "glove"
+#    glove_url = "https://nlp.stanford.edu/data/wordvecs/glove.42B.300d.zip"
+#
+#    if not os.path.exists(glove_dir):
+#        os.mkdir(glove_dir)
+#
+#    # Download glove vector
+#    wget.download(glove_url, out=glove_dir)
+#
+#    # Extract glove file
+#    with zipfile.ZipFile(os.path.join("glove", "glove.42B.300d.zip"), "r") as z:
+#        z.extractall(glove_dir)
\ No newline at end of file
diff --git a/projects/web page summation/utils/summarize.py b/projects/web page summation/utils/summarize.py
new file mode 100644
index 00000000..74243944
--- /dev/null
+++ b/projects/web page summation/utils/summarize.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+# load dependencies
+
+from __future__ import absolute_import
+from __future__ import division, print_function, unicode_literals
+from sumy.parsers.html import HtmlParser
+from sumy.parsers.plaintext import PlaintextParser
+from sumy.nlp.tokenizers import Tokenizer
+from sumy.summarizers.lex_rank import LexRankSummarizer as Summarizer
+from sumy.nlp.stemmers import Stemmer
+from sumy.utils import get_stop_words
+import sys
+
+
+def summarize(url=None, LANGUAGE='english', SENTENCES_COUNT=2):
+    try:
+        parser = HtmlParser.from_url(url, Tokenizer(LANGUAGE))
+        # or for plain text files
+        # parser = PlaintextParser.from_file("document.txt", Tokenizer(LANGUAGE))
+        stemmer = Stemmer(LANGUAGE)
+
+        summarizer = Summarizer(stemmer)
+        summarizer.stop_words = get_stop_words(LANGUAGE)
+        result = ''
+        for sentence in summarizer(parser.document, SENTENCES_COUNT):
+            result = result + ' ' + str(sentence)
+    except Exception:
+        print(
+            '\n\n Invalid entry! Please ensure you enter a valid web link \n\n')
+        sys.stdout.flush()
+        return (
+            '\n\n Invalid entry! Please ensure you enter a valid web link \n\n')
+    print('\n\n' + str(url) + '\n\n' + str(result))
+    sys.stdout.flush()
+    return result
diff --git a/projects/web page summation/utils/test.py b/projects/web page summation/utils/test.py
new file mode 100644
index 00000000..bfcdca62
--- /dev/null
+++ b/projects/web page summation/utils/test.py
@@ -0,0 +1,79 @@
+import tensorflow as tf
+import pickle
+# These imports are needed at runtime; they assume the script is run from
+# inside the utils/ folder.
+from model import Model
+from utils import build_dict, build_dataset, batch_iter, get_text_list
+
+
+# with open("args.pickle", "rb") as f:
+#     args = pickle.load(f)
+
+tf.reset_default_graph()
+
+# Assumed data locations (adjust to where prepare.py extracted the dataset);
+# the validation file names below are illustrative placeholders.
+default_path = "./"
+valid_article_path = default_path + "sumdata/train/valid.article.filter.txt"
+valid_title_path = default_path + "sumdata/train/valid.title.filter.txt"
+
+
+class args:
+    pass
+
+
+# Hyperparameters mirroring the commented-out argparse defaults in train.py
+args.num_hidden = 150
+args.num_layers = 2
+args.beam_width = 10
+args.glove = "store_true"
+args.embedding_size = 300
+
+args.learning_rate = 1e-3
+args.batch_size = 64
+args.num_epochs = 10
+args.keep_prob = 0.8
+
+args.toy = True
+
+args.with_model = "store_true"
+
+
+print("Loading dictionary...")
+word_dict, reversed_dict, article_max_len, summary_max_len = build_dict(
+    "valid", args.toy)
+print("Loading validation dataset...")
+valid_x = build_dataset(
+    "valid", word_dict, article_max_len, summary_max_len, args.toy)
+valid_x_len = [len([y for y in x if y != 0]) for x in valid_x]
+print("Loading article and reference...")
+article = get_text_list(valid_article_path, args.toy)
+reference = get_text_list(valid_title_path, args.toy)
+
+with tf.Session() as sess:
+    print("Loading saved model...")
+    model = Model(reversed_dict, article_max_len,
+                  summary_max_len, args, forward_only=True)
+    saver = tf.train.Saver(tf.global_variables())
+    ckpt = tf.train.get_checkpoint_state(default_path + "saved_model/")
+    saver.restore(sess, ckpt.model_checkpoint_path)
+
+    batches = batch_iter(valid_x, [0] * len(valid_x), args.batch_size, 1)
+
+    print("Writing summaries to 'result.txt'...")
+    for batch_x, _ in batches:
+        batch_x_len = [len([y for y in x if y != 0]) for x in batch_x]
+
+        valid_feed_dict = {
+            model.batch_size: len(batch_x),
+            model.X: batch_x,
+            model.X_len: batch_x_len,
+        }
+
+        prediction = sess.run(model.prediction, feed_dict=valid_feed_dict)
+        prediction_output = [[reversed_dict[y]
+                              for y in x] for x in prediction[:, 0, :]]
+        summary_array = []
+        with open(default_path + "result.txt", "a") as f:
+            for line in prediction_output:
+                summary = list()
+                for word in line:
+                    if word == "</s>":
+                        break
+                    if word not in summary:
+                        summary.append(word)
+                summary_array.append(" ".join(summary))
+                print(" ".join(summary), file=f)
+
+    print('Summaries have been generated')
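+# Optional scoring sketch (assumed wiring, not part of the original script):
+# the summaries written to result.txt could be scored against the reference
+# titles with utils/comparison.py, which requires the `sumeval` package.
+#
+# from comparison import eval_rouges
+# with open(default_path + "result.txt") as f:
+#     hypotheses = [line.strip() for line in f]
+# for ref, hyp in zip(reference, hypotheses):
+#     rouge_1, rouge_2, rouge_l, rouge_be, bleu_score = eval_rouges(ref, hyp)
+#     print(rouge_1, rouge_2, rouge_l, rouge_be, bleu_score)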
diff --git a/projects/web page summation/utils/train.py b/projects/web page summation/utils/train.py
new file mode 100644
index 00000000..d1a8d32f
--- /dev/null
+++ b/projects/web page summation/utils/train.py
@@ -0,0 +1,126 @@
+import os
+import pickle
+import argparse
+import tensorflow as tf
+import time
+# These imports are needed at runtime; they assume the script is run from
+# inside the utils/ folder.
+from model import Model
+from utils import build_dict, build_dataset, batch_iter
+
+start = time.perf_counter()
+
+# Assumed project-relative path for the extracted data, the word_dict pickle
+# and the saved_model directory.
+default_path = "./"
+
+# Uncomment next 2 lines to suppress error and Tensorflow info verbosity. Or change logging levels
+# tf.logging.set_verbosity(tf.logging.FATAL)
+# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+
+# def add_arguments(parser):
+#     parser.add_argument("--num_hidden", type=int, default=150, help="Network size.")
+#     parser.add_argument("--num_layers", type=int, default=2, help="Network depth.")
+#     parser.add_argument("--beam_width", type=int, default=10, help="Beam width for beam search decoder.")
+#     parser.add_argument("--glove", action="store_true", help="Use glove as initial word embedding.")
+#     parser.add_argument("--embedding_size", type=int, default=300, help="Word embedding size.")
+#
+#     parser.add_argument("--learning_rate", type=float, default=1e-3, help="Learning rate.")
+#     parser.add_argument("--batch_size", type=int, default=64, help="Batch size.")
+#     parser.add_argument("--num_epochs", type=int, default=10, help="Number of epochs.")
+#     parser.add_argument("--keep_prob", type=float, default=0.8, help="Dropout keep prob.")
+#
+#     parser.add_argument("--toy", action="store_true", help="Use only 50K samples of data")
+#
+#     parser.add_argument("--with_model", action="store_true", help="Continue from previously saved model")
+
+
+class args:
+    pass
+
+
+# Hyperparameters mirroring the commented-out argparse defaults above.
+# Note: args.glove is a non-empty string, so GloVe initialisation is always attempted.
+args.num_hidden = 150
+args.num_layers = 2
+args.beam_width = 10
+args.glove = "store_true"
+args.embedding_size = 300
+
+args.learning_rate = 1e-3
+args.batch_size = 64
+args.num_epochs = 10
+args.keep_prob = 0.8
+
+args.toy = False  # "store_true"
+
+args.with_model = "store_true"
+
+
+#parser = argparse.ArgumentParser()
+# add_arguments(parser)
+#args = parser.parse_args()
+# with open("args.pickle", "wb") as f:
+#     pickle.dump(args, f)
+
+if not os.path.exists(default_path + "saved_model"):
+    os.mkdir(default_path + "saved_model")
+else:
+    # if args.with_model:
+    with open(default_path + 'saved_model/checkpoint', 'r') as checkpoint_file:
+        old_model_checkpoint_path = "".join(
+            [default_path + "saved_model/", checkpoint_file.read().splitlines()[0].split('"')[1]])
+
+
+print("Building dictionary...")
+word_dict, reversed_dict, article_max_len, summary_max_len = build_dict(
+    "train", args.toy)
+print("Loading training dataset...")
+train_x, train_y = build_dataset(
+    "train", word_dict, article_max_len, summary_max_len, args.toy)
+
+tf.reset_default_graph()
+
+with tf.Session() as sess:
+    model = Model(reversed_dict, article_max_len, summary_max_len, args)
+    sess.run(tf.global_variables_initializer())
+    saver = tf.train.Saver(tf.global_variables())
+    if 'old_model_checkpoint_path' in globals():
+        print("Continuing from previous trained model:",
+              old_model_checkpoint_path, "...")
+        saver.restore(sess, old_model_checkpoint_path)
+
+    batches = batch_iter(train_x, train_y, args.batch_size, args.num_epochs)
+    num_batches_per_epoch = (len(train_x) - 1) // args.batch_size + 1
+
+    print("\nIteration starts.")
+    print("Number of batches per epoch :", num_batches_per_epoch)
+    for batch_x, batch_y in batches:
+        batch_x_len = list(
+            map(lambda x: len([y for y in x if y != 0]), batch_x))
+        batch_decoder_input = list(
+            map(lambda x: [word_dict["<s>"]] + list(x), batch_y))
+        batch_decoder_len = list(
+            map(lambda x: len([y for y in x if y != 0]), batch_decoder_input))
+        batch_decoder_output = list(
+            map(lambda x: list(x) + [word_dict["</s>"]], batch_y))
+
+        batch_decoder_input = list(
+            map(lambda d: d + (summary_max_len - len(d)) * [word_dict["<padding>"]], batch_decoder_input))
+        batch_decoder_output = list(
+            map(lambda d: d + (summary_max_len - len(d)) * [word_dict["<padding>"]], batch_decoder_output))
+
+        train_feed_dict = {
+            model.batch_size: len(batch_x),
+            model.X: batch_x,
+            model.X_len: batch_x_len,
+            model.decoder_input: batch_decoder_input,
+            model.decoder_len: batch_decoder_len,
+            model.decoder_target: batch_decoder_output
+        }
+
+        _, step, loss = sess.run(
+            [model.update, model.global_step, model.loss], feed_dict=train_feed_dict)
+
+        if step % 1000 == 0:
+            print("step {0}: loss = {1}".format(step, loss))
+
+        if step % num_batches_per_epoch == 0:
+            hours, rem = divmod(time.perf_counter() - start, 3600)
+            minutes, seconds = divmod(rem, 60)
+            saver.save(sess, default_path +
+                       "saved_model/model.ckpt", global_step=step)
+            print(" Epoch {0}: Model is saved.".format(step // num_batches_per_epoch),
+                  "Elapsed: {:0>2}:{:0>2}:{:05.2f}".format(int(hours), int(minutes), seconds), "\n")
diff --git a/projects/web page summation/utils/utils.py b/projects/web page summation/utils/utils.py
new file mode 100644
index 00000000..86c492ab
--- /dev/null
+++ b/projects/web page summation/utils/utils.py
@@ -0,0 +1,116 @@
+import re
+import collections
+import pickle
+import numpy as np
+from newspaper import Article
+from nltk.tokenize import word_tokenize
+from gensim.models.keyedvectors import KeyedVectors
+from gensim.test.utils import get_tmpfile
+from gensim.scripts.glove2word2vec import glove2word2vec
+
+
+def clean_str(sentence):
+    sentence = re.sub("[#.]+", "#", sentence)
+    return sentence
+
+
+def get_text_list(data_path, toy):
+    with open(data_path, "r", encoding="utf-8") as f:
+        if not toy:
+            return [clean_str(x.strip()) for x in f.readlines()][:200000]
+        else:
+            return [clean_str(x.strip()) for x in f.readlines()][:50]
+
+
+def build_dict(step, toy=False):
+    if step == "train":
+        train_article_list = get_text_list(train_article_path, toy)
+        train_title_list = get_text_list(train_title_path, toy)
+
+        words = list()
+        for sentence in train_article_list + train_title_list:
+            for word in word_tokenize(sentence):
+                words.append(word)
+
+        word_counter = collections.Counter(words).most_common()
+        word_dict = dict()
+        # Special tokens: ids 2 and 3 match the start/end token ids hard-coded in model.py
+        word_dict["<padding>"] = 0
+        word_dict["<unk>"] = 1
+        word_dict["<s>"] = 2
+        word_dict["</s>"] = 3
+        for word, _ in word_counter:
+            word_dict[word] = len(word_dict)
+
+        with open(default_path + "word_dict.pickle", "wb") as f:
+            pickle.dump(word_dict, f)
+
+    elif step == "valid":
+        with open(default_path + "word_dict.pickle", "rb") as f:
+            word_dict = pickle.load(f)
+
+    reversed_dict = dict(zip(word_dict.values(), word_dict.keys()))
+
+    article_max_len = 50
+    summary_max_len = 15
+
+    return word_dict, reversed_dict, article_max_len, summary_max_len
+
+
+def build_dataset(step, word_dict, article_max_len, summary_max_len, toy=False):
+    if step == "train":
+        article_list = get_text_list(train_article_path, toy)
+        title_list = get_text_list(train_title_path, toy)
+    elif step == "valid":
+        article_list = get_text_list(valid_article_path, toy)
+    else:
+        raise NotImplementedError
+
+    x = [word_tokenize(d) for d in article_list]
+    x = [[word_dict.get(w, word_dict["<unk>"]) for w in d] for d in x]
+    x = [d[:article_max_len] for d in x]
+    x = [d + (article_max_len - len(d)) * [word_dict["<padding>"]] for d in x]
+
+    if step == "valid":
+        return x
+    else:
+        y = [word_tokenize(d) for d in title_list]
+        y = [[word_dict.get(w, word_dict["<unk>"]) for w in d] for d in y]
+        y = [d[:(summary_max_len - 1)] for d in y]
+        return x, y
+
+
+def batch_iter(inputs, outputs, batch_size, num_epochs):
+    inputs = np.array(inputs)
+    outputs = np.array(outputs)
+
+    num_batches_per_epoch = (len(inputs) - 1) // batch_size + 1
+    for epoch in range(num_epochs):
+        for batch_num in range(num_batches_per_epoch):
+            start_index = batch_num * batch_size
+            end_index = min((batch_num + 1) * batch_size, len(inputs))
+            yield inputs[start_index:end_index], outputs[start_index:end_index]
+
+
+def get_init_embedding(reversed_dict, embedding_size):
+    #glove_file = default_path + "glove/glove.6B.300d.txt"
+    #word2vec_file = get_tmpfile(default_path + "word2vec_format.vec")
+    #glove2word2vec(glove_file, word2vec_file)
+    print("Loading Glove vectors...")
+    #word_vectors = KeyedVectors.load_word2vec_format(word2vec_file)
+
+    with open(default_path + "glove/model_glove_300.pkl", 'rb') as handle:
+        word_vectors = pickle.load(handle)
+
+    word_vec_list = list()
+    for _, word in sorted(reversed_dict.items()):
+        try:
+            word_vec = word_vectors.word_vec(word)
+        except KeyError:
+            word_vec = np.zeros([embedding_size], dtype=np.float32)
+
+        word_vec_list.append(word_vec)
+
+    # Assign random vectors to the <s> and </s> tokens
+    word_vec_list[2] = np.random.normal(0, 1, embedding_size)
+    word_vec_list[3] = np.random.normal(0, 1, embedding_size)
+
+    return np.array(word_vec_list)
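+
+
+# The path constants referenced above (default_path, train_article_path,
+# train_title_path, valid_article_path) are not defined anywhere in this patch.
+# The block below is a hedged sketch of the expected definitions: default_path
+# and the train file names follow prepare.py, while the validation file name is
+# an assumption to adjust to your own data. These would normally sit at the top
+# of the module, but Python resolves module globals at call time, so defining
+# them here still works.
+default_path = "./"
+train_article_path = default_path + "sumdata/train/train.article.txt"
+train_title_path = default_path + "sumdata/train/train.title.txt"
+valid_article_path = default_path + "sumdata/train/valid.article.filter.txt"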