## 1. Setup

In [None]:
# Mount Google Drive at /content/drive so that later cells can read and write
# files under BasePath. Colab-only: relies on the google.colab package.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Version tag of this run; it becomes part of the Drive folder name.
Ver = 'v2224'
# Google Drive folder that later cells use as the cache/backup location.
BasePath = f'/content/drive/My Drive/NLP-Resources/acronyms.{Ver}'

In [None]:
# Create the Drive folder for this version; -p makes this a no-op when the
# folder already exists.
!mkdir -p "$BasePath"

In [None]:
# Unpack the project code from code.zip, which is expected to be present in
# the working directory. The commented-out commands are the alternative route:
# fetch the main-branch archive from GitHub and move its contents here.
# !wget https://github.com/DevopediaOrg/AcronymLookup/archive/main.zip
# !unzip -qq main.zip
# !mv AcronymLookup-main/* .
!unzip -qq code.zip

In [None]:
# Install the project's Python dependencies. The %pip magic (rather than the
# !pip shell escape) guarantees the packages are installed into the environment
# of the running kernel.
%pip install -r requirements.txt

In [None]:
# Refresh the package index, install PostgreSQL and start the server.
# -y answers prompts automatically; -qq keeps apt output to a minimum.
!sudo apt-get -y -qq update
!sudo apt-get -y -qq install postgresql
!sudo service postgresql start

In [None]:
# Set the password of the postgres role (the same value is passed to
# psycopg2.connect later in this notebook), then drop and recreate the
# "acronyms" database so every run starts from an empty database.
!sudo -u postgres psql -U postgres -c "ALTER USER postgres PASSWORD 'pgpwd';"
!sudo -u postgres psql -U postgres -c 'DROP DATABASE IF EXISTS acronyms;'
!sudo -u postgres psql -U postgres -c 'CREATE DATABASE acronyms;'

In [None]:
# Run the project's SQL setup script against the acronyms database.
# NOTE(review): presumably creates the tables exported later in this
# notebook - confirm in postgres-database/setUpDb.sql.
!sudo -u postgres psql -U postgres -d acronyms -f postgres-database/setUpDb.sql

## 2. Data Collection

In [None]:
# Reuse the data backed up on Google Drive when it is available:
#   - both train.zip and test.zip present -> unpack them here and skip the download
#   - otherwise                           -> leave DataCollected False so that the
#                                            following cells download and back up

import os.path

# The cached copy is only usable when both archives exist.
DataCollected = all(
    os.path.exists('{}/{}.zip'.format(BasePath, split))
    for split in ('train', 'test')
)

if DataCollected:
    !unzip -q "$BasePath/train.zip"
    !unzip -q "$BasePath/test.zip"

In [None]:
# Only runs when no cached data was found on Google Drive.
# NOTE(review): presumably gathers the URLs that download.py fetches -
# confirm in get_urls.py.
if not DataCollected:
    !python get_urls.py

In [None]:
# Only runs when no cached data was found on Google Drive: download the data,
# then back up data/train and data/test as zip archives in the Drive folder so
# that later runs can skip the download.
if not DataCollected:
    !python download.py
    !zip -rq "$BasePath/train.zip" data/train
    !zip -rq "$BasePath/test.zip" data/test

In [None]:
# Sanity check: number of .htm files in the training and test folders.
!ls data/train/*.htm | wc -l
!ls data/test/*.htm | wc -l

## 3. Data Pre-processing

In [None]:
# NOTE(review): presumably loads the collected data into the acronyms
# database - confirm in add2db.py.
!python add2db.py

In [None]:
import psycopg2
import pandas as pd

def create_pandas_table(sql_query, database):
    """Run a SQL query and return its result set as a pandas DataFrame.

    Args:
        sql_query: SQL text to execute.
        database: Open database connection accepted by pandas.read_sql_query.

    Returns:
        A DataFrame with one row per result row and one column per selected
        column.
    """
    return pd.read_sql_query(sql_query, database)

# Export four tables of the acronyms database to data/db/<table>.csv.
# The credentials match the password set on the postgres role earlier in this
# notebook.
conn = psycopg2.connect(database="acronyms", user="postgres", password="pgpwd", host="localhost")
try:
    # exist_ok lets this cell be re-run without failing on the existing folder.
    # `os` is in scope through the earlier `import os.path` cell.
    os.makedirs('data/db', exist_ok=True)
    for tbl in ('acronyms', 'definitions', 'acronyms_definitions', 'true_definitions'):
        df = create_pandas_table("SELECT * FROM {}".format(tbl), conn)
        # Row count per table as a quick check of what was exported.
        print(tbl, len(df))
        df.to_csv('data/db/{}.csv'.format(tbl))
finally:
    # Release the connection even when a query or a file write fails.
    conn.close()

In [None]:
# Bundle the exported CSV files into acronymsDb.zip. Any previous archive is
# removed first so that the new one contains only the current files.
!rm -f acronymsDb.zip; zip acronymsDb.zip data/db/*.csv

## 4. Training & Validation

In [None]:
# Run the project's training script.
# NOTE(review): inputs and produced artifacts are not visible from this
# notebook - confirm in train.py.
!python train.py

## 5. Make Predictions

In [None]:
# Run serve.py on a sample sentence; 'svc' and the quoted sentence are passed
# to the script as its two command-line arguments.
# NOTE(review): 'svc' presumably selects the model to use - confirm in serve.py.
!python serve.py svc 'ALU is an essential part of a computer along with memory and peripherals.'