edited readme and streamlit app

MartinKalema · Jun 11, 2024 · c316114 · c316114
1 parent 616de9f
commit c316114
Show file tree

Hide file tree

Showing 2 changed files with 136 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -14,6 +14,134 @@ The objective of this hackathon is to develop a multi-class classification model
 
 <img src="static/model_training_pipeline_large.png" />
 
+### Folder structure
+
+```
+├── app.py
+├── artifacts
+│   └── data_ingestion
+│       ├── compressed
+│       │   ├── test_data.zip
+│       │   └── train_data.zip
+│       └── decompressed
+│           ├── Test.csv
+│           └── Train.csv
+├── autopep.py
+├── bfg.jar
+├── codecov.yml
+├── configuration
+│   └── configuration.yaml
+├── Dockerfile
+├── dvc.yaml
+├── LICENSE
+├── logs
+│   ├── 20240608-124455.log
+│   ├── 20240608-172339.log
+│   ├── 20240608-172525.log
+│   ├── 20240608-172559.log
+│   ├── 20240608-173514.log
+│   ├── 20240608-173643.log
+│   ├── 20240608-173740.log
+│   ├── 20240608-174011.log
+│   ├── 20240608-174135.log
+│   ├── 20240608-174314.log
+│   ├── 20240608-175835.log
+│   ├── 20240608-180027.log
+│   ├── 20240608-180124.log
+│   ├── 20240608-180221.log
+│   ├── 20240608-180323.log
+│   ├── 20240608-220709.log
+│   ├── 20240608-221302.log
+│   ├── 20240608-221405.log
+│   ├── 20240608-221929.log
+│   ├── 20240608-222301.log
+│   ├── 20240608-222351.log
+│   ├── 20240609-115543.log
+│   ├── 20240609-152329.log
+│   ├── 20240611-123013.log
+│   ├── 20240611-130747.log
+│   └── 20240611-130938.log
+├── main.py
+├── models
+│   ├── model_name.pth
+│   └── text_classifier_learner.pkl
+├── parameters.yaml
+├── README.md
+├── requirements.txt
+├── research
+│   ├── 01_data_ingestion.ipynb
+│   ├── 02_model_training.ipynb
+│   └── trails.ipynb
+├── setup.py
+├── src
+│   ├── swahiliNewsClassifier
+│   │   ├── components
+│   │   │   ├── data_ingestion.py
+│   │   │   ├── __init__.py
+│   │   │   ├── model_training_and_evaluation.py
+│   │   │   └── __pycache__
+│   │   │       ├── data_ingestion.cpython-311.pyc
+│   │   │       ├── data_ingestion.cpython-38.pyc
+│   │   │       ├── __init__.cpython-311.pyc
+│   │   │       └── __init__.cpython-38.pyc
+│   │   ├── configuration
+│   │   │   ├── configuration.py
+│   │   │   ├── __init__.py
+│   │   │   └── __pycache__
+│   │   │       ├── configuration.cpython-311.pyc
+│   │   │       ├── configuration.cpython-38.pyc
+│   │   │       ├── __init__.cpython-311.pyc
+│   │   │       └── __init__.cpython-38.pyc
+│   │   ├── constants
+│   │   │   ├── __init__.py
+│   │   │   └── __pycache__
+│   │   │       ├── __init__.cpython-311.pyc
+│   │   │       └── __init__.cpython-38.pyc
+│   │   ├── entity
+│   │   │   ├── entities.py
+│   │   │   ├── __init__.py
+│   │   │   └── __pycache__
+│   │   │       ├── entities.cpython-311.pyc
+│   │   │       ├── entities.cpython-38.pyc
+│   │   │       ├── __init__.cpython-311.pyc
+│   │   │       └── __init__.cpython-38.pyc
+│   │   ├── __init__.py
+│   │   ├── pipeline
+│   │   │   ├── __init__.py
+│   │   │   ├── __pycache__
+│   │   │   │   ├── __init__.cpython-311.pyc
+│   │   │   │   └── stage_01_data_ingestion.cpython-311.pyc
+│   │   │   ├── stage_01_data_ingestion.py
+│   │   │   └── stage_02_model_training_and_evaluation.py
+│   │   ├── __pycache__
+│   │   │   ├── __init__.cpython-311.pyc
+│   │   │   └── __init__.cpython-38.pyc
+│   │   └── utilities
+│   │       ├── helper_functions.py
+│   │       ├── _init__.py
+│   │       └── __pycache__
+│   │           ├── helper_functions.cpython-311.pyc
+│   │           └── helper_functions.cpython-38.pyc
+│   └── swahiliNewsClassifier.egg-info
+│       ├── dependency_links.txt
+│       ├── PKG-INFO
+│       ├── SOURCES.txt
+│       └── top_level.txt
+├── static
+│   ├── architecture.png
+│   └── model_training_pipeline_large.png
+├── template.py
+└── tests
+    ├── conftest.py
+    ├── __init__.py
+    ├── __pycache__
+    │   ├── conftest.cpython-311-pytest-7.4.0.pyc
+    │   ├── __init__.cpython-311.pyc
+    │   └── test_data_ingestion.cpython-311-pytest-7.4.0.pyc
+    └── test_data_ingestion.py
+
+```
+
 ### How to install
 
 Clone the repository

diff --git a/app.py b/app.py
@@ -1,39 +1,23 @@
 import streamlit as st
-import random
-import time
+from fastai.text.all import *
 
-# from fastai.text.all import *
-
-# @st.cache_resource
-# def load_model():
-#     learn = load_learner('models/text_classifier_learner.pkl')
-#     return learn
-
-# learn = load_model()
-
-classes = ["Kitaifa", "Biashara", "Kimataifa"]
+@st.cache_resource
+def load_model():
+    """Return the exported fastai learner; a spinner is displayed while it loads."""
+    with st.spinner('Model is being loaded...'):
+        return load_learner('models/text_classifier_model.pkl')
 
 st.title('ULMFiT Swahili News Article Classifier')
 
 st.markdown("""
 ULMFiT (Universal Language Model Fine-tuning) is an effective transfer learning method for NLP tasks.
 """)
 
-def run_spinner():
-    with st.spinner('Model is being loaded . . .'):
-        time.sleep(15)  
-
-run_spinner()
-
 user_text = st.text_area('Enter text for classification')
 
 if st.button('Classify'):
     if user_text:
-        if len(user_text) > 200:
-            time.sleep(3)
-            pred_class = random.choice(classes)
-            st.write(f"Input text belongs to: {pred_class}")
-        else:
-            st.write("Text too short. Please enter text with more than 200 characters.")
+        pred_class, _, _ = load_model().predict(user_text)  # `learn` was never bound; use the cached loader
+        st.write(f"Input text belongs to: {pred_class}")
     else:
         st.write("Please enter text to classify.")