### **Import**

In [None]:
import os
import pandas as pd
from processing_utils import vector_db
from config import config_params
from suggestion_engine import SuggestionEngine
from processing_utils import resource_preprocessing

In [2]:
# Create the SuggestionEngine instance shared by every later cell: the KB cells read its
# qdrantdb_client, collection names and search_embedder, and the suggestion-generation
# cells call its pipeline methods.
engine = SuggestionEngine()

### **Creating the Initial Knowledge Base (KB)**

**Populate KB with tweets**

In [2]:
# Load the raw tweets from the CSV file configured under "path_tweets_raw".
tweets = pd.read_csv(config_params["path_tweets_raw"])

# Tweet-specific pre-processing
tweets = resource_preprocessing.tweet_date_range(tweets, 'created_at', [2014, 2022])  # tweets after 2014 and before 2022
tweets = resource_preprocessing.tweet_shuffle(tweets, 1, 41)  # shuffle tweets (NOTE(review): presumably 41 is the random seed - confirm)
tweets = resource_preprocessing.rm_links_handles(tweets, 'text')  # adds new column "text_clean" that contains the text without links and handles

# Store the prepared tweets in a "prepared" directory next to the raw CSV.
# os.path.join keeps the path relative when the raw path has no directory component
# (plain concatenation with '/prepared' would point at the filesystem root), and
# makedirs(exist_ok=True) lets this cell be re-run without raising FileExistsError.
tweets_prepared_dir = os.path.join(os.path.dirname(config_params["path_tweets_raw"]), 'prepared')
os.makedirs(tweets_prepared_dir, exist_ok=True)
tweets.to_pickle(os.path.join(tweets_prepared_dir, 'tweets_prepared.pkl'))

In [None]:
# Populate the tweet collection of the vector DB from the prepared-tweets directory.
# NOTE(review): 'id' and 'text_clean' are presumably the id and text column names - confirm
# against vector_db.create_db_collection.
vector_db.create_db_collection(
    tweets_prepared_dir,
    'id',
    'text_clean',
    engine.qdrantdb_client,
    engine.tweet_collection_name,
    engine.search_embedder,
    cross_dataset_preprocess=True,
)

**Populate KB with abstracts**

In [None]:
# Populate the abstract collection of the vector DB from the configured abstracts path.
# NOTE(review): 'corpusid' and 'abstract' are presumably the id and text column names - confirm
# against vector_db.create_db_collection.
vector_db.create_db_collection(
    config_params["path_abstracts"],
    'corpusid',
    'abstract',
    engine.qdrantdb_client,
    engine.abstract_collection_name,
    engine.search_embedder,
    cross_dataset_preprocess=True,
)

## **Suggestion Generation**

**Load Feedback**

In [17]:
# Read the pickled feedback tweets.
# NOTE(review): the path below is an empty placeholder and must be filled in before this
# cell can run.
feedback_pickle_path = ""
tweets = pd.read_pickle(feedback_pickle_path)

# Feedback-specific preprocessing (disabled; the loaded frame is expected to already
# carry the 'text_clean' column used below):
# tweets = resource_preprocessing.rm_links_handles(tweets, 'text')

# Hand the feedback to the engine, passing 'id' and 'text_clean' as column names.
engine.load_feedback(
    tweets,
    'id',
    'text_clean',
    cross_dataset_preprocess=True,
)

**Process Weakness Identification**

In [18]:
# Run the engine's weakness-identification step; it returns two results, bound here as
# the per-feedback weakness batch and the weakness cluster batch.
feedback_weakness_batch, weakness_cluster_batch = engine.weaknesses_identification()

**Weakness Clustering**

In [20]:
# Cluster the identified weaknesses; the result replaces weakness_cluster_batch.
# NOTE(review): presumably cluster_threshold is a similarity cut-off - confirm in SuggestionEngine.
weakness_cluster_batch = engine.weaknesses_clustering(
    cluster_min_size=1,
    cluster_threshold=0.75,
)

**Search Query Generation for each Cluster**

In [24]:
# Generate the search queries for the weakness clusters, passing a maximum cluster size of 10.
cluster_queries_batch = engine.queries_generation(
    cluster_max_size=10,
)

**Improvement Suggestion Generation for each Cluster**

In [None]:
# Generate improvement suggestions per cluster; the call returns three results that
# replace the query, cluster and feedback batches (in that order).
# NOTE(review): the meaning of the two positional 10s is not visible here - confirm
# against SuggestionEngine.cluster_suggestion_generation.
cluster_queries_batch, weakness_cluster_batch, feedback_weakness_batch = engine.cluster_suggestion_generation(10, 10)

**Merging Suggestions for each Tweet**

In [28]:
# Run the engine's answer-generation step; its result replaces feedback_weakness_batch.
feedback_weakness_batch = engine.feedback_answer_generation()

**Saving**

In [27]:
# Persist the three result batches as pickle files. Each batch is written to its own
# file so that the writes cannot overwrite one another.
results_dir = "results"  # NOTE(review): adjust to the desired output location
os.makedirs(results_dir, exist_ok=True)  # safe to re-run: an existing directory is reused

feedback_weakness_batch.to_pickle(os.path.join(results_dir, "feedback_weakness_batch.pkl"))
cluster_queries_batch.to_pickle(os.path.join(results_dir, "cluster_queries_batch.pkl"))
weakness_cluster_batch.to_pickle(os.path.join(results_dir, "weakness_cluster_batch.pkl"))