In [None]:
import logging
import os
import re
import time
from pathlib import Path

import bert_score
import matplotlib.pyplot as plt
import nest_asyncio
import pandas as pd
import seaborn as sns
import textstat
from io_functions import get_ollama_version, load_if_scraped, popen, run
from llm_parser import DeepSeekAPI
from nltk.translate.bleu_score import sentence_bleu
from rouge import Rouge
from tqdm.notebook import tqdm

In [None]:
# Log at INFO level; force=True replaces handlers already attached to the root logger.
logging.basicConfig(level=logging.INFO, force=True)

In [None]:
# Launch `ollama serve` through the project's `popen` helper and keep the
# handle so the final cell can call `.terminate()` on it.
ollama_server_process = popen('ollama serve')

ollama_server_process

In [None]:
# Display whatever the project's `get_ollama_version` helper returns.
get_ollama_version()

In [None]:
# Run `ollama list` via the project's `run` helper; `process.stdout` is parsed
# by the next cell to build the list of model names.
process = run('ollama list')

print(process.stdout)

In [None]:
# Parse the `ollama list` table into model names.
# Whitespace-only lines are dropped along with empty ones: they would strip to
# '' and make `model.split()[0]` below raise IndexError.
# The first remaining line is skipped (assumed to be the table header).
models = [line.strip() for line in process.stdout.split('\n') if line.strip()][1:]

# The model name is the first whitespace-separated column of each row.
model_list = [model.split()[0] for model in models]
model_list

Default to the first locally installed model, so that we never try to run a model that does not exist:

In [None]:
# Default to the first model reported by `ollama list`.
# NOTE(review): raises IndexError when no model is installed, and the next
# cell overrides this value anyway.
model_name = model_list[0]
model_name

In [None]:
# Explicit model choice; the next cell pulls it if `ollama list` did not report it.
model_name = 'deepseek-r1:70b'

In [None]:
# Pull the model only when it is missing from the parsed `ollama list` output.
if model_name not in model_list:
    run(f'ollama pull {model_name}')

In [None]:
# Print the `ollama show` output for the selected model.
print(run(f'ollama show {model_name}').stdout)

In [None]:
def extract_model_info(text: str):
    '''
    Pull the embedding and context lengths out of model-description text.

    Parameters:
    text (str): Text to search, e.g. the output of `ollama show <model>`.

    Returns:
    dict: Keys 'embedding_length' and 'context_length'. Each value is the
    integer that follows the matching label, or None when the label is absent.
    '''

    def _first_int(label_pattern):
        # The single capture group in each pattern holds the digits after the label.
        found = re.search(label_pattern, text)
        if found is None:
            return None
        return int(found.group(1))

    return {
        'embedding_length': _first_int(r'embedding length\s+(\d+)'),
        'context_length': _first_int(r'context length\s+(\d+)'),
    }


# Parse the lengths for the selected model from a fresh `ollama show` call.
model_info = extract_model_info(run(f'ollama show {model_name}').stdout)
model_info

In [None]:
# Print the `ollama ps` output.
print(run('ollama ps').stdout)

In [None]:
# Company to process. Kept as a string; reused for loading the data, for the
# duplicate check on the results file and for the summaries file name.
company_id = '312932093'

In [None]:
# Load this company's data via the project's `load_if_scraped` helper.
# The result is used as a DataFrame in the cells below.
df = load_if_scraped(company_id=company_id)

print(df.shape)
df.head()

In [None]:
# Client object whose `generate` coroutine is awaited for every text below.
api = DeepSeekAPI(model_name=model_name)

print(api)

In [None]:
# Patch asyncio before the batch generation is awaited further down.
# presumably needed because the notebook kernel already runs an event loop — TODO confirm
nest_asyncio.apply()


async def batch_summary_generation(
    api: 'DeepSeekAPI', texts
) -> tuple[list[str], list[float]]:
    '''
    Summarize each text sequentially and time every request.

    Parameters:
    api (DeepSeekAPI): Client whose `generate(prompt=...)` coroutine is awaited
        once per text.
    texts (iterable of str): Texts to summarize, processed in iteration order.

    Returns:
    tuple: `(summaries, times)`, two lists aligned with `texts`: the value
    returned by `api.generate` for each text and the seconds each call took.
    '''
    summaries = []
    times = []

    for text in tqdm(texts):
        prompt = 'summarize: ' + text

        # perf_counter is monotonic, so a system clock adjustment during a
        # long generation cannot produce a negative or skewed duration.
        start_time = time.perf_counter()
        summary = await api.generate(prompt=prompt)
        elapsed = time.perf_counter() - start_time

        summaries.append(summary)
        times.append(elapsed)
    return summaries, times


# Top-level await: one `api.generate` call per entry of `df.full_text`, in order.
summaries, times = await batch_summary_generation(api, df.full_text)

In [None]:
# Spot-check the first generated summary.
print(summaries[0])

In [None]:
# Attach the summaries and per-request timings to `df` (modifies `df` in place).
# The summary column and `time_spent` are read back by `evaluate_summary`.
summary_column = f'{model_name}-summaries'
df[summary_column] = summaries
df['time_spent'] = times

In [None]:
def evaluate_summary(row, model_name, type='baseline'):
    '''
    Score one generated summary against the text it was produced from.

    Parameters:
    row (pandas.Series): Row providing `full_text`, `uuid`, `companyid`,
        `companyname`, `time_spent` and the `'{model_name}-summaries'` column.
    model_name (str): Model that produced the summary; selects the summary
        column and is recorded in the result.
    type (str): Currently unused; kept so existing calls keep working.

    Returns:
    pandas.DataFrame: A single row holding the identifiers, `time_spent`,
    ROUGE recall/precision/F per ROUGE metric, BLEU, BERTScore
    precision/recall/F1, the compression ratio and the Flesch reading ease.
    '''
    # Derive the column from the argument instead of reading the notebook-level
    # `summary_column` global, so the function depends only on its inputs.
    summary = row[f'{model_name}-summaries']
    text_to_summarize = row.full_text

    # The source text is passed as the reference for every metric below.
    rouge_scores = Rouge().get_scores(summary, text_to_summarize)
    if isinstance(rouge_scores, list):
        rouge_scores = rouge_scores[0]

    # Tokenize once: the same whitespace tokens feed BLEU and the length ratio.
    reference_tokens = text_to_summarize.split()
    candidate_tokens = summary.split()
    bleu_score = sentence_bleu([reference_tokens], candidate_tokens)

    # NOTE(review): scoring a single pair per call is likely slow when run for
    # every row; consider batching all rows into one call.
    P, R, F1 = bert_score.score(
        [summary], [text_to_summarize], rescale_with_baseline=True, lang='en'
    )

    # Summary length relative to the source, in whitespace tokens; 0 for an empty source.
    compression_ratio = len(candidate_tokens) / len(reference_tokens) if reference_tokens else 0

    readability = textstat.flesch_reading_ease(summary)

    # Insertion order defines the column order of the returned frame.
    results = {
        'model_name': model_name,
        'uuid': row.uuid,
        'companyid': row.companyid,
        'companyname': row.companyname,
        'time_spent': row.time_spent,
    }

    for metric, scores in rouge_scores.items():
        results[f'{metric}_r'] = scores['r']
        results[f'{metric}_p'] = scores['p']
        results[f'{metric}_f'] = scores['f']

    results['bleu'] = bleu_score
    results['bert_precision'] = P.item()
    results['bert_recall'] = R.item()
    results['bert_f1'] = F1.item()
    results['compression_ratio'] = compression_ratio
    results['readability'] = readability

    return pd.DataFrame([results])

In [None]:
# Evaluate every row first, then build the result frame with a single concat:
# concatenating inside the loop copies the accumulated frame on every row.
# `total` gives the progress bar a length, which `iterrows()` does not expose.
evaluation_frames = [
    evaluate_summary(row, model_name)
    for _, row in tqdm(df.iterrows(), total=len(df))
]

# pd.concat raises on an empty list, so fall back to an empty frame.
evaluation_results = (
    pd.concat(evaluation_frames, ignore_index=True) if evaluation_frames else pd.DataFrame()
)

evaluation_results.head()

In [None]:
# Append this run's metrics to the shared results file unless this
# model/company combination has already been stored.
csv_filename = Path('..') / 'data' / 'evaluation_results.csv'

if csv_filename.exists():
    existing_df = pd.read_csv(csv_filename)
    # read_csv infers a numeric dtype for all-digit ids, while `company_id` is
    # a str in this notebook. Compare both as strings; otherwise the check
    # never matches and the same rows are appended again on every run.
    already_saved = (
        (existing_df.model_name == model_name)
        & (existing_df.companyid.astype(str) == str(company_id))
    ).any()
    if already_saved:
        logging.info(
            f'model {model_name} and {company_id} '
            f'combination already exists in {csv_filename}. '
            f'no new row added.'
        )
        updated_df = existing_df
    else:
        updated_df = pd.concat([existing_df, evaluation_results], ignore_index=True)
        logging.info(f'model {model_name} not found. appending new row to {csv_filename}.')
else:
    updated_df = evaluation_results
    logging.info(f'{csv_filename} not found. creating new file.')

# Make sure the target directory exists on a fresh checkout before writing.
csv_filename.parent.mkdir(parents=True, exist_ok=True)
updated_df.to_csv(csv_filename, index=False)
logging.info(f'results saved to {csv_filename}')

In [None]:
def visualize_results(df):
    '''
    Plot the numeric evaluation metrics of a results DataFrame.

    Parameters:
    df (pandas.DataFrame): Evaluation results; only numeric columns are plotted.

    Two figures are shown:
    - One histogram per numeric column.
    - A heatmap of the pairwise correlations between those columns.
    '''
    metrics = df[df.select_dtypes(include='number').columns.tolist()]

    # `hist` creates its own figure, which is the current one for the calls below.
    metrics.hist(bins=20, figsize=(15, 10))
    plt.suptitle('Histograms of Evaluation Metrics', fontsize=16)
    # Leave the top 4% of the figure free for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.96])
    plt.show()

    _, heatmap_ax = plt.subplots(figsize=(12, 10))
    sns.heatmap(metrics.corr(), annot=True, cmap='coolwarm', fmt='.2f', ax=heatmap_ax)
    heatmap_ax.set_title('Correlation Heatmap of Evaluation Metrics', fontsize=16)
    plt.show()


visualize_results(evaluation_results)

In [None]:
# Persist the frame, including the generated summaries, as a tab-separated file.
df.to_csv(
    # `.replace` applies to the file name only (method call binds tighter than `/`),
    # so a '/' in the model name cannot introduce an extra path component.
    Path('..') / 'data' / 'summaries' / f'{company_id}_{model_name}.csv'.replace('/', '-'),
    sep='\t',
    index=False,
    quoting=1,  # same value as csv.QUOTE_ALL: every field is quoted
    escapechar='\\',
    doublequote=True,
    quotechar='"',
    lineterminator='\n',
)

In [None]:
# Preview the first 2000 characters of the source text stored under index 0.
print(df.full_text[0][:2000])

In [None]:
# Print the generated summary stored under index 0 for comparison with its source text.
print(df[summary_column][0])

In [None]:
# Stop the `ollama serve` process started near the top of the notebook.
ollama_server_process.terminate()