# DDG - MEDIA BIAS : LLM

In [None]:
fastapi==0.104.1
uvicorn==0.23.2
langchain==0.0.354
python-multipart==0.0.6
llama_cpp_python==0.2.26

# Import Modules

In [13]:
# Llama2
from langchain_community.llms import Ollama

# Prompt
from langchain.chains import LLMChain
from langchain.chains.prompt_selector import ConditionalPromptSelector
from langchain.prompts import PromptTemplate

# Output Parser
from langchain_core.output_parsers import StrOutputParser

# For using Llama2 with GGUF
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

# For DataFrame
import pandas as pd

In [14]:
model_path = "llama-2-7b-chat_Q4_K_M.gguf"

output_parser = StrOutputParser()

llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=1,
    n_batch=1024,
    n_ctx=2048,
    f16_kv=True,
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
)

# Load the CSV file
df = pd.read_csv("Cleaned_2020_articles_mass.csv")

# is it in Boston? Y/N -> first character
# tell me the LOCATION where this article took place, SPECIFIC AS POSSIBLE
# your response may not exceed 2045 words.

prompt = PromptTemplate( input_variables=["headline", "body"], template="""<<SYS>> \n <</SYS>> \n\n [INST] 
    Generate response : is it in boston? yes or no,
    tell me the LOCATION where this article took place, BE SPECIFIC AS POSSIBLE,
    your response may not exceed 2045 words."\n\n
    Headline: \n\n {headline} \n\n Body: {body} \n\n [/INST]""",
    )

chain = prompt | llm | output_parser

llama_model_loader: loaded meta data with 19 key-value pairs and 291 tensors from llama-2-7b-chat_Q4_K_M.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                       llama.context_length u32              = 4096
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 11008
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128
llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 

In [10]:
# Placeholder function for generating summaries - replace this with your actual model interaction

def generate_summary(headline, body):

    summary = chain.invoke({
    "headline": headline,
    "body": body
    })

    return summary

In [11]:
#df_sliced = df.iloc[:2]

In [18]:
# word count < 2045 -> 100 rows
df_filtered = df.loc[df['actual_body_count'] < 2045].head(10)

In [19]:
# Iterate over the DataFrame and generate summaries
#df['Summary'] = df_filtered.apply(lambda row: generate_summary(row['hl1'], row['body_cleaned']), axis=1)
df_filtered['Summary'] = df_filtered.apply(lambda row: generate_summary(row['hl1'], row['body_cleaned']), axis=1)

Llama.generate: prefix-match hit


  Here is a short sentence for the given headline and body:
Boston has a 3-10 record in its last 13 games against Central teams, including a 6-4 loss to Chicago in their last matchup.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       3.32 ms /    52 runs   (    0.06 ms per token, 15643.80 tokens per second)
llama_print_timings: prompt eval time =   17672.37 ms /  1071 tokens (   16.50 ms per token,    60.60 tokens per second)
llama_print_timings:        eval time =    2578.27 ms /    51 runs   (   50.55 ms per token,    19.78 tokens per second)
llama_print_timings:       total time =   20340.46 ms /  1122 tokens
Llama.generate: prefix-match hit


  Here is a short sentence based on the provided headline and body:
The state funding formula is too generous to wealthy schools, according to a new report from the Business Alliance, which found that nearly half of all state aid goes to the wealthiest 20% of districts despite the need for increased funding in lower-income areas, where gaps in opportunities for achievement are likely to widen due to recent school closures related to the COVID-19 pandemic.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       6.79 ms /   105 runs   (    0.06 ms per token, 15463.92 tokens per second)
llama_print_timings: prompt eval time =   10260.04 ms /   697 tokens (   14.72 ms per token,    67.93 tokens per second)
llama_print_timings:        eval time =    5212.35 ms /   104 runs   (   50.12 ms per token,    19.95 tokens per second)
llama_print_timings:       total time =   15649.10 ms /   801 tokens
Llama.generate: prefix-match hit


  Sure! Here is a short sentence summarizing the information in the given headline and body:
In Massachusetts, there have been over 9,000 COVID-19 deaths, with a large party involving 50-60 Lincoln Sudbury Regional High School students breaking out on Saturday night, prompting officials to delay the return of students to school due to the potential impacts of the gathering, despite students wearing masks and practicing social distancing.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       6.33 ms /   101 runs   (    0.06 ms per token, 15968.38 tokens per second)
llama_print_timings: prompt eval time =    5292.53 ms /   345 tokens (   15.34 ms per token,    65.19 tokens per second)
llama_print_timings:        eval time =    4669.38 ms /   100 runs   (   46.69 ms per token,    21.42 tokens per second)
llama_print_timings:       total time =   10127.56 ms /   445 tokens
Llama.generate: prefix-match hit


  Sure! Here's a short sentence summarizing the article:
Rookie Aaron Nesmith had a pivotal performance in Game 7 of the Eastern Conference finals, hitting clutch free throws and deflecting a potential game-tying shot to help the Celtics defeat the Raptors.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       4.55 ms /    70 runs   (    0.06 ms per token, 15388.00 tokens per second)
llama_print_timings: prompt eval time =   10196.68 ms /   698 tokens (   14.61 ms per token,    68.45 tokens per second)
llama_print_timings:        eval time =    3511.68 ms /    69 runs   (   50.89 ms per token,    19.65 tokens per second)
llama_print_timings:       total time =   13823.62 ms /   767 tokens
Llama.generate: prefix-match hit


  In Boston, college students are facing harsh penalties for breaking COVID-19 rules, while conservatives argue that they are not being held accountable like other adults.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       2.54 ms /    39 runs   (    0.07 ms per token, 15372.49 tokens per second)
llama_print_timings: prompt eval time =    4552.02 ms /   274 tokens (   16.61 ms per token,    60.19 tokens per second)
llama_print_timings:        eval time =    1824.35 ms /    38 runs   (   48.01 ms per token,    20.83 tokens per second)
llama_print_timings:       total time =    6440.42 ms /   312 tokens
Llama.generate: prefix-match hit


  Sure! Here is a short sentence summarizing the information in the headline and body:
In Clinton, Massachusetts, an intrepid group of visitors can marvel at two engineering marvels - the 1906 Wachusett Dam and the Wachusett Reservoir - which were constructed using labor from local workers who worked year-round for five years to build the 944-foot-long dam that creates water for nearly 3 million people in the Boston area.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       9.03 ms /   107 runs   (    0.08 ms per token, 11850.70 tokens per second)
llama_print_timings: prompt eval time =    4601.23 ms /   282 tokens (   16.32 ms per token,    61.29 tokens per second)
llama_print_timings:        eval time =    5007.59 ms /   106 runs   (   47.24 ms per token,    21.17 tokens per second)
llama_print_timings:       total time =    9785.55 ms /   388 tokens
Llama.generate: prefix-match hit


  The New England Patriots explored trading defensive lineman Michael Gilmore before last season.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       1.41 ms /    22 runs   (    0.06 ms per token, 15613.91 tokens per second)
llama_print_timings: prompt eval time =    3173.47 ms /   157 tokens (   20.21 ms per token,    49.47 tokens per second)
llama_print_timings:        eval time =     896.15 ms /    21 runs   (   42.67 ms per token,    23.43 tokens per second)
llama_print_timings:       total time =    4103.11 ms /   178 tokens
Llama.generate: prefix-match hit


  In 1898, the Supreme Court issued a decision in Williams v. Mississippi that denied many African Americans in the South the right to vote, setting off a long struggle for voting rights that would take decades to overcome.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       4.13 ms /    49 runs   (    0.08 ms per token, 11850.06 tokens per second)
llama_print_timings: prompt eval time =    5650.33 ms /   374 tokens (   15.11 ms per token,    66.19 tokens per second)
llama_print_timings:        eval time =    2366.74 ms /    48 runs   (   49.31 ms per token,    20.28 tokens per second)
llama_print_timings:       total time =    8098.05 ms /   422 tokens
Llama.generate: prefix-match hit





llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =      11.13 ms /   176 runs   (    0.06 ms per token, 15817.38 tokens per second)
llama_print_timings: prompt eval time =   19875.34 ms /  1231 tokens (   16.15 ms per token,    61.94 tokens per second)
llama_print_timings:        eval time =    9168.35 ms /   175 runs   (   52.39 ms per token,    19.09 tokens per second)
llama_print_timings:       total time =   29353.40 ms /  1406 tokens
Llama.generate: prefix-match hit


  In Massachusetts, there is a fundamental racial bias in the court system, with mostly white judges and a lack of diversity on the bench, according to a recent study.


llama_print_timings:        load time =   18509.63 ms
llama_print_timings:      sample time =       2.52 ms /    39 runs   (    0.06 ms per token, 15445.54 tokens per second)
llama_print_timings: prompt eval time =    6845.98 ms /   455 tokens (   15.05 ms per token,    66.46 tokens per second)
llama_print_timings:        eval time =    1843.68 ms /    38 runs   (   48.52 ms per token,    20.61 tokens per second)
llama_print_timings:       total time =    8754.03 ms /   493 tokens


In [None]:
# Limit number of rows to iterate over
#row_limit = 1

# Slice the DataFrame to the first 'row_limit' rows and then apply the function
#df = df.loc[:row_limit-1, 'Summary'] = df.loc[:row_limit-1].apply(lambda row: generate_summary(row['hl1'], row['body_cleaned']), axis=1)

In [None]:
#df.head(5)

# Export as CSV

In [None]:
# Save the updated DataFrame to a new CSV file
df.to_csv("2020_articles_mass_summaries_10rows.csv", index=False)