In [3]:
import spacy
from spacy import displacy
from collections import Counter
import pandas as pd
pd.options.display.max_rows = 400
pd.options.display.max_colwidth =  400

In [4]:
import os
import spacy

# Load spaCy English model
nlp = spacy.load("en_core_web_sm")

# Folder containing the .txt files
folder_path = "../negative_DeepSeek_texts"  # adjust if necessary

# Store adjectives per file
adjectives_by_file = {}

# Loop through each .txt file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith(".txt"):
        file_path = os.path.join(folder_path, filename)
        
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
            doc = nlp(text)

            # Extract all adjectives (in lemma form)
            adjectives = [token.lemma_ for token in doc if token.pos_ == "ADJ"]
            adjectives_by_file[filename] = adjectives

# Display adjectives per file
for file, adjectives in adjectives_by_file.items():
    print(f"\n📄 {file} — {len(adjectives)} adjectives found:")
    print(", ".join(adjectives))



📄 Global_AI_rivalry_is_a_dangerous_game.txt — 85 adjectives found:
former, senior, senior, chief, incoming, human, artificial, general, existential, numerous, last, least, different, intelligent, possible, chinese, focused, widespread, chinese, less, american, chinese, human, top, chinese, chinese, official, precise, general, many, human, chinese, obvious, physical, necessary, slow, practical, industrial, vast, future, chinese, abstract, imminent, tangible, wrong, human, third, possible, worrying, secretive, new, large, nuclear, cold, strong, different, direct, intense, profound, technological, unexpected, technological, right, human, irretrievable, strategic, multiple, offensive, diplomatic, military, ugly, own, key, strategic, unprecedented, ill, prepared, strategic, technological, many, alone, geopolitical, new, dangerous, many

📄 What_questions_will_China's_DeepSeek_not_answer?_–.txt — 162 adjectives found:
sensitive, advanced, low, other, chinese, artificial, regulatory, sensitiv

In [5]:
# Create counters for adjectives and verbs
adjective_counter = Counter()
verb_counter = Counter()

# Loop through each file
for filename in os.listdir(folder_path):
    if filename.endswith(".txt"):
        file_path = os.path.join(folder_path, filename)
        
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
            doc = nlp(text)

            # Extract adjectives and verbs
            for token in doc:
                if token.pos_ == "ADJ":
                    adjective_counter[token.lemma_.lower()] += 1
                elif token.pos_ == "VERB":
                    verb_counter[token.lemma_.lower()] += 1

# Show top 50 most common adjectives and verbs
print("🔤 Top 50 Adjectives:")
for word, freq in adjective_counter.most_common(50):
    print(f"{word}: {freq}")

print("\n🔧 Top 20 Verbs:")
for word, freq in verb_counter.most_common(20):
    print(f"{word}: {freq}")


🔤 Top 50 Adjectives:
chinese: 44
political: 13
many: 12
english: 12
such: 11
human: 10
last: 10
artificial: 9
new: 8
sensitive: 8
local: 8
american: 7
less: 6
large: 5
technological: 5
low: 5
detailed: 5
high: 5
good: 5
personal: 5
different: 4
official: 4
inseparable: 4
cultural: 4
social: 4
similar: 4
first: 4
chief: 3
general: 3
possible: 3
widespread: 3
cold: 3
strategic: 3
diplomatic: 3
own: 3
advanced: 3
other: 3
clear: 3
cheap: 3
sovereign: 3
international: 3
legal: 3
single: 3
great: 3
global: 3
non: 3
-: 3
current: 3
south: 3
korean: 3

🔧 Top 20 Verbs:
say: 27
use: 16
ask: 13
take: 12
make: 9
lead: 7
create: 7
build: 7
see: 7
send: 7
provide: 7
give: 7
remain: 6
let: 6
come: 6
consider: 6
claim: 6
have: 6
suspend: 6
appear: 5


In [9]:
df = pd.DataFrame(adjective_counter.most_common(), columns=['adj', 'count'])
df[:100]

Unnamed: 0,adj,count
0,chinese,44
1,political,13
2,many,12
3,english,12
4,such,11
5,human,10
6,last,10
7,artificial,9
8,new,8
9,sensitive,8


In [13]:
# Save top 20 adjectives to CSV
df.head(20).to_csv("../outputs/top_adjectives_negative_texts.csv", index=False)

Keyword Extraction

In [58]:
import os
import re
import spacy
from IPython.display import Markdown, display

# Load spaCy English model
nlp = spacy.load("en_core_web_sm")

def find_sentences_with_keyword(doc, keyword):
    """
    Find and display sentences from a spaCy document that contain a specific keyword.
    Keyword will be highlighted using Markdown (bolded).
    """
    found = False  # to track if any sentence matched

    for sentence in doc.sents:
        sentence_text = sentence.text

        if keyword.lower() in sentence_text.lower():
            sentence_clean = re.sub(r"\n", " ", sentence_text)
            sentence_highlighted = re.sub(
                f"({re.escape(keyword)})", r"**\1**", sentence_clean, flags=re.IGNORECASE
            )
            display(Markdown(sentence_highlighted))
            found = True
    
    if not found:
        display(Markdown("_No sentences with keyword found._"))

# Folder with your .txt files
folder_path = "../negative_DeepSeek_texts"

# Keyword to search
keyword = "political"

# Loop through files and process each
for filename in os.listdir(folder_path):
    if filename.endswith(".txt"):
        file_path = os.path.join(folder_path, filename)
        
        with open(file_path, 'r', encoding='utf-8') as file:
            text = file.read()
            doc = nlp(text)

            # Display the filename
            display(Markdown(f"## 📄 {filename}"))
            # Find and display matching sentences
            find_sentences_with_keyword(doc, keyword=keyword)


## 📄 Global_AI_rivalry_is_a_dangerous_game.txt

Many struggle to stay up to date with AI developments, let alone plan for the geo**political** ramifications of new breakthroughs.

## 📄 What_questions_will_China's_DeepSeek_not_answer?_–.txt

The DeepSeek AI chatbot becomes tongue-tied when asked about issues seen as **political**ly sensitive by China's Communist Party.

However, like other Chinese artificial intelligence chatbots operating under China's regulatory framework, DeepSeek's responses to **political**ly sensitive topics reveal clear limitations.  

"  We then tested four more **political**ly related questions, covering Taiwan's elections, diplomatic ties, **political** parties and potential conflict scenarios.  

Only the question about Taiwan's **political** parties received a response.

However, one response covering Taiwan's **political** parties was also deleted within two seconds of being generated.  

Beijing considers Taiwan as its territory that will eventually be "reunited" with the mainland, so the responses to **political** questions were not surprising.

However, the Chinese version inserted **political** slogans, even calling Taiwan's highest mountain, Yushan, "the highest peak in Eastern China" and concluding with "the great rejuvenation of the Chinese nation will inevitably be achieved.

The student-led protesters were calling for **political** reforms.  

To this day, it remains one of the most **political**ly sensitive topics in China, and any mention of the massacre in the public sphere is censored.  

When asked, "what impact will Xi Jinping's constitutional amendment to remove term limits have on China's **political** system?"

"Tibet has a long history as a distinct cultural and **political** entity …

"The LGBTQ+ community in China faces social stigma, legal limitations, cultural barriers, and lack of representation."  DeepSeek's self-censorship  In summary, when it comes to **political** questions, DeepSeek's Chinese version mostly refused to answer or followed strict government narratives.

Even on non-**political** questions, the Chinese version still injected ideological messaging into answers.  

However, on non-**political** topics, the English responses mostly remained neutral and informative.  

## 📄 DeepSeek:_Is_China's_AI_tool_as_good_as_it_seems?.txt

_No sentences with keyword found._

## 📄 First_Thing:_Donald_Trump_calls_China’s_DeepSeek_A.txt

_No sentences with keyword found._

## 📄 South_Korea_Bans_Downloads_of_DeepSeek,_the_Chines.txt

_No sentences with keyword found._

## 📄 DeepSeek_has_ripped_away_AI’s_veil_of_mystique._Th.txt

Such claims derive less from technological possibilities than from **political** and economic needs.

It is the hype that drives the billion-dollar investment and buys **political** influence, including a seat at the presidential inauguration.  

It is a reflection, too, of geo**political** tensions.

## 📄 Diving_into_DeepSeek:_inside_the_7_February_Guardi.txt

_No sentences with keyword found._

When I used part-of-speech tagging to analyze the texts with negative sentiment scores, I found that the most frequent adjective was "Chinese", followed by "political". The word political appeared 13 times across 11 articles, making it a notable and valuable term for further analysis.

Because of its frequency and prominence, I selected political as a keyword and examined all the sentences in which it appeared. I also included geopolitical in this keyword search. By zooming in on these sentences, we can see the specific contexts and topics being discussed in the articles — particularly how they relate to political or geopolitical concerns involving China and the United States.

This method helps explain why these articles were assigned highly negative sentiment scores. The negativity may not stem from the AI tool DeepSeek itself, but rather from underlying political anxieties, such as fears of censorship, state control, or geopolitical competition.

This demonstrates that POS-based adjective analysis is a valuable research tool. It helps identify dominant themes in highly negative articles — in this case, clearly pointing toward politics as a central topic. For example, several articles mention that DeepSeek is owned by the Chinese government, which raises concerns about state influence over the technology. Others highlight that DeepSeek avoids answering sensitive political questions, reinforcing concerns about censorship and control.

Therefore, we can confidently conclude that the political dimension — both domestic and international — is a major theme in these negative articles.