In [19]:
import stanza
import pandas as pd

# Load the Stanza model for English.
# Only tokenization, multi-word-token expansion and POS tagging are requested:
# the extraction in this notebook reads nothing but `word.upos`, so loading the
# remaining default processors would only add download and run time.
nlp = stanza.Pipeline('en', processors='tokenize,mwt,pos')

# Load your data
# NOTE(review): absolute, machine-specific path; adjust before running elsewhere.
file_path = "H:/Downloads/7000 sentences Corpus with IDs.xlsx"
sheet_name = "3000"  # Change as required
df = pd.read_excel(file_path, sheet_name=sheet_name)

# Ensure the data has an "English" column; fail early, before any sentence
# is sent through the pipeline.
if "English" not in df.columns:
    raise ValueError("No 'English' column found in the sheet!")

# Define a function to extract nouns and pronouns using Stanza
def extract_nouns_pronouns(sentence, max_items=3):
    """Return the first nouns and pronouns Stanza tags in ``sentence``.

    Args:
        sentence: Text passed to the module-level ``nlp`` pipeline.
        max_items: Maximum number of words kept in each list. Defaults to 3;
            ``None`` keeps every match.

    Returns:
        A ``(nouns, pronouns)`` tuple of lists holding word texts in the
        order they occur in the processed document.
    """
    # Match the noun tags explicitly. A prefix test on "N" also matches the
    # UPOS tag "NUM" (numerals) and misses "PROPN" (proper nouns).
    noun_tags = {"NOUN", "PROPN"}

    nouns, pronouns = [], []
    doc = nlp(sentence)  # Process the sentence with Stanza pipeline
    for sent in doc.sentences:
        for word in sent.words:
            if word.upos in noun_tags:
                nouns.append(word.text)
            elif word.upos == "PRON":
                pronouns.append(word.text)

    return nouns[:max_items], pronouns[:max_items]

# Apply the function to the English sentences.
# Cells that are not strings (e.g. blank/NaN cells) get two empty lists
# instead of being sent through the pipeline.
extracted_pairs = [
    extract_nouns_pronouns(cell) if isinstance(cell, str) else ([], [])
    for cell in df["English"]
]

# Split each (nouns, pronouns) pair into separate columns.
# Naming the columns explicitly keeps the assignment valid even when the
# sheet has no rows; it also removes the need for a temporary combined column.
df[["Nouns", "Pronouns"]] = pd.DataFrame(
    extracted_pairs, index=df.index, columns=["Nouns", "Pronouns"]
)

# Save the results
output_path = "H:/Downloads/Nouns_Pronouns_Extracted_Stanza.xlsx"
df.to_excel(output_path, index=False)

print(f"Results saved to {output_path}")


  from .autonotebook import tqdm as notebook_tqdm
2024-11-22 12:08:17 INFO: Checking for updates to resources.json in case models have been updated.  Note: this behavior can be turned off with download_method=None or download_method=DownloadMethod.REUSE_RESOURCES
Downloading https://raw.githubusercontent.com/stanfordnlp/stanza-resources/main/resources_1.9.0.json: 392kB [00:00, 174MB/s]                     
2024-11-22 12:08:17 INFO: Downloaded file to C:\Users\CYTech Student\stanza_resources\resources.json
Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.9.0/models/tokenize/combined.pt: 100%|██████████| 651k/651k [00:00<00:00, 12.1MB/s]
Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.9.0/models/mwt/combined.pt: 100%|██████████| 264k/264k [00:00<00:00, 10.4MB/s]
Downloading https://huggingface.co/stanfordnlp/stanza-en/resolve/v1.9.0/models/pos/combined_charlm.pt: 100%|██████████| 38.6M/38.6M [00:01<00:00, 30.2MB/s]
Downloading https://huggingface.co

Results saved to H:/Downloads/Nouns_Pronouns_Extracted_Stanza.xlsx


In [18]:
pip install stanza


Collecting stanza
  Downloading stanza-1.9.2-py3-none-any.whl (1.1 MB)
     ---------------------------------------- 1.1/1.1 MB 6.7 MB/s eta 0:00:00
Collecting protobuf>=3.15.0
  Downloading protobuf-5.28.3-cp310-abi3-win_amd64.whl (431 kB)
     ------------------------------------- 431.5/431.5 KB 13.6 MB/s eta 0:00:00
Collecting emoji
  Downloading emoji-2.14.0-py3-none-any.whl (586 kB)
     ------------------------------------- 586.9/586.9 KB 18.6 MB/s eta 0:00:00
Collecting tomli
  Downloading tomli-2.1.0-py3-none-any.whl (13 kB)
Collecting torch>=1.3.0
  Downloading torch-2.5.1-cp310-cp310-win_amd64.whl (203.1 MB)
     ------------------------------------- 203.1/203.1 MB 11.3 MB/s eta 0:00:00
Collecting sympy==1.13.1
  Downloading sympy-1.13.1-py3-none-any.whl (6.2 MB)
     ---------------------------------------- 6.2/6.2 MB 20.8 MB/s eta 0:00:00
Collecting filelock
  Downloading filelock-3.16.1-py3-none-any.whl (16 kB)
Collecting fsspec
  Downloading fsspec-2024.10.0-py3-none-any.

You should consider upgrading via the 'c:\Programs Files\Python\Python310\python.exe -m pip install --upgrade pip' command.
