In [9]:
import spacy
from geopy.geocoders import Nominatim
import pandas as pd

In [10]:
pip install geopy

Note: you may need to restart the kernel to use updated packages.


In [11]:
# Pre-trained spaCy pipeline whose NER component is used for entity extraction
spacy_model_name = "en_core_web_sm"
nlp = spacy.load(spacy_model_name)

In [16]:


# Function to extract institution, program, and place with custom logic
def extract_info(text):
    """Split a free-text name into institution, program and place.

    The text is run through the module-level spaCy pipeline ``nlp``. The
    ``ORG`` entity is taken as the institution and the ``GPE`` entity as the
    place; whatever is left of the text after removing both is reported as
    the program.

    Args:
        text: The raw string to analyse. Non-string values (for example the
            NaN pandas produces for an empty CSV cell, or ``None``) are
            accepted and yield an all-``None`` result instead of being
            passed to spaCy.

    Returns:
        dict with the keys ``"Institution"``, ``"Program"`` and ``"Place"``.
        ``"Institution"`` / ``"Place"`` are ``None`` when no matching entity
        was found. ``"Program"`` is the remaining text with whitespace runs
        collapsed and leading/trailing separators (space, comma, semicolon,
        hyphen) removed.
    """
    # Nothing can be extracted from a missing / non-text value.
    if not isinstance(text, str):
        return {"Institution": None, "Program": None, "Place": None}

    doc = nlp(text)

    # One entry per entity label. When several entities share a label the
    # later one overwrites the earlier one, so the LAST ORG / GPE is used.
    entities = {ent.label_: ent.text for ent in doc.ents}
    institution = entities.get("ORG")  # ORG -> Institution (like A T Still University)
    place = entities.get("GPE")  # GPE -> Place (like Arizona)

    # Remove institution first, then place, from the text; the remainder is
    # treated as the program name.
    program = text
    for found in (institution, place):
        if found:
            program = program.replace(found, "")

    # Cutting entities out of the middle of the text leaves doubled spaces
    # and orphaned separators (", Physical Therapy,"); tidy both up.
    program = " ".join(program.split()).strip(" ,;-")

    return {
        "Institution": institution,
        "Program": program,
        "Place": place
    }

# Load the CSV file
input_csv = 'unmapped3.csv'  # Path to your CSV file
df = pd.read_csv(input_csv)

# Run the extraction over the NAME column directly. Iterating the column
# avoids constructing a full row Series per record the way iterrows() does.
extracted_data = [extract_info(text) for text in df['NAME']]

# Build the extracted frame on df's own index so the column-wise concat below
# pairs each result with its source row, and fix the column names so an empty
# input still produces the three output columns.
extracted_df = pd.DataFrame(
    extracted_data,
    index=df.index,
    columns=["Institution", "Program", "Place"],
)

# Combine the extracted data with the original dataframe
output_df = pd.concat([df, extracted_df], axis=1)

# Save the result to a new CSV file
output_csv = 'output_hyknk.csv'  # Path to save the result
output_df.to_csv(output_csv, index=False)

print("Institution, Program, and Place have been extracted and saved to a new CSV file.")



Institution, Program, and Place have been extracted and saved to a new CSV file.
