# Step 00: download libs and backages

## Java for Standford POS tagger

In [12]:
import os
import sys
import subprocess

def is_running_in_colab():
    """
    Check if the code is running in Google Colab environment
    
    Returns:
        bool: True if running in Colab, False otherwise
    """
    try:
        import google.colab
        return True
    except ImportError:
        return False

def is_running_in_jupyter():
    """
    Check if the code is running in Jupyter notebook
    
    Returns:
        bool: True if running in Jupyter, False otherwise
    """
    try:
        from IPython import get_ipython
        if 'IPKernelApp' in get_ipython().config:
            return True
        else:
            return False
    except:
        return False

def setup_java_environment():
    """
    Setup Java environment automatically based on the platform
    
    Raises:
        EnvironmentError: If Java setup fails locally
    """
    
    if is_running_in_colab():
        print("Detected Google Colab environment - Setting up Java...")
        
        # Install JDK in Colab
        try:
            print("Installing OpenJDK 8...")
            subprocess.run([
                'apt-get', 'update', 
                '&&', 'apt-get', 'install', '-y', 'openjdk-8-jdk'
            ], check=True, shell=True)
            
            # Set environment variables
            java_home = '/usr/lib/jvm/java-8-openjdk-amd64'
            os.environ['JAVA_HOME'] = java_home
            os.environ['PATH'] = f"{java_home}/bin:{os.environ['PATH']}"
            
            print("Java setup completed successfully in Colab")
            
        except subprocess.CalledProcessError as e:
            print(f"Failed to install Java in Colab: {e}")
            raise EnvironmentError("Java installation failed in Colab")
        
        # Check if Java is already installed locally
        try:
            result = subprocess.run(
                ['java', '-version'], 
                capture_output=True, 
                text=True, 
                timeout=10
            )
            if result.returncode == 0:
                print("Java is already installed locally")
                # Extract and set JAVA_HOME if not set
                if not os.environ.get('JAVA_HOME'):
                    print("JAVA_HOME is not set. Please set it manually.")
            else:
                raise EnvironmentError("Java is not installed locally")
                
        except (FileNotFoundError, subprocess.TimeoutExpired):
            # Running locally or in other environment
            print("Java is not installed. ")
            print("Please install Java manually:")
            print("   - Download JDK 8 from: https://adoptopenjdk.net/")
            print("   - Set JAVA_HOME environment variable")
            print("   - Add Java to your PATH")
            raise EnvironmentError(
                "Please install JDK 8 manually for local execution."
            )


def check_java_installation():
    """
    Verify Java installation and version
    
    Returns:
        dict: Installation status and version info
    """
    try:
        result = subprocess.run(
            ['java', '-version'], 
            capture_output=True, 
            text=True, 
            timeout=10
        )
        
        if result.returncode == 0:
            version_info = result.stderr.split('\n')[0]
            return {
                'installed': True,
                'version': version_info,
                'environment': 'Colab' if is_running_in_colab() else 'Local'
            }
        else:
            return {
                'installed': False,
                'error': 'Java command failed',
                'environment': 'Colab' if is_running_in_colab() else 'Local'
            }
            
    except FileNotFoundError:
        return {
            'installed': False,
            'error': 'Java not found in PATH',
            'environment': 'Colab' if is_running_in_colab() else 'Local'
        }
    except Exception as e:
        return {
            'installed': False,
            'error': str(e),
            'environment': 'Colab' if is_running_in_colab() else 'Local'
        }
    
def check_java_installation():
    """Check if Java is installed and get version"""
    try:
        result = subprocess.run(['java', '-version'], 
                              capture_output=True, 
                              text=True, 
                              timeout=10)
        
        if result.returncode == 0:
            version_line = result.stderr.split('\n')[0]
            return {'status': 'installed', 'version': version_line}
        else:
            return {'status': 'not installed'}
            
    except FileNotFoundError as e:
        return {'status': 'not installed', 'error': str(e)}
    except Exception as e:
        return {'status': 'not installed', 'error': str(e)}

def check_java_home():
    """Check JAVA_HOME environment variable"""
    java_home = os.environ.get('JAVA_HOME')
    if java_home:
        print(f"JAVA_HOME is set: {java_home}")
        
        # Check if java exists in JAVA_HOME
        java_path = os.path.join(java_home, 'bin', 'java')
        if os.path.exists(java_path):
            print("Java executable found in JAVA_HOME")
            return True
        else:
            print("Java executable NOT found in JAVA_HOME")
            return False
    else:
        print("JAVA_HOME is not set")
        return False

print("Detecting environment...")

# Check environment
if is_running_in_colab():
    print("Running in Google Colab")
else:
    print("Running locally")

# Setup Java environment
try:
    setup_java_environment()
    
    # Verify installation
    java_status = check_java_installation()
    if java_status['status']:
        print(f"Java is ready: {java_status['version']}")
    else:
        print(f"Java check failed: {java_status['error']}")
        
except EnvironmentError as e:
    print(f"Environment setup failed: {e}")
    print("Please setup Java manually and try again")
    raise Exception(e)

# Continue with your Stanford POS Tagger code
print("Proceeding with Stanford POS Tagger setup...")

Detecting environment...
Running locally
Java is ready: java version "1.8.0_202"
Proceeding with Stanford POS Tagger setup...


In [3]:
%pip install -r requirements.txt

# nltk punkt_lab
import nltk
nltk.download('punkt_tab')

# Standord POS
import os
import zipfile
import requests

standford_postagger_path = './content/drive/MyDrive/stanford-postagger-full'

if not os.path.exists(standford_postagger_path):
    print("Downloading Stanford POS Tagger...")
    url = "https://nlp.stanford.edu/software/stanford-postagger-full-2018-10-16.zip"
    response = requests.get(url)

    with open('./content/stanford-postagger-full.zip', 'wb') as f:
        f.write(response.content)

    with zipfile.ZipFile('./content/stanford-postagger-full.zip', 'r') as zip_ref:
        zip_ref.extractall('./content/drive/MyDrive/')

    os.rename('./content/drive/MyDrive/stanford-postagger-full-2018-10-16', standford_postagger_path)
    print("Done!")
else:
    print(f"Standford postagger already downloaed at {standford_postagger_path},\nTo download again, delete the download folder and run the code again.")

Note: you may need to restart the kernel to use updated packages.
Standford postagger already downloaed at ./content/drive/MyDrive/stanford-postagger-full, to download again, delete the download folder and run the code again.


You should consider upgrading via the 'c:\pyvm\anlp-py310\Scripts\python.exe -m pip install --upgrade pip' command.
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\MK1349\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


# Step 01: Relations and Templates

# Step 02: Dataset

## download raw corpus

## preprocess corpus