From 1d3d8d2bdd02e2448d7bf3ec74d72aca541b5203 Mon Sep 17 00:00:00 2001
From: MethosGr
Date: Fri, 2 May 2025 11:56:51 +0300
Subject: [PATCH] Script for Bulk import of Metadata fields in Phrase TMS - v 0.05
---
 Bulk import script Documentation.txt | 175 ++++++++++++++++++++++
 import_tool.py                       | 205 ++++++++++++++++++++++++++
 2 files changed, 380 insertions(+)
 create mode 100644 Bulk import script Documentation.txt
 create mode 100644 import_tool.py

diff --git a/Bulk import script Documentation.txt b/Bulk import script Documentation.txt
new file mode 100644
index 0000000..ed5a5ea
--- /dev/null
+++ b/Bulk import script Documentation.txt
@@ -0,0 +1,175 @@
Phrase TMS Bulk Import Script Documentation
Version: 2.0 (Progress Bar Enhanced)
Last Updated: [Date]

1. Prerequisites

1.1 System Requirements
Python 3.8+ (tested on 3.8, 3.9, and 3.10)
RAM: minimum 512MB (the script streams the CSV, so memory usage stays low)
Disk space: 50MB free
Network: HTTPS access to cloud.memsource.com

1.2 Software Dependencies
Install these packages using pip:

bash
pip install \
    requests==2.31.0 \
    tqdm==4.66.1 \
    python-dotenv==1.0.0 \
    urllib3==1.26.18

1.3 Phrase TMS Requirements
Valid API credentials (admin-level access)
Existing domain structure (if creating subdomains)
API access enabled for your account


2. Setup Instructions

2.1 Environment Configuration
Create a project folder:

bash
mkdir phrase-import && cd phrase-import

Add credentials to a .env file:

ini
# .env
PHRASE_USER="your.email@organization.com"
PHRASE_PASSWORD="yourSecurePassword123!"

Restrict file permissions (Linux/macOS):

bash
chmod 600 .env

2.2 CSV File Preparation
File requirements:
UTF-8 encoding
First row is the header
Column names are case-insensitive

Columns:
Column Name        Required For      Example Value
type               All rows          domain
name               All rows          Marketing Team
timezone           Domains           Europe/Paris
parent_domain_id   Subdomains        DOM-1234
client_id          Business Units    CLIENT-5678

Sample CSV (structure.csv):

csv
type,name,timezone,parent_domain_id,client_id
domain,EMEA Division,Europe/Berlin,,
subdomain,France Team,,DOM-9876,
client,Acme Corporation,,,
business_unit,Legal Dept,,,CLIENT-123


3. Execution Guide

3.1 Basic Command

bash
python import_tool.py structure.csv

Expected output: a real-time progress bar with running success/error/skipped counts.

3.2 Advanced Options
Flag          Description                          Example
--delimiter   CSV delimiter character              --delimiter=';'
--dry-run     Validate without creating entities   --dry-run
--help        Show help message                    python import_tool.py -h

Dry run example:

bash
python import_tool.py test_data.csv --delimiter=',' --dry-run


4. Post-Execution Steps

4.1 Verify Results
Check bulk_import.log:

bash
tail -f bulk_import.log

Validate in the Phrase TMS UI:
Domains: Admin Console → Domains
Clients: Admin Console → Clients

4.2 Handle Errors
Retry failed items. First list the failures recorded in the log:

bash
grep "ERROR" bulk_import.log

Note that the grep output consists of log lines, not importable CSV rows: rebuild failed_rows.csv with the original header plus the affected data rows (a helper sketch follows this section), then re-run with the filtered file:

bash
python import_tool.py failed_rows.csv

Common error codes:
Code   Meaning                   Solution
401    Invalid credentials       Verify the .env file
403    Permission denied         Check admin rights
409    Entity already exists     Update the CSV with unique names/IDs
500    Server error              Retry after 5 minutes
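To automate the rebuild described in 4.2, here is a minimal helper sketch. It assumes the log format configured in import_tool.py ('%(asctime)s - %(levelname)s - %(message)s'), targets the "Missing fields: [...] in <name>" messages, and assumes lower-case CSV headers (as in the sample file) and entity names that are unique; LOG_FILE, SOURCE_CSV and OUTPUT_CSV are illustrative names.

python
"""Rebuild failed_rows.csv from bulk_import.log (illustrative sketch)."""
import csv
import re

LOG_FILE = "bulk_import.log"    # written by import_tool.py
SOURCE_CSV = "structure.csv"    # the file you originally imported
OUTPUT_CSV = "failed_rows.csv"  # importable subset to retry

# Collect entity names from messages such as
# "... - WARNING - Missing fields: ['timezone'] in France Team"
failed_names = set()
with open(LOG_FILE, encoding="utf-8") as log:
    for line in log:
        match = re.search(r" - (?:ERROR|WARNING) - .* in (.+)$", line.rstrip())
        if match:
            failed_names.add(match.group(1))

# Copy the header plus only the failing data rows into a fresh CSV.
with open(SOURCE_CSV, newline="", encoding="utf-8") as src, \
     open(OUTPUT_CSV, "w", newline="", encoding="utf-8") as dst:
    reader = csv.DictReader(src)
    writer = csv.DictWriter(dst, fieldnames=reader.fieldnames)
    writer.writeheader()
    for row in reader:
        if row.get("name", "").strip() in failed_names:
            writer.writerow(row)

print(f"Wrote {OUTPUT_CSV} ({len(failed_names)} failing names found)")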
5. Performance Optimization

5.1 For Large Files (>10,000 Rows)
Split the CSV into chunks (note: split does not copy the header row into each chunk, so prepend the original header to every chunk before importing):

bash
split -l 1000 large_file.csv chunk_

Process the chunks in parallel (GNU Parallel):

bash
parallel -j 4 "python import_tool.py {}" ::: chunk_*

5.2 Memory Management
Windows: monitor the Python process in Task Manager.

Linux:

bash
top -p $(pgrep -f import_tool.py)

macOS:

bash
top -pid $(pgrep -f import_tool.py)


6. Security Best Practices

6.1 Credential Safety
Rotate API passwords quarterly.
Never commit .env to version control.
Use environment variables in production:

bash
export PHRASE_USER="user@company.com"
export PHRASE_PASSWORD="..."

6.2 Network Security
Whitelist the Phrase TMS IP ranges:
52.28.160.0/19
52.57.224.0/19
Use a VPN for on-premise deployments.


7. Support

7.1 Troubleshooting Guide
Symptom               Diagnostic Command
Slow performance      ping cloud.memsource.com
Connection failures   curl -v https://cloud.memsource.com/web/api/v1/auth/login
Encoding errors       file -I structure.csv

7.2 Contact Information
Phrase TMS Support: support@phrase.com
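For scripted diagnostics, the checks in 7.1 can be bundled into one pre-flight test. The sketch below is illustrative only: it reuses the BASE_URL and .env credentials from import_tool.py and simply reports whether the login endpoint answers.

python
"""Pre-flight connectivity and credential check (illustrative sketch)."""
import os

import requests
from dotenv import load_dotenv

BASE_URL = "https://cloud.memsource.com/web/api/v1"  # same value as import_tool.py

load_dotenv()
try:
    response = requests.post(
        f"{BASE_URL}/auth/login",
        json={
            "userName": os.getenv("PHRASE_USER"),
            "password": os.getenv("PHRASE_PASSWORD"),
        },
        timeout=10,
    )
    # 200 means network and credentials are both fine;
    # 401 means the endpoint is reachable but the credentials are wrong.
    print(f"HTTP {response.status_code}: {response.reason}")
except requests.RequestException as exc:
    print(f"Connection problem: {exc}")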
diff --git a/import_tool.py b/import_tool.py
new file mode 100644
index 0000000..c4f5f8b
--- /dev/null
+++ b/import_tool.py
@@ -0,0 +1,205 @@
"""
Phrase TMS Bulk Import Script with Progress Tracking
- Memory-safe streaming CSV processing
- Real-time progress statistics
- Enterprise-grade error handling
"""

import os
import csv
import logging
from time import sleep
from typing import Dict, Any, Generator

import requests
from dotenv import load_dotenv
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
from tqdm import tqdm

# Configuration
load_dotenv()
BASE_URL = "https://cloud.memsource.com/web/api/v1"  # Keep consistent with the documentation
MAX_RETRIES = 3
BACKOFF_FACTOR = 1
TIMEOUT = 30
CSV_FIELDS = {
    'domain': ['name', 'timezone'],
    'subdomain': ['name', 'parent_domain_id'],
    'client': ['name'],
    'business_unit': ['name', 'client_id']
}

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler('bulk_import.log'),
        logging.StreamHandler()
    ]
)


class PhraseTMSClient:
    """API client with connection pooling and automatic retries"""

    def __init__(self):
        self.session = requests.Session()
        retry = Retry(
            total=MAX_RETRIES,
            backoff_factor=BACKOFF_FACTOR,
            status_forcelist=[500, 502, 503, 504],
            allowed_methods=['POST', 'PUT', 'GET', 'DELETE']
        )
        adapter = HTTPAdapter(max_retries=retry)
        self.session.mount('https://', adapter)
        self.token = self._authenticate()

    def _authenticate(self) -> str:
        """Authenticate using credentials from environment variables"""
        credentials = {
            'userName': os.getenv('PHRASE_USER'),
            'password': os.getenv('PHRASE_PASSWORD')
        }
        response = self.session.post(
            f"{BASE_URL}/auth/login",
            json=credentials,
            timeout=TIMEOUT
        )
        response.raise_for_status()
        return response.json()['token']

    def create_entity(self, entity_type: str, data: Dict[str, Any]) -> Dict[str, Any]:
        """Generic entity creation with conflict detection"""
        endpoints = {
            'domain': '/domains',
            'subdomain': lambda d: f"/domains/{d['parent_domain_id']}/subDomains",
            'client': '/clients',
            'business_unit': '/businessUnits'
        }

        url = BASE_URL + (
            endpoints[entity_type](data) if callable(endpoints[entity_type])
            else endpoints[entity_type]
        )

        # Send only populated fields; the 'type' column is routing
        # information for this script, not part of the API payload.
        payload = {k: v for k, v in data.items() if v and k != 'type'}

        response = self.session.post(
            url,
            json=payload,
            headers={'Authorization': f'ApiToken {self.token}'},
            timeout=TIMEOUT
        )

        if response.status_code == 409:
            logging.info(f"Entity already exists: {data.get('name')}")
            return {'status': 'conflict'}

        response.raise_for_status()
        return response.json()


def validate_row(entity_type: str, row: Dict[str, str]) -> bool:
    """Structural validation of CSV rows"""
    required = CSV_FIELDS[entity_type]
    missing = [field for field in required if not row.get(field)]
    if missing:
        logging.warning(f"Missing fields: {missing} in {row.get('name')}")
        return False
    return True


def count_csv_rows(file_path: str, delimiter: str) -> int:
    """Memory-efficient row counting"""
    with open(file_path, 'r', encoding='utf-8') as f:
        reader = csv.reader(f, delimiter=delimiter)
        next(reader, None)  # Skip header
        return sum(1 for _ in reader)


def process_csv(file_path: str, delimiter: str) -> Generator[Dict[str, str], None, None]:
    """Streaming CSV parser that lower-cases keys and strips whitespace"""
    with open(file_path, 'r', encoding='utf-8', newline='') as f:
        reader = csv.DictReader(f, delimiter=delimiter)
        for row in reader:
            yield {k.strip().lower(): v.strip() for k, v in row.items()}


def bulk_import(file_path: str, delimiter: str, dry_run: bool = False):
    """Main import workflow with progress tracking"""
    client = PhraseTMSClient()
    stats = {'success': 0, 'errors': 0, 'skipped': 0}

    total_rows = count_csv_rows(file_path, delimiter)

    with tqdm(
        total=total_rows,
        desc="šŸš€ Importing",
        unit="row",
        bar_format="{l_bar}{bar:20}{r_bar}",
        dynamic_ncols=True
    ) as pbar:
        for row in process_csv(file_path, delimiter):
            try:
                entity_type = row.get('type', '').lower()
                if not entity_type or entity_type not in CSV_FIELDS:
                    stats['errors'] += 1
                    logging.error(f"Invalid type: {row.get('type')}")
                    continue

                if not validate_row(entity_type, row):
                    stats['errors'] += 1
                    continue

                if dry_run:
                    stats['success'] += 1
                    continue

                result = client.create_entity(entity_type, row)
                if result.get('status') == 'conflict':
                    stats['skipped'] += 1
                else:
                    stats['success'] += 1

            except Exception as e:
                stats['errors'] += 1
                logging.error(f"Row error: {e}")
                sleep(0.5)  # Brief cooldown after an error

            finally:
                pbar.update(1)
                pbar.set_postfix(
                    success=stats['success'],
                    errors=stats['errors'],
                    skipped=stats['skipped'],
                    refresh=False
                )

    logging.info("\nšŸ”„ Final Statistics:")
    logging.info(f"āœ… Success: {stats['success']}")
    logging.info(f"āš ļø Skipped: {stats['skipped']}")
    logging.info(f"āŒ Errors: {stats['errors']}")


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description='Phrase TMS Bulk Import Tool')
    parser.add_argument('file', help='CSV file path')
    parser.add_argument('--delimiter', default=',', help='CSV delimiter')
    parser.add_argument('--dry-run', action='store_true', help='Simulate import')
    args = parser.parse_args()

    try:
        bulk_import(
            args.file,
            args.delimiter,
            args.dry_run
        )
    except KeyboardInterrupt:
        logging.info("\nšŸ›‘ Operation cancelled by user")
    except Exception as e:
        logging.error(f"šŸ’„ Fatal error: {e}")
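As a quick sanity check of the pure-Python helpers in the script, the following sketch (not part of the patch) exercises process_csv and validate_row against a throwaway CSV. It assumes the script is saved as import_tool.py in the working directory; importing the module also configures its logging, so a bulk_import.log file will appear.

python
"""Smoke test for the CSV helpers (illustrative sketch)."""
import os
import tempfile

from import_tool import process_csv, validate_row

CSV_TEXT = (
    "Type,Name,Timezone,parent_domain_id,client_id\n"
    "domain,EMEA Division,Europe/Berlin,,\n"
    "domain,Broken Row,,,\n"
)

with tempfile.NamedTemporaryFile("w", suffix=".csv", delete=False) as tmp:
    tmp.write(CSV_TEXT)
    path = tmp.name

rows = list(process_csv(path, ","))
assert rows[0]["name"] == "EMEA Division"        # keys lower-cased, values stripped
assert validate_row("domain", rows[0]) is True   # all required fields present
assert validate_row("domain", rows[1]) is False  # missing timezone -> warning logged
os.unlink(path)
print("CSV helpers behave as expected")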