### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [1]:
import openpyxl
import oci
import io
from datetime import datetime, timedelta
import docx
import pdfplumber
from docx import Document
from io import BytesIO

In [2]:
# OCID of the OCI compartment; passed as ChatDetails.compartment_id in ask_question.
compartment_id = "ocid1.compartment.oc1..aaaaaaaaretksgipt3jgwfpzgh4ijyw54uynyfviaxs5li4wtl744fj4fi3q"
# Name of the profile to read from the OCI config file.
CONFIG_PROFILE = "DEFAULT"
# 'config' is a relative path, so the file is resolved against the notebook's
# current working directory.
config = oci.config.from_file('config', CONFIG_PROFILE)

In [3]:
# Service endpoint of the OCI Generative AI inference API (us-chicago-1 host).
endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
# Module-level client reused by ask_question for every chat call.
generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config=config, service_endpoint=endpoint)

In [4]:
def ask_question(
    question,
    input_text,
    *,
    model_id="ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceya7ozidbukxwtun4ocm4ngco2jukoaht5mygpgr6gq2lgq",
    max_tokens=1600,
    temperature=0.85,
    top_p=0.85,
):
    """Ask the OCI Generative AI chat model a question about a piece of text.

    The prompt sent to the model is ``question`` followed by a ``Context:``
    section containing ``input_text``. The request goes through the
    module-level ``generative_ai_inference_client`` and is billed against the
    module-level ``compartment_id``.

    Args:
        question: The question to ask.
        input_text: Source text the model should answer from.
        model_id: OCID of the on-demand model to use. Defaults to the model
            this notebook was written against.
        max_tokens: Upper bound on the number of generated tokens.
        temperature: Sampling temperature. The default of 0.85 is on the
            high side; pass a lower value for more deterministic answers.
        top_p: Nucleus-sampling cut-off.

    Returns:
        The message of the last entry in the returned chat history, or the
        string ``"No response"`` when the history is empty or missing.
    """
    models = oci.generative_ai_inference.models

    chat_request = models.CohereChatRequest()
    # Question first, then the document text as context.
    chat_request.message = f"{question}\n\nContext:\n{input_text}"
    chat_request.max_tokens = max_tokens
    chat_request.temperature = temperature
    chat_request.frequency_penalty = 0
    chat_request.top_p = top_p
    chat_request.top_k = 0

    chat_detail = models.ChatDetails()
    chat_detail.serving_mode = models.OnDemandServingMode(model_id=model_id)
    chat_detail.chat_request = chat_request
    chat_detail.compartment_id = compartment_id

    chat_response = generative_ai_inference_client.chat(chat_detail)
    chat_history = chat_response.data.chat_response.chat_history
    if chat_history:
        # The newest entry of the history carries the model's reply.
        return chat_history[-1].message
    return "No response"

In [5]:
# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page of a PDF.

    Page texts are joined back-to-back with no separator, exactly as before.

    Args:
        pdf_path: Path (or file-like object) accepted by ``pdfplumber.open``.

    Returns:
        A single string with the text of all pages; empty if no page has
        extractable text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can yield None for a page without a text layer
        # (e.g. a scanned image); treat that as empty instead of crashing.
        return ''.join(page.extract_text() or '' for page in pdf.pages)

# Extract text from PPTX (using python-pptx)
from pptx import Presentation

def extract_text_from_pptx(pptx_path):
    """Return the text of every text-bearing shape in a PowerPoint file.

    Slides are visited in order; each shape that exposes a ``text`` attribute
    contributes its text followed by a newline.
    """
    deck = Presentation(pptx_path)
    pieces = []
    for slide in deck.slides:
        pieces.extend(
            shape.text + "\n"
            for shape in slide.shapes
            if hasattr(shape, "text")
        )
    return ''.join(pieces)

def extract_text_from_requirements_docx(file_path):
    """Return the paragraph texts of a Word document, one per line."""
    document = docx.Document(file_path)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)

In [6]:
# Load the three source documents as plain text: the RFP (PDF), the RFP
# response (PPTX) and the requirements document (DOCX). All paths are relative
# to the notebook's working directory.
rfp_text = extract_text_from_pdf('rfp.pdf')
rfp_response_text = extract_text_from_pptx('rfp_response.pptx')
requirements_text = extract_text_from_requirements_docx('PROJECT_SPEAK_MATE_DELIVERY_DOCS_updated_requirements_document (4).docx')

In [26]:
# Prompts sent to the model, one per section of the technical specification.
# NOTE: generate_technical_spec reads these by position (questions[0] ..
# questions[15]), so reordering or removing an entry changes which prompt
# feeds which section.
questions = [
    "What is the purpose and scope of the project based on the RFP and response?",
    "What are the key technical terms and abbreviations used in the project?",
    "What are the key assumptions for this project?",
    "What documents, systems, or standards are referenced for this project?",
    "What is the overall solution architecture, including workflows and key processes?",
    "What is the physical setup for this solution?",
    "What is the data modeling and migration strategy for this project?",
    "What are the key custom UI screens and logic details?",
    "What authentication mechanisms are being used for this project?",
    "What are the key integration points for this project?",
    "What reports are required for this project, and how will they be generated?",
    "How will errors be handled in the integration and application layers?",
    "What is the notification mechanism for the system?",
    "What are the key database objects being used for this project?",
    "What are the main REST services and APIs used for communication?",
    "What are the post-implementation support activities planned for this project?"
]

In [27]:
len(questions)

16

In [28]:
def generate_technical_spec(rfp_text, rfp_response_text, requirements_text):
    """Build the technical specification by querying the model per section.

    Each section is produced by one ``ask_question`` call that pairs an entry
    of the module-level ``questions`` list with one of the three source texts.

    Args:
        rfp_text: Text extracted from the RFP.
        rfp_response_text: Text extracted from the RFP response.
        requirements_text: Text extracted from the requirements document.

    Returns:
        A dict mapping section title to the model's answer, in document order.
    """
    # (section title, index into `questions`, context text handed to the model)
    section_plan = (
        # Overview
        ('Purpose and Scope', 0, rfp_text),
        ('Glossary of Technical Terms', 1, rfp_response_text),
        ('Assumptions', 2, requirements_text),
        ('References', 3, rfp_text),
        # Solution overview
        ('Solution Architecture', 4, rfp_response_text),
        # Application components
        ('Physical Setup', 5, requirements_text),
        ('Data Modeling and Migration', 6, rfp_response_text),
        ('Custom UI Screens and Logic', 7, requirements_text),
        ('Authentication Mechanisms', 8, rfp_response_text),
        # Services integration
        ('Integration Points', 9, rfp_response_text),
        ('Reports and Dashboards', 10, requirements_text),
        # Error handling and notifications
        ('Error Handling', 11, rfp_response_text),
        ('Notifications', 12, requirements_text),
        # Database and APIs
        ('Database Objects', 13, requirements_text),
        ('REST Services and APIs', 14, rfp_response_text),
        # Post-implementation support
        ('Post-Implementation Support', 15, requirements_text),
    )

    return {
        title: ask_question(questions[index], context)
        for title, index, context in section_plan
    }

In [30]:
# Runs one model call per specification section; the result is a dict of
# section title -> generated text.
technical_spec = generate_technical_spec(rfp_text, rfp_response_text, requirements_text)

In [31]:
technical_spec

{'Purpose and Scope': 'The purpose of the project is to implement a consolidation and reporting solution for Abu Dhabi Aviation Holding, leveraging the existing ADQ Aviation application and architecture. The scope of the project includes:\n\n- Enabling consolidation for statutory and management reporting\n- Implementing the ADA entity and ownership structure\n- Automating data integrations and loads for all entities\n- Enabling SmartView-based reporting packs\n- Facilitating data movements from ADA to AAS for statutory and management reporting\n- Enabling consolidation of actual, management, budget, and forecast scenarios\n- Providing variance analysis capabilities\n- Configuring standard out-of-box ratios and KPIs, as well as additional ratios for operational reporting\n\nThe project aims to facilitate monthly reporting, data movements, and consolidations for ADA, adhering to ADQ Aviation reporting requirements.',
 'Glossary of Technical Terms': "Here is a list of key technical terms 

In [32]:
type(technical_spec)

dict

In [43]:
def add_heading(doc, text, level):
    """Append a heading with the given text and outline level to ``doc``."""
    doc.add_heading(text, level)

def add_paragraph(doc, text):
    """Append ``text`` to ``doc`` as a new left-aligned paragraph.

    Args:
        doc: A python-docx ``Document`` (or any object whose
            ``add_paragraph`` returns a paragraph with an ``alignment``
            attribute).
        text: The paragraph text.
    """
    # Imported here because the enum is not among the imports visible at the
    # top of this notebook.
    from docx.enum.text import WD_ALIGN_PARAGRAPH

    # Keep the paragraph that was just created so its alignment can be set;
    # previously the result was discarded and an undefined name was used.
    para = doc.add_paragraph(text)
    para.alignment = WD_ALIGN_PARAGRAPH.LEFT

def generate_technical_spec_template(response_dict, template_path):
    """Render the specification sections into a Word document held in memory.

    The document is opened from ``template_path`` and the headings and
    paragraphs below are appended to it in order.

    Args:
        response_dict: Mapping of section key to generated text. A missing
            key is rendered as ``'No data available'``.
        template_path: Path of the .docx template to start from.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the saved document.
    """
    # (heading text, heading level, key in response_dict or None for a
    # heading that has no body paragraph of its own)
    layout = (
        ('1.Overview', 1, None),
        ('Purpose & Scope', 2, 'Purpose and Scope'),
        ('Glossary of Technical Terms', 2, 'Glossary of Technical Terms'),
        ('Assumptions', 2, 'Assumptions'),
        ('References', 2, 'References'),
        ('2.Solution Overview', 1, 'Solution Architecture'),
        ('3.Solution Components', 1, None),
        ('Physical View', 2, 'Physical Setup'),
        ('Data Modelling and Migration', 2, 'Data Modeling and Migration'),
        ('Custom UI Screens and Logic', 2, 'Custom UI Screens and Logic'),
        ('Authentication Mechanism', 2, 'Authentication Mechanisms'),
        ('Integrations', 2, 'Integration Points'),
        ('Reports and Dashboards', 2, 'Reports and Dashboards'),
        ('Error Handling', 2, 'Error Handling'),
        ('Notifications', 2, 'Notifications'),
        ('Database Objects', 2, 'Database Objects'),
        ('REST Services and APIs', 2, 'REST Services and APIs'),
        ('Post-Implementation Support', 2, 'Post-Implementation Support'),
    )

    doc = Document(template_path)
    for heading, depth, key in layout:
        doc.add_heading(heading, level=depth)
        if key is not None:
            doc.add_paragraph(response_dict.get(key, 'No data available'))

    # Serialise into memory and rewind so the caller can read from the start.
    buffer = BytesIO()
    doc.save(buffer)
    buffer.seek(0)
    return buffer


def upload_to_object_storage(object_storage_client, namespace_name, bucket_name, object_name, file_content):
    """Upload in-memory content to a bucket and report the outcome on stdout.

    Any exception raised during the upload is caught and printed rather than
    propagated, so the function always returns ``None``.

    Args:
        object_storage_client: Client exposing ``put_object``.
        namespace_name: Object Storage namespace.
        bucket_name: Target bucket.
        object_name: Name (key) of the object to create.
        file_content: Body to upload (bytes or a readable stream).
    """
    try:
        print(f"Uploading {object_name} to bucket {bucket_name}...")
        result = object_storage_client.put_object(
            namespace_name, bucket_name, object_name, file_content
        )
        print(f"Upload complete: {result.status}")
    except Exception as err:
        # Best-effort upload: report the failure and carry on.
        print(f"Failed to upload file to Object Storage: {err!s}")


# Configuration for OCI Object Storage: target namespace, bucket and object key.
namespace_name = "gc35013"
bucket_name = "ECHO"
object_name = "PROJECT_SPEAK_MATE/DELIVERY_DOCS/technical_specification.docx"
# NOTE(review): same value as the compartment_id assigned in cell In [2]; it is
# not used by the statements below in this cell.
compartment_id = "ocid1.compartment.oc1..aaaaaaaaretksgipt3jgwfpzgh4ijyw54uynyfviaxs5li4wtl744fj4fi3q"

# Load OCI configuration from file (re-reads the same "config"/"DEFAULT"
# profile loaded in cell In [2]) and create the Object Storage client.
config = oci.config.from_file("config", "DEFAULT")
object_storage_client = oci.object_storage.ObjectStorageClient(config)
# Word template the generated sections are appended to (relative path).
template_path = "td_template.docx"

# Generate document in memory; the result is a BytesIO rewound to offset 0.
document_content = generate_technical_spec_template(technical_spec, template_path)

# Upload the document to OCI Object Storage; failures are printed, not raised.
upload_to_object_storage(object_storage_client, namespace_name, bucket_name, object_name, document_content)

Uploading PROJECT_SPEAK_MATE/DELIVERY_DOCS/technical_specification.docx to bucket ECHO...
Upload complete: 200
