### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

#### Parsing rfp document

In [57]:
!pip install pdfplumber



In [58]:
!pip install python-pptx



In [59]:
import pdfplumber

pdf_path = "rfp_sample.pdf"

sections = {}
current_section = None
section_text = ""

with pdfplumber.open(pdf_path) as pdf:
    for page in pdf.pages:
        text = page.extract_text()
        
        # Split the text into lines
        lines = text.split("\n")
        
        for line in lines:
            # Check for potential section headings (e.g., lines in all caps, numbers followed by a period)
            if line.isupper() or (line[0].isdigit() and line[1:2] == "."):
                # Save the previous section if any
                if current_section:
                    sections[current_section] = section_text.strip()
                
                # Start a new section
                current_section = line
                section_text = ""
            else:
                # Accumulate section text
                section_text += line + " "
        
        # Save the last section after processing the page
        if current_section:
            sections[current_section] = section_text.strip()

# Convert sections dictionary to a DataFrame or you can use it as-is
import pandas as pd
sections_df = pd.DataFrame(list(sections.items()), columns=["Section", "Content"])

# You can now save the extracted sections or manipulate them further
sections_df.to_csv('extracted_sections.csv', index=False)

#### Parsing rfp response document

In [60]:
from pptx import Presentation

def extract_text_from_pptx(pptx_path):
    prs = Presentation(pptx_path)
    text_content = []
    for slide in prs.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                text_content.append(shape.text)
    return "\n".join(text_content)

# Usage:
rfp_response_text = extract_text_from_pptx("rfp_response.pptx")

In [61]:
rfp_response_text

"Oracle Data Mart & Analytics with LLM powered Oracle Chat Bot\nSmart Chat Assistant\nOracle Consulting\n\n\n\nMAY  2024\nRough Order of Magnitude - Proposal\nAgenda\n2\n1\n2\nBusiness Value Through this Implementation\n4\nScope & Deliverables\n5\n6\nAssumptions & Obligations\n3\nImplementation RACI\n\n\n\nExecutive Summary\nADA roles\nCopyright © 2024, Oracle and/or its affiliates  \nOracle Consulting will implement  Oracle chatbot, powered by state-of-the-art Large Language Model (LLM) technology, offers advanced conversational capabilities, enabling businesses to interact with data in a more intuitive and efficient manner. Integrated seamlessly with Oracle's robust Data Mart and analytics platform, this solution transforms the way Abu Dhabi Aviation can access, analyze, and leverage their data.\n\nKey Features include Advanced Conversational AI Leveraging LLM technology, Seamless Integration with Oracle Data Mart coupled with Powerful Oracle Analytics Dashboards and Reports.\n\n\n\n

#### Extract BOM from 'Environment Setup' section

In [62]:
import re
def extract_section(text, section_name, stop_keywords):
    """
    Extracts content under a given section name until the next section
    or a stop keyword like "Copyright" or a new numbered section.
    """
    section_content = []
    lines = text.split("\n")
    capture = False
    
    for line in lines:
        # Check if the line contains the section heading
        if section_name.lower() in line.lower():
            capture = True
            section_content.append(line)  # Add the section title too
        elif capture:
            # Stop if we encounter a stop keyword or line that starts a new section (could be numeric)
            if any(stop_keyword.lower() in line.lower() for stop_keyword in stop_keywords) or \
               line.strip().isdigit():
                break
            section_content.append(line)
    
    cleaned_section = re.sub(r'[\x0b\x0c]', '', "\n".join(section_content)).strip()
    cleaned_section = cleaned_section.replace("ScopeEnvironment setup", "").strip()
    return cleaned_section

# Usage:
pptx_path = "rfp_response.pptx"
rfp_response_text = extract_text_from_pptx(pptx_path)

# Keywords that indicate the end of a section (e.g., new sections or copyright info)
stop_keywords = ["Copyright", "Scope", "Business Value", "Data Sources", "Deliverables", "Assumptions"]

# Extract the "Environment Setup" section
environment_setup_section = extract_section(rfp_response_text, "Environment setup", stop_keywords)
print(environment_setup_section)

Environment setup on OCI 
[2 environments Non-Prod and Prod ]

OCI Foundation Setup
Oracle Data Integration
Autonomous database
Oracle Digital Assistant 
Object Storage 
Oracle Identity Access Management Cloud Services 
Oracle Analytics Cloud 
Oracle API gateway 
OCI GPU for LLM
OKE


In [63]:
import pandas as pd

# Example environment setup text (this should come from your actual extraction process)
# environment_setup_text = """
# Environment setup on OCI
# [2 environments Non-Prod and Prod]
# OCI Foundation Setup
# Oracle Data Integration
# Autonomous database
# Oracle Digital Assistant
# Object Storage
# Oracle Identity Access Management Cloud Services
# Oracle Analytics Cloud
# Oracle API gateway
# OCI GPU for LLM
# OKE
# """

# Split the text into lines and extract the offer names
offer_names = []
lines = environment_setup_section.split("\n")

# Loop through the lines and add any valid offer name to the list
for line in lines:
    line = line.strip()
    if line and "setup" not in line.lower():  # Ignore lines like 'Environment setup'
        offer_names.append(line)

# Display the extracted offer names
print("Extracted Offer Names:", offer_names)

Extracted Offer Names: ['[2 environments Non-Prod and Prod ]', 'Oracle Data Integration', 'Autonomous database', 'Oracle Digital Assistant', 'Object Storage', 'Oracle Identity Access Management Cloud Services', 'Oracle Analytics Cloud', 'Oracle API gateway', 'OCI GPU for LLM', 'OKE']


In [64]:
data = []
for offer_name in offer_names:
    # Create one entry for Prod
    data.append({
        "License Included PaaS": "Prod",
        "Offer Name": offer_name,
        "part_number": "",  # Placeholder for now
        "Unit Price": "",  # Placeholder for now
        "Metric": "",  # Placeholder for now
        "Minimum": "",  # Placeholder for now
        "Includes": "",  # Placeholder for now
        "Hours/Month": "",  # Placeholder for now
        "Months": "",  # Placeholder for now
        "Quantity": "",  # Placeholder for now
    })
    
    # Create another entry for Non-Prod
    data.append({
        "License Included PaaS": "Non-Prod",
        "Offer Name": offer_name,
        "part_number": "",  # Placeholder for now
        "Unit Price": "",  # Placeholder for now
        "Metric": "",  # Placeholder for now
        "Minimum": "",  # Placeholder for now
        "Includes": "",  # Placeholder for now
        "Hours/Month": "",  # Placeholder for now
        "Months": "",  # Placeholder for now
        "Quantity": "",  # Placeholder for now
    })

# Create a DataFrame from the data
df_bom = pd.DataFrame(data)
df_bom = df_bom[2:]

# Display the DataFrame
print(df_bom)

# Optionally save to a CSV or Excel file
df_bom.to_csv("bom_with_offer_names.csv", index=False)

   License Included PaaS                                        Offer Name  \
2                   Prod                           Oracle Data Integration   
3               Non-Prod                           Oracle Data Integration   
4                   Prod                               Autonomous database   
5               Non-Prod                               Autonomous database   
6                   Prod                          Oracle Digital Assistant   
7               Non-Prod                          Oracle Digital Assistant   
8                   Prod                                    Object Storage   
9               Non-Prod                                    Object Storage   
10                  Prod  Oracle Identity Access Management Cloud Services   
11              Non-Prod  Oracle Identity Access Management Cloud Services   
12                  Prod                            Oracle Analytics Cloud   
13              Non-Prod                            Oracle Analy

In [65]:
import openpyxl
df_bom.to_excel("bom_with_offer_names.xlsx", index=False,engine='openpyxl')

In [66]:
!pip install openpyxl



In [67]:
import requests
# ORDS credentials and endpoint (Replace with your actual values)
ORDS_USERNAME = "INTEGRATION_USER"
ORDS_PASSWORD = "ORA@ora123456"
ORDS_ENDPOINT = "https://xc1jkwmwg759api-apexsolutions.adb.us-ashburn-1.oraclecloudapps.com/ords/echo/ECHO/getProductPricing"

def fetch_ords_table_data(ords_endpoint, username, password):
    try:
        # Make a GET request to the ORDS endpoint
        response = requests.get(ords_endpoint, auth=(username, password))

        # Check if the request was successful
        if response.status_code != 200:
            print(f"Failed to fetch data from ORDS. Status Code: {response.status_code}")
            print(response.text)
            return None

        # Parse the JSON response
        data = response.json()
        rows = data.get('items', [])  # Extract the 'items' key where data resides

        # Convert to a Pandas DataFrame
        df = pd.DataFrame(rows)
        print("Fetched Data:")
        print(df.head())  # Display the first few rows

        # Save the data to a CSV file
        df.to_csv("ords_table_data.csv", index=False)
        print("Data saved to ords_table_data.csv")

        return df

    except Exception as e:
        print(f"An error occurred: {e}")

# Usage
if __name__ == "__main__":
    df = fetch_ords_table_data(ORDS_ENDPOINT, ORDS_USERNAME, ORDS_PASSWORD)

Fetched Data:
                                         productname  price  \
0  Oracle Autonomous Data Warehouse on Dedicated ...   0.34   
1  Oracle Autonomous Data Warehouse on Dedicated ...   0.08   
2  Oracle Autonomous Transaction Processing on De...   0.34   
3  Oracle Autonomous Transaction Processing on De...   0.08   
4  Oracle Autonomous Database on Dedicated Exadat...  14.52   

  universal_credits_minimums minimum_universal_credits_details   payasyougo  \
0                       .336                     ECPU Per Hour  Always Free   
1                      .0807                     ECPU Per Hour  Always Free   
2                       .336                     ECPU Per Hour  Always Free   
3                      .0807                     ECPU Per Hour  Always Free   
4                    14.5162       Hosted Environment Per Hour  Always Free   

    annualflex                       metric  \
0  Always Free                ECPU Per Hour   
1  Always Free                ECPU Per

In [68]:
df_bom

Unnamed: 0,License Included PaaS,Offer Name,part_number,Unit Price,Metric,Minimum,Includes,Hours/Month,Months,Quantity
2,Prod,Oracle Data Integration,,,,,,,,
3,Non-Prod,Oracle Data Integration,,,,,,,,
4,Prod,Autonomous database,,,,,,,,
5,Non-Prod,Autonomous database,,,,,,,,
6,Prod,Oracle Digital Assistant,,,,,,,,
7,Non-Prod,Oracle Digital Assistant,,,,,,,,
8,Prod,Object Storage,,,,,,,,
9,Non-Prod,Object Storage,,,,,,,,
10,Prod,Oracle Identity Access Management Cloud Services,,,,,,,,
11,Non-Prod,Oracle Identity Access Management Cloud Services,,,,,,,,


In [69]:
df.head()

Unnamed: 0,productname,price,universal_credits_minimums,minimum_universal_credits_details,payasyougo,annualflex,metric,additionalinformation,notes,part_number,product_other_names
0,Oracle Autonomous Data Warehouse on Dedicated ...,0.34,0.336,ECPU Per Hour,Always Free,Always Free,ECPU Per Hour,Partial ECPU hours consumed are billed per sec...,1,895712,
1,Oracle Autonomous Data Warehouse on Dedicated ...,0.08,0.0807,ECPU Per Hour,Always Free,Always Free,ECPU Per Hour,Partial ECPU hours consumed are billed per sec...,1,895714,
2,Oracle Autonomous Transaction Processing on De...,0.34,0.336,ECPU Per Hour,Always Free,Always Free,ECPU Per Hour,Partial ECPU hours consumed are billed per sec...,1,895713,
3,Oracle Autonomous Transaction Processing on De...,0.08,0.0807,ECPU Per Hour,Always Free,Always Free,ECPU Per Hour,Partial ECPU hours consumed are billed per sec...,1,895715,
4,Oracle Autonomous Database on Dedicated Exadat...,14.52,14.5162,Hosted Environment Per Hour,Always Free,Always Free,Hosted Environment Per Hour,Zero (0) OCPUs enabled. 149 terabytes of usabl...,1,B91535,


In [70]:
df.to_csv('bom_master.csv')

In [71]:
df = pd.read_csv(r'bom_master.csv')

In [72]:
merged_df = pd.merge(df_bom, df, left_on='Offer Name', right_on='product_other_names', how='left')
merged_df['part_number'] = merged_df['part_number_y']  # Use the part_number from df (suffix _y)
merged_df['Unit Price'] = merged_df['price']  # Fill Unit Price
merged_df['Metric'] = merged_df['metric']    # Fill Metric

columns_to_drop = ['productname', 'price', 'universal_credits_minimums', 'minimum_universal_credits_details', 'payasyougo', 'annualflex', 'additionalinformation', 'notes', 'part_number_y', 'product_other_names']
merged_df.drop(columns=columns_to_drop, inplace=True)

print(merged_df)

   License Included PaaS                                        Offer Name  \
0                   Prod                           Oracle Data Integration   
1               Non-Prod                           Oracle Data Integration   
2                   Prod                               Autonomous database   
3               Non-Prod                               Autonomous database   
4                   Prod                          Oracle Digital Assistant   
5               Non-Prod                          Oracle Digital Assistant   
6                   Prod                                    Object Storage   
7               Non-Prod                                    Object Storage   
8                   Prod  Oracle Identity Access Management Cloud Services   
9               Non-Prod  Oracle Identity Access Management Cloud Services   
10                  Prod                            Oracle Analytics Cloud   
11              Non-Prod                            Oracle Analy

In [73]:
merged_df.to_csv(r'ai_generated_bom.csv')