### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [27]:
import openpyxl
import oci
import io
from datetime import datetime, timedelta
import docx

In [2]:
# Setup OCI and basic configurations
compartment_id = "ocid1.compartment.oc1..aaaaaaaaretksgipt3jgwfpzgh4ijyw54uynyfviaxs5li4wtl744fj4fi3q"
CONFIG_PROFILE = "DEFAULT"
config = oci.config.from_file('config', CONFIG_PROFILE)

In [3]:
# Service endpoint for Generative AI Inference
endpoint = "https://inference.generativeai.us-chicago-1.oci.oraclecloud.com"
generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(config=config, service_endpoint=endpoint)

In [30]:
def ask_question(question, input_text):
    chat_detail = oci.generative_ai_inference.models.ChatDetails()
    chat_request = oci.generative_ai_inference.models.CohereChatRequest()
    
    # Craft the input for the AI model
    chat_request.message = f"{question}\n\nContext:\n{input_text}"
    chat_request.max_tokens = 1600
    chat_request.temperature = 0.85  # Low temperature for precise responses
    chat_request.frequency_penalty = 0
    chat_request.top_p = 0.85  # Bias towards more likely words to maintain technical tone
    chat_request.top_k = 0

    chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id="ocid1.generativeaimodel.oc1.us-chicago-1.amaaaaaask7dceya7ozidbukxwtun4ocm4ngco2jukoaht5mygpgr6gq2lgq")
    chat_detail.chat_request = chat_request
    chat_detail.compartment_id = compartment_id

    chat_response = generative_ai_inference_client.chat(chat_detail)
    chat_history = chat_response.data.chat_response.chat_history
    if chat_history:
        return chat_history[-1].message
    return "No response"

In [23]:
import pdfplumber
from docx import Document

# Extract text from PDF
def extract_text_from_pdf(pdf_path):
    with pdfplumber.open(pdf_path) as pdf:
        text = ''
        for page in pdf.pages:
            text += page.extract_text()
    return text

# Extract text from PPTX (using python-pptx)
from pptx import Presentation

def extract_text_from_pptx(pptx_path):
    prs = Presentation(pptx_path)
    text = ''
    for slide in prs.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                text += shape.text + "\n"
    return text

# Extract relevant sections using keywords
def extract_sections(text, keywords):
    sections = {}
    for keyword in keywords:
        sections[keyword] = []
        for line in text.split("\n"):
            if keyword.lower() in line.lower():
                sections[keyword].append(line)
    return sections

def extract_text_from_requirements_docx(file_path):
    doc = docx.Document(file_path)
    full_text = []
    for para in doc.paragraphs:
        full_text.append(para.text)
    return '\n'.join(full_text)

In [6]:
few_shot_examples = [
    "Phase: Project Initiation (Engage)\nTask: Project Kickoff and Project Team Mobilization\nDuration: 2 days\nStart: July 4, 2024\nEnd: July 5, 2024\nResources: Oracle, ADA",
    "Phase: Requirement Gathering and Analysis\nTask: Define Functional and Non-Functional Requirements\nDuration: 7 days\nStart: October 22, 2024\nEnd: October 29, 2024\nResources: Oracle, ADA"
]

In [97]:
questions = [
    "Can you generate a comprehensive list of tasks for the project initiation phase based on the RFP, response and the requirements?",
    "What are the exhaustive list of tasks required for the requirement gathering phase?",
    "Please list all the tasks related to design phases of the project.",
    "Please list all the tasks related to development phase of the project.",
    "Please list all the tasks related to testing phase of the project.",
    "What are the different deployment tasks?", 
    "List down all the post-implementation support activities?"
]


In [28]:
# Load RFP and RFP Response
rfp_text = extract_text_from_pdf('rfp.pdf')
rfp_response_text = extract_text_from_pptx('rfp_response.pptx')
requirements_text = extract_text_from_requirements_docx('PROJECT_SPEAK_MATE_DELIVERY_DOCS_updated_requirements_document (4).docx')

In [98]:
def generate_tasks(questions, rfp_text, rfp_response_text, requirements_text):
    combined_input = f"RFP Document:\n{rfp_text}\n\nRFP Response Document:\n{rfp_response_text}\n\nRequirements Document:\n{requirements_text}"
    
    task_list = []
    for question in questions:
        response = ask_question(question, combined_input)
        task_list.append({"Phase": question, "Tasks": response})
    return task_list

In [99]:
task_list = generate_tasks(questions, rfp_text, rfp_response_text, requirements_text)

In [100]:
task_list

[{'Phase': 'Can you generate a comprehensive list of tasks for the project initiation phase based on the RFP, response and the requirements?',
  'Tasks': "Here is a comprehensive list of tasks for the project initiation phase based on the information provided: \n\n- **Kick-off Meeting:** \n   - Introduce the project team members and stakeholders. \n   - Review the project objectives, scope, and timelines. \n   - Discuss the overall project plan and next steps. \n\n- **Requirement Gathering:** \n   - Conduct sessions to gather and analyze business, functional, and technical requirements. \n   - Identify and document specific needs for reporting, consolidation, data integration, security, and other aspects. \n\n- **Solution Design:** \n   - Design the solution architecture, including data flow, system components, and integrations. \n   - Identify and assess any potential risks and dependencies. \n   - Create a detailed plan for implementation, including task breakdown and resource alloca

In [113]:
import pandas as pd
import random
import numpy as np

# Function to extract the main phase from the 'Phase' text
def extract_phase(phase_text):
    phase_keywords = {
        "Project Initiation": ["initiation", "kickoff"],
        "Requirements Gathering": ["requirement gathering"],
        "Design": ["design"],
        "Development": ["development"],
        "Testing": ["testing"],
        "Deployment": ["deployment"],
        "Post-Implementation Support": ["post-implementation"]
    }
    
    for keyword, patterns in phase_keywords.items():
        for pattern in patterns:
            if re.search(pattern, phase_text, re.IGNORECASE):
                return keyword
    return "Other"  # If no specific phase is matched

# Function to split tasks into individual items
def extract_tasks(tasks_text):
    tasks = []
    lines = tasks_text.split("\n")
    
    for line in lines:
        line = line.strip()
        if line.startswith("- "):  # Main task or sub-task indicator
            task_name = re.sub(r"^[-*]\s*\*\*(.*?)\*\*.*$", r"\1", line)
            tasks.append({"Task Name": task_name})
    
    return tasks

# Function to assign random resource names with a 60% allocation to "Oracle-Client"
def assign_random_resource():
    resources = ['Oracle-Client', 'Client', 'Oracle']
    return np.random.choice(resources, p=[0.6, 0.2, 0.2])

# Function to build the project plan by phase
def build_project_plan(task_list):
    project_plan = []

    for entry in task_list:
        phase = extract_phase(entry['Phase'])  # Extracting phase from the Phase text
        tasks_text = entry['Tasks']  # Extract the task description string

        # For each phase, append the phase and the tasks underneath it
        tasks = extract_tasks(tasks_text)
        for task in tasks:
            # Randomly assign duration and percentage completion for demonstration purposes
            duration = random.randint(2, 10)  # Random duration between 2-10 days
            percentage_completion = 0  # All tasks are initially 0% completed
            
            # Random start and end date for demonstration purposes
            start_date = pd.Timestamp.now() + pd.DateOffset(days=random.randint(0, 30))
            end_date = start_date + pd.DateOffset(days=duration)
            
            # Assign random resources
            resource_name = assign_random_resource()
            
            # Add the task with additional columns to the project plan
            project_plan.append({
                "Percentage_completion": percentage_completion,
                "Phase": phase,
                "Task Name": task['Task Name'],
                "Duration": f"{duration} days",
                "Start Date": start_date.strftime("%Y-%m-%d"),
                "End Date": end_date.strftime("%Y-%m-%d"),
                "Resource Names": resource_name
            })

    return project_plan

In [114]:
project_plan = build_project_plan(task_list)

# Convert project plan to DataFrame for easier handling and to save to Excel
df_project_plan = pd.DataFrame(project_plan)

In [115]:
df_project_plan.shape

(159, 7)

In [108]:
df_project_plan.head()

Unnamed: 0,Phase,Task Name,Task Description,Duration,Start Date,End Date,Percentage_completion,Resource Names
0,Project Initiation,Kick-off Meeting:,- **Kick-off Meeting:**,2 days,2024-10-24,2024-10-26,0,Oracle-Client
1,Project Initiation,- Introduce the project team members and stake...,- Introduce the project team members and stake...,10 days,2024-11-08,2024-11-18,0,Oracle-Client
2,Project Initiation,"- Review the project objectives, scope, and ti...","- Review the project objectives, scope, and ti...",7 days,2024-10-27,2024-11-03,0,Oracle-Client
3,Project Initiation,- Discuss the overall project plan and next st...,- Discuss the overall project plan and next st...,8 days,2024-11-01,2024-11-09,0,Client
4,Project Initiation,Requirement Gathering:,- **Requirement Gathering:**,10 days,2024-11-21,2024-12-01,0,Oracle-Client


In [116]:
# Define your Object Storage details
namespace_name = "gc35013"
compartment_id = "ocid1.compartment.oc1..aaaaaaaaretksgipt3jgwfpzgh4ijyw54uynyfviaxs5li4wtl744fj4fi3q"
bucket_name = "ECHO"
folder_path = "PROJECT_SPEAK_MATE/DELIVERY_DOCS"
config = oci.config.from_file("config", "DEFAULT")
object_storage_client = oci.object_storage.ObjectStorageClient(config)

In [117]:
output_stream = io.BytesIO()
df_project_plan.to_excel(output_stream, index=False)
output_stream.seek(0)

# Upload to Object Storage
object_name = f"{folder_path}/updated_project_plan.xlsx"
response = object_storage_client.put_object(
    namespace_name=namespace_name,
    bucket_name=bucket_name,
    object_name=object_name,
    put_object_body=output_stream
)

print(f"Project plan successfully uploaded to Object Storage at: {object_name}")

Project plan successfully uploaded to Object Storage at: PROJECT_SPEAK_MATE/DELIVERY_DOCS/updated_project_plan.xlsx
