# Stanford DCI 2023 fellow summary

### Load packages

In [1]:
%reload_ext autoreload
%autoreload 2

import html
import pprint
import sys

import pandas as pd
from IPython.display import HTML

# Add the current directory and its two parent directories to the import path.
sys.path.extend([".", "..", "../.."])

In [2]:
!{sys.executable} -m pip install -q bs4

In [62]:
from uniflow import Context, PromptTemplate
from uniflow.flow.client import ExtractClient
from uniflow.flow.config import ExtractHTMLConfig
from uniflow.flow.client import TransformClient
from uniflow.flow.config  import TransformConfig
from uniflow.flow.config import PipelineConfig
from uniflow.op.model.model_config  import OpenAIModelConfig
from uniflow.pipeline import MultiFlowsPipeline

from uniflow.viz import Viz
from uniflow.flow.flow_factory import FlowFactory

# Show the flow names registered with FlowFactory, grouped by flow type.
FlowFactory.list()

{'extract': ['ExtractHTMLFlow',
  'ExtractImageFlow',
  'ExtractIpynbFlow',
  'ExtractMarkdownFlow',
  'ExtractPDFFlow',
  'ExtractTxtFlow'],
 'transform': ['TransformAzureOpenAIFlow',
  'TransformCopyFlow',
  'TransformGoogleFlow',
  'TransformGoogleMultiModalModelFlow',
  'TransformHuggingFaceFlow',
  'TransformLMQGFlow',
  'TransformOpenAIFlow'],
 'rater': ['RaterFlow']}

### Prepare the input data

Besides local HTML files, online HTML pages can be loaded as well: pass the page URL as the `filename` value, as done below.

In [63]:
# Profile pages of the 2023 DCI fellows. Each page URL is passed to the
# pipeline under the "filename" key.
_FELLOW_URL_TEMPLATE = "https://dci.stanford.edu/fellow/{}/"
_FELLOW_SLUGS = [
    "a-j-agarwal",
    "nancy-andrews",
    "max-bosel",
    "ana-botero",
    "molly-c-campbell",
    "franklin-carone",
    "david-ciulla",
    "ranjana-clark",
    "jim-cowie",
    "jason-green",
    "michele-grieshaber",
    "melissa-hollatz",
    "kim-jabal",
    "joan-jeffri",
    "robin-joy",
    "john-kleinheinz",
    "marsha-kleinheinz",
    "catherine-martineau",
    "william-mccarthy",
    "denis-morozov",
    "roswitha-mueller",
    "maria-pacheco",
    "madhu-rao",
    "jose-revuelta",
    "veronica-rogers",
    "jim-rowe",
    "avanish-sahai",
    "faye-sahai",
    "nirmala-sankaran",
    "sonja-k-schoenwald",
    "ramon-segismundo-dba",
    "steven-seleznow",
    "felice-silk",
    "david-silk",
    "priscila-costa-snel-correa",
    "luciano-snel-correa",
    "timothy-a-steinert",
    "keller-strother",
    "sunit-survase",
    "susan-vobejda",
    "joseph-weber",
]
input_data = [{"filename": _FELLOW_URL_TEMPLATE.format(slug)} for slug in _FELLOW_SLUGS]

### Extract the HTML pages via `ExtractHTMLConfig` with a custom `post_extract_fn` that reshapes the extracted text into the desired format

In [64]:
def post_extract_fn(data):
    """Keep only the page text located between two marker items.

    Args:
        data: Mapping whose ``"text"`` entry is expected to be a list of
            strings produced by the HTML extraction step.

    Returns:
        ``{"text": [joined]}`` where ``joined`` is the newline-joined items
        found strictly between the first ``"Select Page"`` item and the next
        ``"2023 FELLOWS & PARTNERS"`` item after it. ``joined`` is an empty
        string when either marker cannot be found.
    """
    def get_subset(strings, start, end):
        """Return the items strictly between ``start`` and the following ``end``."""
        try:
            start_index = strings.index(start)
            # Search for the end marker only after the start marker, so that
            # an earlier occurrence of it cannot produce an empty slice.
            end_index = strings.index(end, start_index + 1)
        except ValueError:
            # One of the markers is missing: nothing to keep.
            return []
        return strings[start_index + 1:end_index]

    return {"text": ["\n".join(get_subset(data['text'], "Select Page", "2023 FELLOWS & PARTNERS"))]}

# HTML extraction config that applies the custom post-processing hook.
extract_config = ExtractHTMLConfig(post_extract_fn=post_extract_fn)

### Transform to the desired format via `TransformConfig`

In [65]:
# Transform step: OpenAI flow with JSON-object responses and temperature 0.
transform_config = TransformConfig(
    flow_name="TransformOpenAIFlow",
    model_config=OpenAIModelConfig(
        response_format={"type": "json_object"},
        temperature=0),
    prompt_template=PromptTemplate(
            # No few-shot examples are active, so the instruction spells out
            # the expected JSON keys itself. The results cell reads exactly
            # `name`, `summary` and `industry` from each response.
            instruction="""
            Please provide a concise summary of the professional background and achievements of the individual featured on this webpage, including key roles, achievements with detailed numbers, and educational background. Also, identify the primary industry in which this person has made their career. Ensure the summary is brief and informative, capturing the essence of their professional journey and industry involvement.
            Respond with a JSON object that contains exactly three keys: "name", "summary", and "industry".
            """,
            # Optional few-shot examples (disabled). Uncomment to show the
            # model a worked example of the expected fields.
            # few_shot_prompt=[
            #     Context(
            #         context="""
            #         A.J. Agarwal is a Senior Managing Director in Blackstone's Real Estate Group, where he launched  Blackstone's U.S. Core+ real estate investing business, with over $125 billion of equity under management today.  He joined Blackstone in 1992 and is a member of the firm's Real Estate Investment Committee.  Most recently, he served as President and Director of Blackstone Real Estate Income Trust, Blackstone's largest investment vehicle with over $65 billion of market capitalization and over $125 billion of real estate assets.  Prior to launching Blackstone's Core+ real estate investing business in 2014, A.J. was Co-Head of U.S. Acquisitions for Blackstone's opportunistic equity investing business and he oversaw more than $50 billion of real estate investments across all real estate classes.
            #         A.J. graduated from Princeton University, where he studied Politics and graduated magna cum laude and Phi Beta Kappa, and received his MBA from Stanford University Graduate School of Business. He is a member of the Council on Foreign Relations  as well as the Stanford Alumni Real Estate Council.
            #         He is married to Roswitha Mueller-Agarwal and has two children, a daughter (Stanford '26) and son who lives in New York City.  A.J. is an avid tennis player and cyclist.""",
            #         name="""A.J. Agarwal""",
            #         summary="""A.J. Agarwal, a Senior Managing Director at Blackstone's Real Estate Group, is notable for launching and managing the firm's U.S. Core+ real estate investing business, overseeing over $125 billion of equity. His career includes key positions such as President and Director of Blackstone Real Estate Income Trust and Co-Head of U.S. Acquisitions. Agarwal, a Princeton University and Stanford Graduate School of Business alumnus, has significantly impacted the real estate investment sector.""",
            #         industry="""Real Estate Investment"""
            #     ),
            #     Context(
            #         context="""...""",
            #         name="""...""",
            #         summary="""...""",
            #         industry="""..."""
            #     ),
            # ],
        )
    )


In [66]:
# Combine the extraction and transformation configs into a single pipeline.
pipeline_config = PipelineConfig(
    extract_config=extract_config,
    transform_config=transform_config,
)
p = MultiFlowsPipeline(pipeline_config)

In [67]:
# Run the pipeline on every input entry; the results are later paired
# positionally with `input_data` when the summary table is built.
output = p.run(input_data)

100%|██████████| 41/41 [00:12<00:00,  3.18it/s]
  5%|▍         | 2/41 [00:04<01:28,  2.27s/it]INFO [abs_model]: Attempt 1 failed, retrying...
100%|██████████| 41/41 [02:14<00:00,  3.29s/it]


In [68]:
# Pull the three JSON fields out of each pipeline result. Results are paired
# positionally with the input entries to recover each source URL.
name = []
summary = []
industry = []
url = []
for o, u in zip(output, input_data):
    response = o['output'][0]['response'][0]
    name.append(response['name'])
    summary.append(response['summary'])
    industry.append(response['industry'])
    url.append(u['filename'])


# Table contents, one list per column
data = {
    'Name': name,
    'Summary': summary,
    'Industry': industry,
    'URL': url
}

# Adjusting display settings to avoid truncation
pd.set_option('display.max_rows', None)  # Adjust to display all rows
pd.set_option('display.max_columns', None)  # Adjust to display all columns
pd.set_option('display.width', None)  # Adjust to ensure each row uses optimal width
pd.set_option('display.max_colwidth', None)  # Adjust to display full content of each cell

# Creating a DataFrame
df = pd.DataFrame(data)
# Turn each URL into a clickable link; the URL is escaped for both the
# attribute value and the link text.
df['URL'] = df['URL'].apply(lambda x: f'<a href="{html.escape(x)}" target="_blank">{html.escape(x)}</a>')


def _escape_cell(value):
    """Return ``value`` as HTML-escaped text so it cannot inject markup."""
    return html.escape(str(value))


# Displaying the DataFrame.
# escape=False is needed so the <a> tags in the URL column render as links.
# The free-text columns come from model output over scraped pages, so they
# are escaped explicitly through per-column formatters instead.
display(HTML(df.to_html(
    escape=False,
    formatters={
        'Name': _escape_cell,
        'Summary': _escape_cell,
        'Industry': _escape_cell,
    },
)))


Unnamed: 0,Name,Summary,Industry,URL
0,A.J. Agarwal,"A.J. Agarwal is a Senior Managing Director in Blackstone's Real Estate Group, where he launched Blackstone's U.S. Core+ real estate investing business, with over $125 billion of equity under management today. He joined Blackstone in 1992 and is a member of the firm's Real Estate Investment Committee. Most recently, he served as President and Director of Blackstone Real Estate Income Trust, Blackstone's largest investment vehicle with over $65 billion of market capitalization and over $125 billion of real estate assets. Prior to launching Blackstone's Core+ real estate investing business in 2014, A.J. was Co-Head of U.S. Acquisitions for Blackstone's opportunistic equity investing business and he oversaw more than $50 billion of real estate investments across all real estate classes. A.J. graduated from Princeton University, where he studied Politics and graduated magna cum laude and Phi Beta Kappa, and received his MBA from Stanford University Graduate School of Business. He is a member of the Council on Foreign Relations as well as the Stanford Alumni Real Estate Council.",Real Estate Investment,https://dci.stanford.edu/fellow/a-j-agarwal/
1,Nancy O. Andrews,"Nancy O. Andrews is a prominent figure in the community development industry, with a career spanning 40 years. She was the President and Chief Executive Officer of the Low Income Investment Fund (LIIF), where she grew the organization's assets from $35 million to $1 billion. Nancy's work influenced federal policy and led to capital innovations, such as outcomes-based investments like Equity with a Twist. She has also served on numerous boards and committees of community development and environmental organizations, including Bank of America's National Community Advisory Council and the Federal Reserve Board's Consumer Advisory Council. Nancy's career also includes work in the Clinton administration Departments of Treasury and Housing and Urban Development, as well as serving as the Deputy Director of the Ford Foundation's social investment portfolio.",Community Development,https://dci.stanford.edu/fellow/nancy-andrews/
2,Max Bosel,"Max Bosel has had an extensive career in public safety in the San Francisco Bay Area, beginning as a high school volunteer and culminating with 31 years as a peace officer. He served as the police chief of Mountain View, CA, and founded 'Code 4 Finances' to provide financial wellness education and coaching tailored to first responders. Max has wide-ranging leadership experiences and holds a BA in Management, an MPA, and a Graduate Certificate in Financial Life Planning. He is a graduate of the Harvard Kennedy School of Government's State and Local Government Executive Program and the FBI's National Academy.",Public Safety,https://dci.stanford.edu/fellow/max-bosel/
3,Ana Mercedes Botero,"Ana Mercedes Botero is a Colombian lawyer with extensive experience in international diplomacy and social innovation. She has served in various roles at the Development Bank of Latin America-Corporacion Andina de Fomento (CAF), including Director of the Secretariat and External Affairs. Ana has been involved in creating community-driven development solutions for vulnerable groups, focusing on local empowerment and social inclusion. She holds two master's degrees from Columbia University and has completed leadership programs at Harvard and the World Economic Forum. Ana's career has been primarily in the international development and social innovation industry.",International Development and Social Innovation,https://dci.stanford.edu/fellow/ana-botero/
4,Molly C. Campbell,"Molly C. Campbell is a former Director of the Port of New York and New Jersey and former CFO and Deputy Executive Director at the Port of Los Angeles. She currently serves as an infrastructure advisor on behalf of the US Department of Treasury, Office of Technical Assistance and a Senior Advisor for the Boston Consulting Group (BCG). She is a 2019 Harvard University Advanced Leadership Initiative Fellow. Molly has had a nearly 28-year career with the City of Los Angeles in various high profile and impactful roles, spending more than half of her tenure at the Port of Los Angeles responsible for the day-to-day administration at the nation's busiest port complex. During her three years overseeing the Port of New York and New Jersey, she oversaw record cargo volumes and achieved major milestones. Molly is also a member of the board of directors of Granite Construction and a 6-year member of the board at East West Bank, serving on the Audit and Compensation Committees of both organizations, as well as the Nominating and Corporate Governance Committee for East West Bank.",Maritime and Infrastructure,https://dci.stanford.edu/fellow/molly-c-campbell/
5,Frank Carone,"Frank Carone has had a 40-year career in banking, including serving as the first Vice President of the California Construction Lending Department, where he helped finance and administer land acquisition, development, and construction projects for small and medium-sized real estate developers throughout California. He has held various roles in banking, including strategic planning, wholesale lending, retail, integration, and assimilations for new bank acquisitions. Currently, he is a senior advisor in private banking at JP Morgan Chase. Frank graduated from California State University, Northridge with a Bachelor's Degree in Economics and Business. He is an avid reader, Masters bike racer, extensive traveler, amateur gardener and cook, enjoys music and the arts, particularly live theatre. Frank has two adult daughters living in Portland, Oregon and Santa Ana, California.",Banking,https://dci.stanford.edu/fellow/franklin-carone/
6,David Ciulla,"David Ciulla is an entrepreneur and educator with a diverse background in business, education, and nonprofit work. He co-founded Sports Basement, a San Francisco-based retail company, and ReadWorks, an award-winning, nonprofit, educational technology platform. David has also founded and managed various ventures, including 'The Anything-for-a-buck Paint Crew' and has served as a consultant for community development projects. He holds a BA in Political Science and English from Stanford and an MA in International Relations from The Johns Hopkins School of Advanced International Studies. David is actively involved in outdoor activities, sports, and community initiatives, and is the co-founder and President of the Viola Club San Francisco, an official supporters club of the Italian Serie A soccer team Fiorentina.","Retail, Education, Nonprofit",https://dci.stanford.edu/fellow/david-ciulla/
7,Ranjana Clark,"Ranjana Clark is a seasoned global financial services leader and board member. She was the Head of Global Transaction Banking at Mitsubishi UFJ Financial Group, and previously the Global CMO and Chief Customer Officer at PayPal. Prior to PayPal, Ranjana was on the executive team at Wachovia (now part of Wells Fargo) and President of Global Business Payments and Head of Global Strategy at Western Union. Ranjana has a BA in Economics from the University of Delhi, an MBA from the Indian Institute of Management, Ahmedabad, and an MBA from the Fuqua School of Business at Duke University. She serves on the Board of Directors of InvestCloud, StanCorp Financial Group and Xometry, the President’s Leadership Council of the Asia Foundation, and is a founding member of Neythri. She is a former member of the Board of Directors/ Visitors of the Haas School of Business of the University of California, Berkeley, Bay Area Council, Fuqua School of Business, Committee for Economic Development, and the Association of National Advertisers. Ranjana resides in San Francisco with her husband, Michael. They have a grown daughter, who is a Stanford alum.",Global Financial Services,https://dci.stanford.edu/fellow/ranjana-clark/
8,Jim Cowie,"Jim Cowie is a veteran and advocate with a diverse professional background. After serving as an officer in the United States Navy, he pursued a legal career, working as in-house counsel for over 25 years at companies in the financial, healthcare, semiconductor, and biotechnology industries. He spent more than 12 years as General Counsel at Cadence Design Systems, Inc. Jim is also actively involved in various advisory and leadership roles at organizations such as the Markkula Center for Applied Ethics at Santa Clara University and Stanford University's Buck/Cardinal Club. He is currently based in Palo Alto, California and enjoys playing basketball, golf, and pickleball, as well as engaging in outdoor activities like biking, hiking, swimming, and running.","Legal, Technology, Healthcare, Semiconductor, Biotechnology",https://dci.stanford.edu/fellow/jim-cowie/
9,Jason Green,"Jason Green is a founding partner of Emergence Capital, where he has spent the last 25 years backing leading entrepreneurs building enterprise cloud software companies. He was an early investor in companies such as Salesforce, SalesLoft, SuccessFactors, Zoom, Veeva, Box, Bill.com, Yammer, and Doximity. Jason also focuses on philanthropic activities and investing in emerging managers with diverse backgrounds. He has been involved in founding and serving on the boards of various non-profit organizations. Jason has a background in economics and computer science from Dartmouth College and has worked at Bain & Company, Venrock, and US Venture Partners before founding Emergence Capital. He is also an avid tennis player, golfer, and wine enthusiast.",Venture Capital and Philanthropy,https://dci.stanford.edu/fellow/jason-green/


## End of the notebook

Check more Uniflow use cases in the [example folder](https://github.com/CambioML/uniflow/tree/main/example/model#examples)!

<a href="https://www.cambioml.com/" title="Title">
    <img src="../image/cambioml_logo_large.png" style="height: 100px; display: block; margin-left: auto; margin-right: auto;"/>
</a>