# Sample Cloud SQL Connection

# Required setup for each instance: add your VM's IP to the Authorized Networks
**1. Find the external IP of your JupyterLab VM**
https://console.cloud.google.com/compute/instances?project=adsp-34002-on02-sopho-scribe&authuser=1
 * Go to VM Instances
 * Find your JupyterLab VM
 * Copy the External IP address (looks like 34.91.100.45).

**2. Add the VM's IP to your Cloud SQL authorized networks**
https://console.cloud.google.com/sql/instances/currensee-sql/connections/networking?authuser=1&project=adsp-34002-on02-sopho-scribe
* Go to Cloud SQL instances. 
* Click your instance.
* Click Connections in the left sidebar.
* Scroll to Authorized networks → Add network.
* Name: anything like jupyterlab-vm
* Network: paste the external IP you just copied (e.g., 34.91.100.45/32)
* IMPORTANT: Add /32 to allow only that single IP.
* Click Save.

It will take ~30 seconds to update.

In [1]:
#%poetry add -q google-cloud-secret-manager==2.23.3
#%poetry add -q SQLAlchemy==2.0.40

In [2]:
#%poetry add psycopg2-binary sqlalchemy pandas

In [4]:
from google.cloud import secretmanager
import pandas as pd
import numpy as np
from currensee.utils.db_utils import create_pg_engine
from sqlalchemy import text

## IMPORTANT
The cell below will only work if you have a .env file defined at `<fl>_currensee/currensee/.env` with the credentials 
defined in `<fl>_currensee/currensee/.env.example`.

Instructions for filling out the credentials properly are in the `.env.example` file.

#### Create SQLAlchemy engine

In [None]:
# Name of the database to connect to; passed as `db_name` to create_pg_engine below.
DB_NAME = 'crm'

In [None]:
# Build the SQLAlchemy engine through the project helper.
# NOTE(review): presumably this reads the credentials from the .env file described
# above -- confirm in currensee.utils.db_utils.
engine = create_pg_engine(
   db_name=DB_NAME
)

In [None]:
# Connection smoke test: fetch at most 10 rows from the Employees table and print them.
df_result = pd.read_sql("SELECT * FROM Employees limit 10", con=engine)
print(df_result)

# Generate Fake Data

In [None]:
# %poetry add faker

In [None]:
import pandas as pd
import random
import os
import re
from faker import Faker

# Faker is a Python package that generates fake data (names, phone numbers, dates, ...).
# The Faker instance itself is created in the next cell.

# The mock clients are a hard-coded list of publicly traded companies, defined further below.


In [None]:

# Shared Faker instance used by the generator helpers below. No seed is set here,
# so the generated values differ between runs.
fake = Faker()

# Helper function to generate synthetic employee data
def generate_employee_data(num_employees=10, ourcompany_name='bankwell'):
    """Build a DataFrame of synthetic employee records for our own company.

    The first row is always the fixed demo employee "Jane Moneypenny"; the
    remaining ``num_employees - 1`` rows are randomly generated. Uses the
    module-level ``fake`` Faker instance and the ``random`` module.

    Args:
        num_employees: Total number of rows wanted, including the fixed first
            row. Values below 1 still yield the single fixed row.
        ourcompany_name: Company name used to build the e-mail domain of the
            random employees (non-word characters stripped, lowercased).
            The fixed first row keeps its hard-coded ``@bankwell.com`` address
            regardless of this value.

    Returns:
        pandas.DataFrame with columns ``employee_id``, ``first_name``,
        ``last_name``, ``title``, ``email``, ``phone``, ``hire_date``,
        ``department`` and ``market``.
    """
    titles = ['Finance Assistant', 'Financial Advisor', 'Senior Relationship Manager', 'Product Specialist', 'Relationship Manager']
    departments = ['Enterprise Investment', 'Small Business Investment', 'Operations', 'Sales', 'Customer Support']
    markets = ['San Fransisco', 'New York City', 'Boston', 'Denver', 'Los Angeles', 'Miami', 'Washington DC', 'Seattle', 'Dallas', 'Chicago']

    # The e-mail domain depends only on the company name, so clean it once
    # instead of re-running the regex for every generated employee.
    company_clean = re.sub(r'\W+', '', ourcompany_name).lower()

    # Fixed, well-known first employee so demos always have a stable identity.
    employees = [
        {
            'employee_id': fake.unique.uuid4(),
            'first_name': "Jane",
            'last_name': "Moneypenny",
            'title': "Relationship Manager",
            'email': "jane.moneypenny1@bankwell.com",
            'phone': fake.phone_number(),
            'hire_date': fake.date_this_decade(),
            'department': 'Enterprise Investment',
            'market': 'San Fransisco'
        }
    ]

    for _ in range(num_employees - 1):
        # The order of the random draws below is kept stable so seeded runs
        # keep producing the same data.
        employee_id = fake.unique.uuid4()
        first_name = fake.first_name()
        last_name = fake.last_name()
        title = random.choice(titles)
        phone = fake.phone_number()
        department = random.choice(departments)
        hire_date = fake.date_this_decade()
        market = random.choice(markets)
        # NOTE: addresses are not de-duplicated; two employees sharing a name
        # would share an e-mail address.
        email = f"{first_name.lower()}.{last_name.lower()}@{company_clean}.com"

        employees.append({
            'employee_id': employee_id,
            'first_name': first_name,
            'last_name': last_name,
            'title': title,
            'email': email,
            'phone': phone,
            'hire_date': hire_date,
            'department': department,
            'market': market
        })
    return pd.DataFrame(employees)

# Helper function to generate point of contact and info for a Company
def generate_point_of_contact(company_name):
    """Create one synthetic account / point-of-contact record for a company.

    Draws values from the module-level ``fake`` Faker instance and from
    ``random``. The contact's e-mail domain is ``company_name`` with all
    non-word characters removed, lowercased.

    Args:
        company_name: Display name of the client company.

    Returns:
        dict with keys ``account_id``, ``contact_first_name``,
        ``contact_last_name``, ``contact_title``, ``phone``, ``email``,
        ``website``, ``location``, ``annual_revenue`` and
        ``total_account_bal``.
    """
    title_options = ("Senior Director", "Manager", "Director", "VP", "Consultant")
    city_options = (
        'San Fransisco', 'New York City', 'Boston', 'Denver', 'Los Angeles',
        'Miami', 'Washington DC', 'Seattle', 'Dallas', 'Chicago',
    )

    # Random draws happen in a fixed order so seeded runs stay reproducible.
    new_account_id = fake.unique.uuid4()
    given_name = fake.first_name()
    family_name = fake.last_name()
    job_title = random.choice(title_options)
    phone_number = fake.phone_number()
    site_url = fake.url()
    city = random.choice(city_options)
    revenue = random.randint(1000000, 50000000)
    balance = random.randint(1000000, 50000000)

    # Strip everything that is not a word character to get a usable domain.
    domain = re.sub(r'\W+', '', company_name).lower()
    address = "{}.{}@{}.com".format(given_name.lower(), family_name.lower(), domain)

    return {
        "account_id": new_account_id,
        "contact_first_name": given_name,
        "contact_last_name": family_name,
        "contact_title": job_title,
        "phone": phone_number,
        "email": address,
        "website": site_url,
        "location": city,
        "annual_revenue": revenue,
        "total_account_bal": balance,
    }


def generate_account_data(num_accounts=5, clients_company=None):
    """Build the accounts DataFrame: one row per client company.

    Each row combines the company's name and industry with a freshly
    generated point of contact from ``generate_point_of_contact``.

    Args:
        num_accounts: Currently unused; kept so existing callers that pass it
            keep working. The number of rows always equals the number of
            entries in ``clients_company``.
        clients_company: Iterable of dicts with ``"company"`` and
            ``"industry"`` keys. ``None`` (the default) is treated as empty.

    Returns:
        pandas.DataFrame with ``company``, ``industry`` and every key returned
        by ``generate_point_of_contact``; an empty DataFrame when there are no
        companies.
    """
    # ``None`` instead of a shared mutable ``[]`` default.
    if clients_company is None:
        clients_company = ()

    accounts = []
    for company in clients_company:
        contact = generate_point_of_contact(company["company"])
        accounts.append({
            "company": company["company"],
            "industry": company["industry"],
            **contact,
        })
    return pd.DataFrame(accounts)




# Helper function to generate synthetic opportunity data
def generate_opportunity_data(accounts_df, num_opportunities_per_account=3):
    """Create synthetic sales opportunities for every account.

    For each row of ``accounts_df`` a random number of opportunities between
    1 and ``num_opportunities_per_account`` (inclusive) is generated, using
    the module-level ``fake`` Faker instance and ``random``.

    Args:
        accounts_df: DataFrame with at least an ``account_id`` column.
        num_opportunities_per_account: Upper bound on opportunities per account.

    Returns:
        pandas.DataFrame with columns ``opportunity_id``, ``account_id``,
        ``opportunity_name``, ``stage``, ``type``, ``close_date`` and
        ``amount``; empty when ``accounts_df`` has no rows.
    """
    stage_options = ['Prospecting', 'Qualification', 'Proposal', 'Negotiation', 'Won', 'Lost', 'Closed']
    type_options = ['New Business', 'Existing Business', 'Renewal', 'Upsell']

    rows = []
    for _, account_row in accounts_df.iterrows():
        how_many = random.randint(1, num_opportunities_per_account)
        # extend() consumes the generator immediately, so records are built
        # one after another in order.
        rows.extend(
            {
                'opportunity_id': fake.unique.uuid4(),
                'account_id': account_row['account_id'],
                'opportunity_name': fake.bs(),
                'stage': random.choice(stage_options),
                'type': random.choice(type_options),
                'close_date': fake.date_this_year(),
                'amount': random.randint(50000, 500000),
            }
            for _ in range(how_many)
        )
    return pd.DataFrame(rows)

# Helper function to generate synthetic employee-contact relationship data
def generate_employee_contact_data(employees_df, accounts_df, num_relationships_per_employee=2):
    """Randomly align employees with client accounts.

    Every employee gets between 1 and ``num_relationships_per_employee``
    relationships, each pointing at a randomly chosen account. The same
    account may be drawn more than once for an employee, so the result can
    contain duplicate rows.

    Args:
        employees_df: DataFrame with ``employee_id``, ``first_name`` and
            ``last_name`` columns.
        accounts_df: DataFrame with ``account_id``, ``company``, ``industry``,
            ``contact_first_name``, ``contact_last_name``, ``email``,
            ``contact_title`` and ``phone`` columns.
        num_relationships_per_employee: Upper bound on draws per employee.

    Returns:
        pandas.DataFrame with one row per drawn relationship; empty when
        ``employees_df`` has no rows.

    Raises:
        IndexError: If there is at least one employee but ``accounts_df`` has
            no rows (nothing to choose from).
    """
    # Output column -> accounts_df column it is copied from.
    account_columns = {
        'company': 'company',
        'industry': 'industry',
        'contact_first_name': 'contact_first_name',
        'contact_last_name': 'contact_last_name',
        'contact_email': 'email',
        'contact_title': 'contact_title',
        'contact_phone': 'phone',
    }

    # Built once instead of on every draw.
    account_ids = accounts_df['account_id'].tolist()

    # Position of the FIRST row for each account id. This replaces seven
    # full-column boolean scans per relationship with a dict lookup while
    # keeping the "first match wins" behaviour of the mask + iloc[0] form.
    first_row_of = {}
    for position, account_id in enumerate(account_ids):
        first_row_of.setdefault(account_id, position)

    relationships = []
    for _, employee in employees_df.iterrows():
        num_relationships = random.randint(1, num_relationships_per_employee)
        for _ in range(num_relationships):
            account = random.choice(account_ids)
            row_position = first_row_of[account]
            relationship = {
                'employee_id': employee['employee_id'],
                'employee_first_name': employee['first_name'],
                'employee_last_name': employee['last_name'],
                'account_id': account,
            }
            for target, source in account_columns.items():
                relationship[target] = accounts_df[source].iat[row_position]
            relationships.append(relationship)
    return pd.DataFrame(relationships)



def generate_portfolios(df_accounts, max_positions=9, instruments=None):
    """Split each account's balance across a random set of instruments.

    For every account a random number of distinct instruments is sampled and
    the account's ``total_account_bal`` is divided among them using random
    weights that sum to 1.

    Args:
        df_accounts: DataFrame with ``account_id``, ``company`` and
            ``total_account_bal`` columns.
        max_positions: Upper bound on positions per account. The historical
            ceiling of 5 positions still applies, so values above 5 have no
            further effect; the bound is also capped at the number of
            available instruments.
        instruments: Sequence of ``(symbol, fund_type)`` pairs to sample from.
            ``None`` (the default) is treated as empty.

    Returns:
        pandas.DataFrame with columns ``account_id``, ``company``, ``symbol``,
        ``fund_type``, ``tot_balance`` and ``fund_balance`` (rounded to two
        decimals, so per-account sums can differ from the total by cents).

    Raises:
        ValueError: If there are accounts to process but no position can be
            drawn (no instruments, or ``max_positions`` below 1).
    """
    # ``None`` instead of a shared mutable ``[]`` default.
    pool = [] if instruments is None else list(instruments)

    # Honour max_positions and never ask random.sample for more items than
    # exist; 5 and 2 are the bounds the function has always used.
    upper = min(5, max_positions, len(pool))
    if upper < 1 and len(df_accounts) > 0:
        raise ValueError(
            "generate_portfolios needs at least one instrument and max_positions >= 1"
        )
    lower = min(2, upper)

    portfolio_records = []

    for _, row in df_accounts.iterrows():
        account_id = row["account_id"]
        company = row["company"]
        total_account_bal = row["total_account_bal"]

        num_positions = random.randint(lower, upper)
        positions = random.sample(pool, num_positions)

        # Generate random proportions that sum to 1
        random_weights = np.random.rand(num_positions)
        random_weights /= random_weights.sum()

        for (symbol, instrument_type), weight in zip(positions, random_weights):
            fund_balance = round(total_account_bal * weight, 2)
            portfolio_records.append({
                "account_id": account_id,
                "company": company,
                "symbol": symbol,
                "fund_type": instrument_type,
                "tot_balance": total_account_bal,
                "fund_balance": fund_balance
            })

    return pd.DataFrame(portfolio_records)

In [None]:
# Mock client companies: each entry pairs a company name with its industry.
# Accounts can currently only be built from this fixed list of publicly traded
# companies; picking companies at random instead is a likely future improvement.
# NOTE(review): a few names ("Mariott", "Fordy", "Peakstone Realty Trus") look like
# typos or deliberate variants -- confirm before correcting them, since these
# strings are written to the database.
clients_company_info = [
        {"company": "Broadcom", "industry": "Technology"},
        {"company": "Cisco", "industry": "Technology"},
        {"company": "Palantir Technologies", "industry": "Technology"},
        {"company": "Fiserv", "industry": "Technology"},
        {"company": "Atlassian", "industry": "Technology"},
        {"company": "Leidos", "industry": "Technology"},
        {"company": "Duolingo", "industry": "Technology"},
        {"company": "Logitech", "industry": "Technology"},
        {"company": "Celestica", "industry": "Technology"},
        {"company": "Dropbox", "industry": "Technology"},
        {"company": "Plexus", "industry": "Technology"},
        {"company": "Silicon Laboratories", "industry": "Technology"},
        {"company": "Mobix Labs", "industry": "Technology"},
        {"company": "Mariott", "industry": "Hospitality"},
        {"company": "InterContinental Hotels Group", "industry": "Hospitality"},
        {"company": "Sonder Holdings", "industry": "Hospitality"},
        {"company": "Hyatt Hotels", "industry": "Hospitality"},
        {"company": "Royal Caribbean Cruises", "industry": "Hospitality"},
        {"company": "UnitedHealth", "industry": "Healthcare"},
        {"company": "Johnson & Johnson", "industry": "Healthcare"},
        {"company": "AbbVie", "industry": "Healthcare"},
        {"company": "Novo Nordisk", "industry": "Healthcare"},
        {"company": "Abbott Laboratories", "industry": "Healthcare"},
        {"company": "AstraZeneca", "industry": "Healthcare"},
        {"company": "Merck & Co", "industry": "Healthcare"},
        {"company": "Intuitive Surgical", "industry": "Healthcare"},
        {"company": "Medtronic", "industry": "Healthcare"},
        {"company": "Zoetis", "industry": "Healthcare"},
        {"company": "Humana", "industry": "Healthcare"},
        {"company": "Illumina", "industry": "Healthcare"},
        {"company": "Guardant Health", "industry": "Healthcare"},
        {"company": "Rhythm Pharmaceuticals", "industry": "Healthcare"},
        {"company": "Amedisys", "industry": "Healthcare"},
        {"company": "Rivian Automotive", "industry": "Automotive"},
        {"company": "Fordy", "industry": "Automotive"},
        {"company": "lululemon athletica", "industry": "Retail"},
        {"company": "DICK'S Sporting Goods", "industry": "Retail"},
        {"company": "GameStop Corp", "industry": "Retail"},
        {"company": "Texas Roadhouse", "industry": "Retail"},
        {"company": "Hasbro", "industry": "Retail"},
        {"company": "Mattel", "industry": "Retail"},
        {"company": "Wayfair", "industry": "Retail"},
        {"company": "Peloton", "industry": "Retail"},
        {"company": "Sally Beauty", "industry": "Retail"},
        {"company": "Lifetime Brand", "industry": "Retail"},
        {"company": "Allbirds", "industry": "Retail"},
        {"company": "Walmart", "industry": "Retail"},
        {"company": "Tyson Foods", "industry": "Retail"},
        {"company": "Sprouts Farmers Market", "industry": "Retail"},
        {"company": "Dollar Tree", "industry": "Retail"},
        {"company": "Stride", "industry": "Retail"},
        {"company": "Spectrum Brands", "industry": "Retail"},
        {"company": "Udemy", "industry": "Retail"},
        {"company": "Vital Farms", "industry": "Retail"},
        {"company": "Graham Holdings Company", "industry": "Retail"},
        {"company": "Hims & Hers Health", "industry": "Retail"},
        {"company": "Smithfield Foods", "industry": "Retail"},
        {"company": "Albertsons Companies", "industry": "Retail"},
        {"company": "Albany International", "industry": "Manufacturing"},
        {"company": "IT Tech Packaging", "industry": "Manufacturing"},
        {"company": "Lockheed Martin Corporation", "industry": "Manufacturing"},
        {"company": "Landstar System", "industry": "Manufacturing"},
        {"company": "Hexcel Corporation", "industry": "Manufacturing"},
        {"company": "AeroVironment", "industry": "Manufacturing"},
        {"company": "Matson", "industry": "Manufacturing"},
        {"company": "McGrath RentCorp", "industry": "Manufacturing"},
        {"company": "Mueller Industries", "industry": "Manufacturing"},
        {"company": "Dolby Laboratories", "industry": "Manufacturing"},
        {"company": "ManpowerGroup", "industry": "Manufacturing"},
        {"company": "Welltower", "industry": "RealEstate"},
        {"company": "Iron Mountain Incorporated", "industry": "RealEstate"},
        {"company": "Camden Property", "industry": "RealEstate"},
        {"company": "CubeSmart", "industry": "RealEstate"},
        {"company": "Federal Realty Investment Trust", "industry": "RealEstate"},
        {"company": "Essential Properties Realty", "industry": "RealEstate"},
        {"company": "Compass", "industry": "RealEstate"},
        {"company": "Medical Properties Trust", "industry": "RealEstate"},
        {"company": "Broadstone", "industry": "RealEstate"},
        {"company": "Ladder Capital Corp", "industry": "RealEstate"},
        {"company": "Peakstone Realty Trus", "industry": "RealEstate"},
        {"company": "Fathom Holdings", "industry": "RealEstate"},
        {"company": "Presidio Property Trust", "industry": "RealEstate"},
        {"company": "Service Properties Trust", "industry": "RealEstate"},
    ]


# Ticker symbols available for mock portfolios, grouped by fund type.
Bond_funds = ['BND', 'MUB', 'MBB', 'VCSH', 'TLT']
Equity_funds = ['VSMPX', 'FXAIX', 'FCNTX', 'FTIEX']

# Flatten both groups into (symbol, fund type) pairs: bond funds first,
# then equity funds, each in list order.
instruments = [
    (symbol, fund_type)
    for fund_type, symbols in (('Bond Fund', Bond_funds), ('Equity Fund', Equity_funds))
    for symbol in symbols
]

# Fund detail is not generated: it is read from a spreadsheet, resolved relative
# to the notebook's working directory.
fund_dtl_df = pd.read_excel('equity_fund_example.xlsx')
# Generate synthetic data for the tables
employees_df = generate_employee_data(num_employees=100, ourcompany_name = 'bankwell')
# NOTE: generate_account_data does not read num_accounts; it emits exactly one
# account per entry in clients_company_info.
accounts_df = generate_account_data(num_accounts=1000, clients_company = clients_company_info)
opportunities_df = generate_opportunity_data(accounts_df, num_opportunities_per_account=3)
# Up to 30 random account draws per employee. The same account can be drawn
# twice, so duplicate rows are possible here; they are dropped before loading.
contacts_df = generate_employee_contact_data(employees_df, accounts_df, num_relationships_per_employee=30)
portfolio_df = generate_portfolios(accounts_df, max_positions=10, instruments = instruments)

# Print the first few rows of each DataFrame
print("Employees Data:")
print(employees_df.head())

print("\nAccounts Data:")
print(accounts_df.head())

print("\nOpportunities Data:")
print(opportunities_df.head())

print("\nEmployee-Contact Relationships Data:")
print(contacts_df.head())

print("\nPortfolio Info for Each Account")
print(portfolio_df.head())


print("\nPortfolio detail")
print(fund_dtl_df.head())


In [None]:
portfolio_df.head(10)

# Load to database

### Employees Table

In [None]:
employees_df.head()

In [None]:
employees_df.shape

In [None]:
# if_exists='replace' drops any existing "employees" table before inserting;
# index=False keeps the DataFrame index out of the table.
employees_df.to_sql('employees', engine, if_exists='replace', index=False)

In [None]:
# The freshly written table has no primary key yet, so declare employee_id as one.
alter_sql = """
ALTER TABLE "employees"
ADD PRIMARY KEY ("employee_id");
"""

# engine.begin() commits when the block succeeds and rolls back on error.
# With SQLAlchemy 2.x a plain engine.connect() block does not autocommit, so
# the ALTER would be rolled back when the connection closes.
with engine.begin() as connection:
    connection.execute(text(alter_sql))

In [None]:
df = pd.read_sql("SELECT * from employees", con=engine)
df.head()

### Clients Contact table

In [None]:
accounts_df.head()

In [None]:
accounts_df.shape

In [None]:
# if_exists='replace' drops any existing "clients_contact" table before inserting;
# index=False keeps the DataFrame index out of the table.
accounts_df.to_sql('clients_contact', engine, if_exists='replace', index=False)

In [None]:
# The freshly written table has no primary key yet, so declare account_id as one.
alter_sql = """
ALTER TABLE "clients_contact"
ADD PRIMARY KEY ("account_id");
"""

# engine.begin() commits when the block succeeds and rolls back on error.
# With SQLAlchemy 2.x a plain engine.connect() block does not autocommit, so
# the ALTER would be rolled back when the connection closes.
with engine.begin() as connection:
    connection.execute(text(alter_sql))

### Client alignment table

In [None]:
contacts_df.head()

In [None]:
# Drop fully identical rows: the same account can be drawn more than once for
# an employee, and the table is later keyed on (account_id, employee_id).
client_alignment = contacts_df.drop_duplicates()

In [None]:
# if_exists='replace' drops any existing "client_alignment" table before inserting;
# index=False keeps the DataFrame index out of the table.
client_alignment.to_sql('client_alignment', engine, if_exists='replace', index=False)

In [None]:
# The freshly written table has no primary key yet; each (account, employee)
# pair identifies one alignment row.
alter_sql = """
ALTER TABLE "client_alignment"
ADD PRIMARY KEY ("account_id", "employee_id");
"""

# engine.begin() commits when the block succeeds and rolls back on error.
# With SQLAlchemy 2.x a plain engine.connect() block does not autocommit, so
# the ALTER would be rolled back when the connection closes.
with engine.begin() as connection:
    connection.execute(text(alter_sql))

### Portfolio Table

In [None]:
portfolio_df.head()

In [None]:
# if_exists='replace' drops any existing "portfolio" table before inserting;
# index=False keeps the DataFrame index out of the table.
portfolio_df.to_sql('portfolio', engine, if_exists='replace', index=False)

In [None]:
# The freshly written table has no primary key yet; each account holds a given
# symbol at most once, so (account_id, symbol) identifies a position.
alter_sql = """
ALTER TABLE "portfolio"
ADD PRIMARY KEY ("account_id", "symbol");
"""

# engine.begin() commits when the block succeeds and rolls back on error.
# With SQLAlchemy 2.x a plain engine.connect() block does not autocommit, so
# the ALTER would be rolled back when the connection closes.
with engine.begin() as connection:
    connection.execute(text(alter_sql))

### Portfolio Detail

In [None]:
# Read the fund detail spreadsheet (path is relative to the notebook's working
# directory). The same file is also read in the data-generation cell above.
fund_dtl_df = pd.read_excel('equity_fund_example.xlsx')

In [None]:
fund_dtl_df.head()

In [None]:
# if_exists='replace' drops any existing "fund_detail" table before inserting;
# index=False keeps the DataFrame index out of the table.
fund_dtl_df.to_sql('fund_detail', engine, if_exists='replace', index=False)

In [None]:
# The freshly written table has no primary key yet, so key it on (ticker, fund).
# NOTE(review): assumes the spreadsheet has "ticker" and "fund" columns whose
# combination is unique -- confirm against equity_fund_example.xlsx.
alter_sql = """
ALTER TABLE "fund_detail"
ADD PRIMARY KEY ("ticker", "fund");
"""

# engine.begin() commits when the block succeeds and rolls back on error.
# With SQLAlchemy 2.x a plain engine.connect() block does not autocommit, so
# the ALTER would be rolled back when the connection closes.
with engine.begin() as connection:
    connection.execute(text(alter_sql))

# Test Results

In [None]:
pd.read_sql("SELECT * FROM portfolio limit 10", con=engine)

In [None]:
pd.read_sql("SELECT * FROM fund_detail limit 10", con=engine)

In [None]:
pd.read_sql("SELECT * FROM client_alignment limit 10", con=engine)

In [None]:
import datetime
import pytz

# Print the current wall-clock time in the US/Central zone as a record of when
# the notebook was last executed.
print(f"Notebook last execution time: {datetime.datetime.now(pytz.timezone('US/Central')).strftime('%a, %d %B %Y %H:%M:%S')}")