# Sample CloudSql Connection

# Required setup for each instance: add your VM's IP to the Authorized Networks
**1. Find the external IP of your JupyterLab VM**
https://console.cloud.google.com/compute/instances?project=adsp-34002-on02-sopho-scribe&authuser=1
 * Go to VM Instances
 * Find your JupyterLab VM
 * Copy the External IP address (looks like 34.91.100.45).

**2. Add the VM's IP to your Cloud SQL authorized networks**
https://console.cloud.google.com/sql/instances/currensee-sql/connections/networking?authuser=1&project=adsp-34002-on02-sopho-scribe
* Go to Cloud SQL instances. 
* Click your instance.
* Click Connections in the left sidebar.
* Scroll to Authorized networks → Add network.
* Name: anything like jupyterlab-vm
* Network: paste the external IP you just copied (e.g., 34.91.100.45/32)
* IMPORTANT: Add /32 to allow only that single IP.
* Click Save.

It will take ~30 seconds to update.

In [7]:
#%poetry add -q google-cloud-secret-manager==2.23.3
#%poetry add -q SQLAlchemy==2.0.40

In [8]:
#%poetry add psycopg2-binary sqlalchemy pandas

In [1]:
from google.cloud import secretmanager
import pandas as pd
from currensee.utils.db_utils import create_pg_engine

## IMPORTANT
The cell below will only work if you have a .env file defined at `<fl>_currensee/currensee/.env` with the credentials 
defined in `<fl>_currensee/currensee/.env.example`.

Instructions for filling out the credentials properly are in the `.env.example` file.

#### Create SQLAlchemy engine

In [2]:
# Name of the database to connect to; handed to create_pg_engine in the next cell.
DB_NAME = "crm"

In [3]:
# Build the engine for DB_NAME through the project's create_pg_engine helper.
engine = create_pg_engine(db_name=DB_NAME)

ModuleNotFoundError: No module named 'psycopg2'

In [9]:
# Smoke-test the connection: fetch ten rows from Employees and show them.
df_result = pd.read_sql(
    "SELECT * FROM Employees limit 10",
    con=engine,
)
print(df_result)

NameError: name 'engine' is not defined

# Generate Fake Data

In [None]:
# %poetry add faker

In [None]:
import pandas as pd
import random
import os
import re
from faker import Faker

# Faker is a Python package that generates fake data for you.
# The Faker instance itself is initialized in the next cell.

# The mock clients are a hard-coded list of publicly traded companies (defined further below).


In [None]:

# Shared Faker instance; the generator helpers below read this module-level `fake`.
fake = Faker()

# Helper function to generate synthetic employee data
def generate_employee_data(num_employees=10, ourcompany_name='bankwell'):
    """Build a DataFrame of synthetic employees for our own company.

    The first row is always a fixed persona (Jane Moneypenny, Relationship
    Manager, Enterprise Investment, San Fransisco market). The remaining
    ``num_employees - 1`` rows are filled with random Faker / ``random``
    values.

    Args:
        num_employees: Total number of rows wanted, including the fixed first
            row. Values below 1 still yield that single fixed row.
        ourcompany_name: Company name. Non-word characters are stripped and
            the result is lower-cased to form the email domain of every
            employee, including the fixed first one.

    Returns:
        pandas.DataFrame with columns EmployeeID, FirstName, LastName, Title,
        Email, Phone, HireDate, Department and Market.
    """
    # Loop-invariant: the email domain depends only on the company name.
    company_domain = re.sub(r'\W+', '', ourcompany_name).lower()

    titles = ['Finance Assistant', 'Financial Advisor', 'Senior Relationship Manager', 'Product Specialist', 'Relationship Manager']
    departments = ['Enterprise Investment', 'Small Business Investment', 'Operations', 'Sales', 'Customer Support']
    markets = ['San Fransisco', 'New York City', 'Boston', 'Denver', 'Los Angeles', 'Miami', 'Washington DC', 'Seattle', 'Dallas', 'Chicago']

    employees = [
        {
            'EmployeeID': fake.unique.uuid4(),
            'FirstName': "Jane",
            'LastName': "Moneypenny",
            'Title': "Relationship Manager",
            # Previously hard-coded to bankwell.com, which ignored
            # ourcompany_name; identical output for the default name.
            'Email': f"jane.moneypenny1@{company_domain}.com",
            'Phone': fake.phone_number(),
            'HireDate': fake.date_this_decade(),
            'Department': 'Enterprise Investment',
            'Market': 'San Fransisco'
        }
    ]

    for _ in range(num_employees - 1):
        # Draw order is kept as before so seeded runs produce the same rows.
        employee_id = fake.unique.uuid4()
        first_name = fake.first_name()
        last_name = fake.last_name()
        title = random.choice(titles)
        phone = fake.phone_number()
        department = random.choice(departments)
        hire_date = fake.date_this_decade()
        market = random.choice(markets)

        employees.append({
            'EmployeeID': employee_id,
            'FirstName': first_name,
            'LastName': last_name,
            'Title': title,
            'Email': f"{first_name.lower()}.{last_name.lower()}@{company_domain}.com",
            'Phone': phone,
            'HireDate': hire_date,
            'Department': department,
            'Market': market
        })
    return pd.DataFrame(employees)

# Helper function to generate point of contact and info for a Company
def generate_point_of_contact(Company_name):
    """Create one synthetic account / point-of-contact record for a company.

    Args:
        Company_name: Company the contact belongs to; also used, with
            non-word characters removed and lower-cased, as the email domain.

    Returns:
        dict with keys AccountID, ContactFirstName, ContactLastName,
        ContactTitle, Phone, Email, Website, Location, AnnualRevenue and
        TotalAccountBal.
    """
    account_id = fake.unique.uuid4()
    given_name = fake.first_name()
    family_name = fake.last_name()

    # Strip every non-word character so the name can serve as an email domain.
    email_domain = re.sub(r'\W+', '', Company_name).lower()

    # Values are drawn in key order, which keeps the sequence of Faker and
    # random draws the same as when each was assigned to its own variable.
    return {
        "AccountID": account_id,
        "ContactFirstName": given_name,
        "ContactLastName": family_name,
        "ContactTitle": random.choice(["Senior Director", "Manager", "Director", "VP", "Consultant"]),
        "Phone": fake.phone_number(),
        "Email": f"{given_name.lower()}.{family_name.lower()}@{email_domain}.com",
        "Website": fake.url(),
        "Location": random.choice(['San Fransisco', 'New York City', 'Boston', 'Denver', 'Los Angeles', 'Miami', 'Washington DC', 'Seattle', 'Dallas', 'Chicago']),
        "AnnualRevenue": random.randint(1000000, 50000000),
        "TotalAccountBal": random.randint(1000000, 50000000),
    }


def generate_account_data(num_accounts=5, clients_company=None):
    """Build the accounts DataFrame: one row per entry in ``clients_company``.

    Args:
        num_accounts: Currently unused; the number of rows is determined
            solely by ``len(clients_company)``. Kept for call compatibility.
        clients_company: Iterable of dicts with ``"Company"`` and
            ``"industry"`` keys. ``None`` (the default) is treated as empty.

    Returns:
        pandas.DataFrame with the Company and industry columns followed by
        the fields returned by ``generate_point_of_contact``. Empty when no
        companies are given.
    """
    # None instead of a mutable [] default, so no list object is shared
    # between calls.
    if clients_company is None:
        clients_company = []

    accounts = []
    for company in clients_company:
        contact = generate_point_of_contact(company["Company"])
        accounts.append({
            "Company": company["Company"],
            "industry": company["industry"],
            **contact,
        })
    return pd.DataFrame(accounts)




# Helper function to generate synthetic opportunity data
def generate_opportunity_data(accounts_df, num_opportunities_per_account=3):
    """Create synthetic sales opportunities for every account.

    Args:
        accounts_df: DataFrame of accounts; each row's ``AccountID`` is
            copied onto the opportunities generated for it.
        num_opportunities_per_account: Upper bound (inclusive) on how many
            opportunities one account receives; the lower bound is 1.

    Returns:
        pandas.DataFrame with columns OpportunityID, AccountID,
        OpportunityName, Stage, Type, CloseDate and Amount (no columns at all
        when ``accounts_df`` has no rows).
    """
    rows = []
    for _, acct in accounts_df.iterrows():
        how_many = random.randint(1, num_opportunities_per_account)
        # The generator is consumed immediately by extend(), so records are
        # built one at a time in the same order as an explicit inner loop.
        rows.extend(
            {
                'OpportunityID': fake.unique.uuid4(),
                'AccountID': acct['AccountID'],
                'OpportunityName': fake.bs(),
                'Stage': random.choice(['Prospecting', 'Qualification', 'Proposal', 'Negotiation', 'Won', 'Lost', 'Closed']),
                'Type': random.choice(['New Business', 'Existing Business', 'Renewal', 'Upsell']),
                'CloseDate': fake.date_this_year(),
                'Amount': random.randint(50000, 500000),
            }
            for _ in range(how_many)
        )
    return pd.DataFrame(rows)

# Helper function to generate synthetic employee-contact relationship data
def generate_employee_contact_data(employees_df, accounts_df, num_relationships_per_employee=2):
    """Randomly align employees with client accounts.

    Each employee gets between 1 and ``num_relationships_per_employee``
    accounts, chosen with ``random.choice`` (so the same account can be drawn
    more than once for one employee).

    Args:
        employees_df: DataFrame with EmployeeID, FirstName and LastName.
        accounts_df: DataFrame with AccountID, Company, industry,
            ContactFirstName, ContactLastName, Email, ContactTitle and Phone.
        num_relationships_per_employee: Inclusive upper bound on the number
            of accounts drawn per employee.

    Returns:
        pandas.DataFrame with one row per (employee, drawn account) pair,
        carrying the employee's ID and name plus the account's company,
        industry and contact details. Empty when either input has no rows.
    """
    # Nothing to pair up. Without this guard an empty accounts frame made
    # random.choice raise IndexError.
    if len(employees_df) == 0 or len(accounts_df) == 0:
        return pd.DataFrame([])

    # Hoisted out of the loops: the candidate IDs never change.
    account_ids = accounts_df['AccountID'].tolist()

    # One lookup table instead of seven boolean-mask scans per relationship.
    # setdefault keeps the first row for an ID, matching the previous
    # `.loc[mask, col].iloc[0]` first-match behaviour.
    account_by_id = {}
    for _, account_row in accounts_df.iterrows():
        account_by_id.setdefault(account_row['AccountID'], account_row)

    relationships = []
    for _, employee in employees_df.iterrows():
        num_relationships = random.randint(1, num_relationships_per_employee)
        for _ in range(num_relationships):
            account_id = random.choice(account_ids)
            account = account_by_id[account_id]
            relationships.append({
                'EmployeeID': employee['EmployeeID'],
                'EmployeeFirstName': employee['FirstName'],
                'EmployeeLastName': employee['LastName'],
                'AccountID': account_id,
                'Company': account['Company'],
                'Industry': account['industry'],
                'ContactFirstName': account['ContactFirstName'],
                'ContactLastName': account['ContactLastName'],
                'ContactEmail': account['Email'],
                'ContactTitle': account['ContactTitle'],
                'ContactPhone': account['Phone'],
            })
    return pd.DataFrame(relationships)


def generate_portfolios(df_accounts, max_positions=10, instruments=None):
    """Assign each account a random set of distinct instruments.

    Args:
        df_accounts: DataFrame with ``AccountID`` and ``Company`` columns.
        max_positions: Inclusive upper bound on positions per account. It is
            capped at the number of available instruments.
        instruments: Sequence of ``(symbol, instrument_type)`` pairs to draw
            from. ``None`` (the default) is treated as empty.

    Returns:
        pandas.DataFrame with columns AccountID, Company_name, symbol and
        instrument_type; empty when there are no accounts or no instruments.
    """
    # None instead of a mutable [] default; copy so sampling always sees a
    # plain list.
    pool = list(instruments) if instruments is not None else []

    # Honour max_positions (previously ignored in favour of a fixed 5-10
    # range) and never request more items than exist, which would make
    # random.sample raise ValueError. With max_positions=10 and at least ten
    # instruments this is still randint(5, 10).
    min_positions = 5
    upper = max(0, min(max_positions, len(pool)))
    lower = min(min_positions, upper)

    portfolio_records = []
    for _, row in df_accounts.iterrows():
        account_id = row["AccountID"]
        company = row["Company"]
        num_positions = random.randint(lower, upper)
        for symbol, instrument_type in random.sample(pool, num_positions):
            portfolio_records.append({
                "AccountID": account_id,
                "Company_name": company,
                "symbol": symbol,
                "instrument_type": instrument_type
            })

    return pd.DataFrame(portfolio_records)



In [None]:
# Mock clients are limited to this fixed set of publicly traded companies,
# grouped here by industry; flattening keeps the groups and names in the
# order they are written. Likely want this to be random instead..
# NOTE(review): a few names look misspelled (e.g. "Mariott",
# "Peakstone Realty Trus"); left untouched in case downstream data relies on
# the exact strings -- confirm before correcting.
clients_company_info = [
    {"Company": company_name, "industry": industry_name}
    for industry_name, company_names in (
        ("Technology", (
            "Broadcom", "Cisco", "Palantir Technologies", "Fiserv",
            "Atlassian", "Leidos", "Duolingo", "Logitech", "Celestica",
            "Dropbox", "Plexus", "Silicon Laboratories", "Mobix Labs",
        )),
        ("Hospitality", (
            "Mariott", "InterContinental Hotels Group", "Sonder Holdings",
            "Hyatt Hotels", "Royal Caribbean Cruises",
        )),
        ("Healthcare", (
            "UnitedHealth", "Johnson & Johnson", "AbbVie", "Novo Nordisk",
            "Abbott Laboratories", "AstraZeneca", "Merck & Co",
            "Intuitive Surgical", "Medtronic", "Zoetis", "Humana",
            "Illumina", "Guardant Health", "Rhythm Pharmaceuticals",
            "Amedisys",
        )),
        ("Automotive", (
            "Rivian Automotive", "Fordy",
        )),
        ("Retail", (
            "lululemon athletica", "DICK'S Sporting Goods", "GameStop Corp",
            "Texas Roadhouse", "Hasbro", "Mattel", "Wayfair", "Peloton",
            "Sally Beauty", "Lifetime Brand", "Allbirds", "Walmart",
            "Tyson Foods", "Sprouts Farmers Market", "Dollar Tree", "Stride",
            "Spectrum Brands", "Udemy", "Vital Farms",
            "Graham Holdings Company", "Hims & Hers Health",
            "Smithfield Foods", "Albertsons Companies",
        )),
        ("Manufacturing", (
            "Albany International", "IT Tech Packaging",
            "Lockheed Martin Corporation", "Landstar System",
            "Hexcel Corporation", "AeroVironment", "Matson",
            "McGrath RentCorp", "Mueller Industries", "Dolby Laboratories",
            "ManpowerGroup",
        )),
        ("RealEstate", (
            "Welltower", "Iron Mountain Incorporated", "Camden Property",
            "CubeSmart", "Federal Realty Investment Trust",
            "Essential Properties Realty", "Compass",
            "Medical Properties Trust", "Broadstone", "Ladder Capital Corp",
            "Peakstone Realty Trus", "Fathom Holdings",
            "Presidio Property Trust", "Service Properties Trust",
        )),
    )
    for company_name in company_names
]

# Ticker symbols available to the mock portfolios, one list per asset class.
stocks = ['AAPL', 'MSFT', 'GOOGL', 'AMZN', 'TSLA', 'NVDA', 'META', 'JPM', 'V', 'UNH']
bonds = ['US10Y', 'US30Y', 'CORP1', 'CORP2', 'MUNI1', 'MUNI2']
mutual_funds = ['VFIAX', 'SWPPX', 'FXAIX', 'VTSAX', 'FZROX', 'SPY']

# Flatten into (symbol, instrument_type) pairs: stocks first, then bonds,
# then mutual funds.
instruments = [
    (symbol, instrument_type)
    for instrument_type, symbols in (
        ('Stock', stocks),
        ('Bond', bonds),
        ('Mutual Fund', mutual_funds),
    )
    for symbol in symbols
]

# Build every synthetic table. Opportunities, contacts and portfolios are all
# derived from accounts_df, so accounts are generated before them.
employees_df = generate_employee_data(num_employees=100, ourcompany_name='bankwell')
accounts_df = generate_account_data(num_accounts=1000, clients_company=clients_company_info)
opportunities_df = generate_opportunity_data(accounts_df, num_opportunities_per_account=3)
contacts_df = generate_employee_contact_data(employees_df, accounts_df, num_relationships_per_employee=30)
portfolio_df = generate_portfolios(accounts_df, max_positions=10, instruments=instruments)

# Preview each table: a heading line followed by the first few rows.
print("Employees Data:", employees_df.head(), sep="\n")
print("\nAccounts Data:", accounts_df.head(), sep="\n")
print("\nOpportunities Data:", opportunities_df.head(), sep="\n")
print("\nEmployee-Contact Relationships Data:", contacts_df.head(), sep="\n")
print("\nPortfolio Info for Each Account", portfolio_df.head(), sep="\n")





# Load to database

In [None]:
# Preview the generated employees before loading them.
employees_df.head()

In [None]:
# (rows, columns) of the employees frame.
employees_df.shape

In [None]:
# Write employees to the `employees` table. if_exists='replace' overwrites an
# existing table and index=False leaves the DataFrame index out of it.
employees_df.to_sql('employees', engine, if_exists='replace', index=False)

In [None]:
# Read the table back to confirm the load.
# NOTE(review): the query names `Employees` while the table was written as
# 'employees'; presumably the database folds unquoted identifiers to lower
# case -- confirm.
df = pd.read_sql("SELECT * from Employees", con=engine)
df.head()

In [None]:
# Preview the generated accounts before loading them.
accounts_df.head()

In [None]:
# (rows, columns) of the accounts frame.
accounts_df.shape

In [None]:
# Accounts are stored under the table name `clients_contact`.
accounts_df.to_sql('clients_contact', engine, if_exists='replace', index=False)

In [None]:
# Preview the employee-to-account relationships.
contacts_df.head()

In [None]:
# Relationships are stored under the table name `client_alignment`.
contacts_df.to_sql('client_alignment', engine, if_exists='replace', index=False)

In [None]:
# Preview the per-account portfolio positions.
portfolio_df.head()

In [None]:
# Positions are stored under the table name `portfolio`.
# NOTE(review): opportunities_df is not written to the database anywhere in
# this section -- confirm whether that is intentional.
portfolio_df.to_sql('portfolio', engine, if_exists='replace', index=False)

# Test Results

In [None]:
# Spot-check the loaded portfolio table.
pd.read_sql("SELECT * FROM portfolio limit 10", con=engine)

In [None]:
# Spot-check the loaded client_alignment table.
pd.read_sql("SELECT * FROM client_alignment limit 10", con=engine)

In [None]:
import datetime
import pytz

# Stamp the notebook with the current time in the US/Central zone.
print(f"Notebook last execution time: {datetime.datetime.now(pytz.timezone('US/Central')).strftime('%a, %d %B %Y %H:%M:%S')}")