In [1]:
import pandas as pd
import numpy as np
import psycopg2
from sql_functions import get_dataframe
from sql_functions import get_engine
# Build the database engine once, up front; it is reused as the `con` argument
# when the processed table is written back at the end of the notebook.
# NOTE(review): get_engine comes from the local sql_functions module — presumably
# it returns a SQLAlchemy engine (or None on failure); confirm there.
engine = get_engine()


In [2]:
# Pull every column of public.etf_esg_merged_final_final into `etfs`.
# The cells below treat `etfs` as a pandas DataFrame (.str, .drop, .head, .to_sql).
# NOTE(review): get_dataframe is a project helper from sql_functions — presumably
# it executes the query and returns the result as a DataFrame; confirm there.
etfs_sql = 'SELECT * FROM public.etf_esg_merged_final_final'
etfs = get_dataframe(etfs_sql)

In [3]:
# Preview the `segment` labels cast to str.
# NOTE: the result is only displayed, not assigned — `etfs['segment']` itself is
# left unchanged by this cell.
etfs['segment'].astype(str)

0                      Equity: U.S.  -  Large Cap
1                      Equity: U.S.  -  Large Cap
2                      Equity: U.S.  -  Large Cap
3                   Equity: U.S.  -  Total Market
4                      Equity: U.S.  -  Large Cap
                          ...                    
2178                      Equity: Global Consumer
2179    Asset Allocation: Global - Target Outcome
2180                Equity: U.S.  -  Total Market
2181              Equity: Russia  -  Total Market
2182                      Equity: Global Internet
Name: segment, Length: 2183, dtype: object

In [4]:
# Break each "<main>: <sub>" segment label into two new columns.
#   * n=1 splits on the FIRST colon only, so a label that happens to contain a
#     further colon still yields exactly two parts; without it such a label
#     would produce extra columns and the two-column assignment would raise.
#   * Each part is stripped, because the raw labels have a blank after the colon
#     that would otherwise be stored as leading whitespace in `sub_segment`.
etfs[['main_segment', 'sub_segment']] = (
    etfs['segment']
    .str.split(':', n=1, expand=True)
    .apply(lambda part: part.str.strip())
)
# Show the first rows to check the two new columns.
etfs.head()

Unnamed: 0,ticker,fund_name,segment,one_yr_return,expense_ratio,aum,analyst_pick,grade,efficiency,tradability,...,p_as_of_date,esg_rating,quality_score_10,score_global_rank,sustainable_impact_exposure,sri_screening_criteria_exposure_%,score_peer_rank,carbon_intensity,main_segment,sub_segment
0,SPY,SPDR S&P 500 ETF Trust,Equity: U.S. - Large Cap,17.03,0.09,425980000000.0,True,A,99.0,100.0,...,2023-07-25,A,6.68,58.71,,9.75,58.71,122.31,Equity,U.S. - Large Cap
1,IVV,iShares Core S&P 500 ETF,Equity: U.S. - Large Cap,17.08,0.03,351620000000.0,True,A,100.0,100.0,...,2023-07-25,A,6.68,58.67,,9.67,58.67,122.41,Equity,U.S. - Large Cap
2,VOO,Vanguard 500 Index Fund,Equity: U.S. - Large Cap,17.09,0.03,336310000000.0,True,A,99.0,100.0,...,2023-07-25,A,6.68,58.75,,9.76,58.75,122.47,Equity,U.S. - Large Cap
3,VTI,Vanguard Total Stock Market ETF,Equity: U.S. - Total Market,16.46,0.03,319680000000.0,True,A,99.0,100.0,...,2023-07-25,A,6.52,52.51,,8.86,52.51,122.61,Equity,U.S. - Total Market
4,QQQ,Invesco QQQ Trust,Equity: U.S. - Large Cap,27.05,0.2,209300000000.0,True,A,97.0,100.0,...,2023-07-25,A,6.7,59.94,,8.36,59.94,47.01,Equity,U.S. - Large Cap


In [5]:
# Remove columns that are no longer needed, modifying `etfs` in place:
#   - segment: already split into main_segment / sub_segment
#   - score_peer_rank, sustainable_impact_exposure: dropped from the export
#     (NOTE(review): in the previewed rows the former equals score_global_rank
#     and the latter is empty — presumably why they are dropped; confirm)
etfs.drop(
    columns=["segment", "score_peer_rank", "sustainable_impact_exposure"],
    inplace=True,
)

In [6]:
# Display the first rows again to check the frame after the column drop.
etfs.head()

Unnamed: 0,ticker,fund_name,one_yr_return,expense_ratio,aum,analyst_pick,grade,efficiency,tradability,fit,...,p_five_yr,p_ten_yr,p_as_of_date,esg_rating,quality_score_10,score_global_rank,sri_screening_criteria_exposure_%,carbon_intensity,main_segment,sub_segment
0,SPY,SPDR S&P 500 ETF Trust,17.03,0.09,425980000000.0,True,A,99.0,100.0,96.0,...,11.79,12.48,2023-07-25,A,6.68,58.71,9.75,122.31,Equity,U.S. - Large Cap
1,IVV,iShares Core S&P 500 ETF,17.08,0.03,351620000000.0,True,A,100.0,100.0,96.0,...,11.82,12.54,2023-07-25,A,6.68,58.67,9.67,122.41,Equity,U.S. - Large Cap
2,VOO,Vanguard 500 Index Fund,17.09,0.03,336310000000.0,True,A,99.0,100.0,96.0,...,11.82,12.55,2023-07-25,A,6.68,58.75,9.76,122.47,Equity,U.S. - Large Cap
3,VTI,Vanguard Total Stock Market ETF,16.46,0.03,319680000000.0,True,A,99.0,100.0,99.0,...,10.98,11.98,2023-07-25,A,6.52,52.51,8.86,122.61,Equity,U.S. - Total Market
4,QQQ,Invesco QQQ Trust,27.05,0.2,209300000000.0,True,A,97.0,100.0,67.0,...,16.5,18.66,2023-07-25,A,6.7,59.94,8.36,47.01,Equity,U.S. - Large Cap


In [7]:
# Name of the destination table; used by the to_sql call (and its status
# message) in the next cell.
table_name = 'etfs_segmented'

In [8]:

# Write the records stored in `etfs` to the SQL database as public.<table_name>.
# The write is skipped (with a message) when no engine is available.
if engine is not None:  # identity check is the idiomatic way to test for None
    try:
        etfs.to_sql(
            name=table_name,      # name of the target SQL table
            con=engine,           # engine or connection to write through
            schema='public',      # target schema
            if_exists='replace',  # drop an existing table before inserting new values
            index=False,          # do NOT write the DataFrame index as a column
            chunksize=5000,       # number of rows written per batch
            method='multi',       # pass multiple rows in a single INSERT clause
        )
        print(f"The {table_name} table was imported successfully.")
    # Error handling: report the failure instead of raising. Catching Exception
    # already covers psycopg2.DatabaseError, which is an Exception subclass.
    except Exception as error:
        print(error)
        # Discard the engine so that re-running this cell reports 'No engine';
        # the engine has to be recreated before another write attempt.
        engine = None
else:
    print('No engine')

The etfs_segmented table was imported successfully.
