In [12]:
import pandas as pd
import sqlite3

# 1. Create Database and Store Data

In [72]:
# Open the local SQLite database file (created on first use if it does not exist yet)
conn = sqlite3.connect(database='zack_database.db')

# Function: Save parquet file to a specified table
def import_parquet_to_table(parquet_file_path, table_name, connection=None):
    """Load a parquet file and write its contents to a database table.

    An existing table with the same name is replaced
    (``if_exists='replace'``) and the DataFrame index is not written.

    Parameters
    ----------
    parquet_file_path : str or path-like
        Location of the parquet file to read.
    table_name : str
        Name of the destination table.
    connection : sqlite3.Connection, optional
        Connection to write to. When omitted, the notebook-level ``conn``
        is used, which is what this function always did before.

    Returns
    -------
    None
        Deliberately returns nothing so that a call at the end of a cell
        does not render an extra output.
    """
    # Only touch the global when no explicit connection was supplied, so the
    # function also works with a fresh or in-memory connection.
    target = conn if connection is None else connection
    df = pd.read_parquet(parquet_file_path)  # Load parquet file into DataFrame
    df.to_sql(table_name, target, if_exists='replace', index=False)  # Replace the table with the DataFrame contents
    print(f"Data from {parquet_file_path} successfully imported into table '{table_name}'.")

# Function: Save CSV file to a specified table
def import_csv_to_table(csv_file_path, table_name, connection=None):
    """Load a CSV file and write its contents to a database table.

    An existing table with the same name is replaced
    (``if_exists='replace'``) and the DataFrame index is not written.

    Parameters
    ----------
    csv_file_path : str or path-like
        Location of the CSV file to read.
    table_name : str
        Name of the destination table.
    connection : sqlite3.Connection, optional
        Connection to write to. When omitted, the notebook-level ``conn``
        is used, which is what this function always did before.

    Returns
    -------
    None
        Deliberately returns nothing so that a call at the end of a cell
        does not render an extra output.
    """
    # Only touch the global when no explicit connection was supplied, so the
    # function also works with a fresh or in-memory connection.
    target = conn if connection is None else connection
    df = pd.read_csv(csv_file_path)  # Load CSV file into DataFrame
    df.to_sql(table_name, target, if_exists='replace', index=False)  # Replace the table with the DataFrame contents
    print(f"Data from {csv_file_path} successfully imported into table '{table_name}'.")

# Source parquet file -> destination table name
parquet_files = {
    "t_zacks_fc.parquet": "t_zacks_fc",
    "t_zacks_fr.parquet": "t_zacks_fr",
    "t_zacks_mktv.parquet": "t_zacks_mktv",
    "t_zacks_shrs.parquet": "t_zacks_shrs",
}

# Load every parquet file into its own table, in the order listed above
for file_path in parquet_files:
    table_name = parquet_files[file_path]
    import_parquet_to_table(file_path, table_name)

# The sectors table comes from a CSV file, so it goes through the CSV loader
import_csv_to_table("t_zacks_sectors.csv", "t_zacks_sectors")

Data from t_zacks_fc.parquet successfully imported into table 't_zacks_fc'.
Data from t_zacks_fr.parquet successfully imported into table 't_zacks_fr'.
Data from t_zacks_mktv.parquet successfully imported into table 't_zacks_mktv'.
Data from t_zacks_shrs.parquet successfully imported into table 't_zacks_shrs'.
Data from t_zacks_sectors.csv successfully imported into table 't_zacks_sectors'.


# 2. Extracting Data

In [74]:
# Average shares outstanding for one sector and one year.
# t_zacks_shrs is joined to t_zacks_fc on (ticker, per_end_date, per_type), and
# t_zacks_fc is joined to t_zacks_sectors on zacks_sector_code so the rows can be
# filtered by sector name.
# NOTE(review): there is no per_type filter, so every period type present in the
# joined rows contributes to the average - confirm this is intended.
TARGET_SECTOR = 'Computer & Technology'
TARGET_YEAR = '2017'  # kept as text: strftime('%Y', ...) yields a string

# The filter values are bound as parameters (the two `?` placeholders) instead of
# being embedded in the SQL text, so they can be changed in one place.
query_zacks = """
SELECT
    AVG(shrs.shares_out) AS avg_shares_outstanding
FROM
    t_zacks_shrs AS shrs
JOIN
    t_zacks_fc AS fc
ON
    shrs.ticker = fc.ticker
    AND shrs.per_end_date = fc.per_end_date
    AND shrs.per_type = fc.per_type
JOIN
    t_zacks_sectors AS sectors
ON
    fc.zacks_sector_code = sectors.zacks_sector_code
WHERE
    sectors.sector = ?
    AND strftime('%Y', fc.per_end_date) = ?;
"""

# Execute the query; params are matched to the placeholders in order
zacks_df = pd.read_sql_query(query_zacks, conn, params=(TARGET_SECTOR, TARGET_YEAR))

# Display the result
zacks_df


Unnamed: 0,avg_shares_outstanding
0,595.025608


In [4]:
# Close the database connection. Once this has run, `conn` can no longer be used
# for queries; re-run the cell that creates `conn` before executing any earlier cell again.
conn.close()