In [1]:
# Import necessary libraries

import glob # For file path matching
import sqlite3 # For SQLite database operations
import pandas as pd # For data manipulation and analysis

### Command Line Operations for SQLite

```bash
#Load SQLite on Koa
module load devel/SQLite

# Create or open the database
sqlite3 my_database.db

# list the tables in the db
.tables

# view the schema of a specific table
.schema table_name

# view the rows of a table
SELECT * FROM my_table; 

# exit
.exit
```

In [2]:
# This script reads CSV files from a specified directory and writes them to an SQLite database.

# Define the database file
db_file = 'ng_database.db'

# Connect to SQLite
conn = sqlite3.connect(db_file)
cursor = conn.cursor()

# Get list of CSV files
csv_files = glob.glob("Data/*.csv")

for file in csv_files:
    # Read CSV into DataFrame
    df = pd.read_csv(file)
    
    # Extract table name from file name (remove path and extension)
    table_name = file.split("/")[-1].replace(".csv", "")

    # Write DataFrame to SQLite
    df.to_sql(table_name, conn, if_exists="replace", index=False)

# Close connection
conn.close()

In [3]:
# Connect to the SQLite database
db_file = "ng_database.db"
conn = sqlite3.connect(db_file)

# Get a list of tables
query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql(query, conn)

print("Tables in the database:", tables)


Tables in the database:              name
0  NM_Residential
1  AL_Residential
2  WI_Residential
3  CA_Residential
4  KY_Residential


In [4]:
table_name = "NM_Residential"  # Replace with your actual table name
df = pd.read_sql(f"SELECT * FROM {table_name}", conn)
df

Unnamed: 0,year,month,state,residential,HDD_20
0,1989,1,NM,181.387097,18.558140
1,1989,2,NM,167.714286,15.573355
2,1989,3,NM,107.903226,10.191154
3,1989,4,NM,65.333333,6.605651
4,1989,5,NM,35.838710,4.167347
...,...,...,...,...,...
403,2022,8,NM,30.870968,0.913051
404,2022,9,NM,27.000000,2.033279
405,2022,10,NM,54.161290,7.461242
406,2022,11,NM,167.933333,15.038542


In [5]:
# Convert the 'name' column of the tables DataFrame into a list of table names
tables_list = tables['name'].tolist()

# Create a dictionary where each key is a table name and the value is the corresponding DataFrame
dfs = {table: pd.read_sql(f"SELECT * FROM {table}", conn) for table in tables_list}

# Example: Access the dictionary containing all tables as DataFrames
dfs

{'NM_Residential':      year  month state  residential     HDD_20
 0    1989      1    NM   181.387097  18.558140
 1    1989      2    NM   167.714286  15.573355
 2    1989      3    NM   107.903226  10.191154
 3    1989      4    NM    65.333333   6.605651
 4    1989      5    NM    35.838710   4.167347
 ..    ...    ...   ...          ...        ...
 403  2022      8    NM    30.870968   0.913051
 404  2022      9    NM    27.000000   2.033279
 405  2022     10    NM    54.161290   7.461242
 406  2022     11    NM   167.933333  15.038542
 407  2022     12    NM   203.322581  16.866253
 
 [408 rows x 5 columns],
 'AL_Residential':      year  month state  residential     HDD_20
 0    1989      1    AL   238.903226  10.141640
 1    1989      2    AL   251.571429  11.763194
 2    1989      3    AL   238.451613   7.031389
 3    1989      4    AL   157.400000   5.424619
 4    1989      5    AL    92.129032   2.375010
 ..    ...    ...   ...          ...        ...
 403  2022      8    AL  

In [6]:
dfs['NM_Residential']

Unnamed: 0,year,month,state,residential,HDD_20
0,1989,1,NM,181.387097,18.558140
1,1989,2,NM,167.714286,15.573355
2,1989,3,NM,107.903226,10.191154
3,1989,4,NM,65.333333,6.605651
4,1989,5,NM,35.838710,4.167347
...,...,...,...,...,...
403,2022,8,NM,30.870968,0.913051
404,2022,9,NM,27.000000,2.033279
405,2022,10,NM,54.161290,7.461242
406,2022,11,NM,167.933333,15.038542
