# NaN Handling with SQLite and Pandas

Explore how 'nan' values within SQLite are handled by Pandas.

In [None]:
import sqlite3

import pandas as pd
from sqlalchemy import create_engine, text

In [None]:
# Open one SQLAlchemy connection per SQLite file used in this notebook:
# the "err" database and its "err_fixed" counterpart.
# reference: https://nih.figshare.com/articles/dataset/Cell_Health_-_Cell_Painting_Single_Cell_Profiles/9995672
err_db_url = "sqlite:///testing_err_SQ00014613.sqlite"
err_fixed_db_url = "sqlite:///testing_err_fixed_SQ00014613.sqlite"

sqlite_err_conn = create_engine(err_db_url).connect()
sqlite_err_fixed_conn = create_engine(err_fixed_db_url).connect()

## Using Table with 'nan' As-is (string values in a float-affinity column)

In [None]:
# Fetch the first two rows of the column together with typeof(), which
# returns SQLite's storage class for each value, from the database whose
# 'nan' values are left as-is.
sql_stmt = """
select 
    Cytoplasm_Correlation_Costes_AGP_DNA, 
    typeof(Cytoplasm_Correlation_Costes_AGP_DNA)
from cytoplasm
limit 2 
"""
# Wrap the raw SQL in text(): SQLAlchemy 2.0 no longer accepts a plain string
# in Connection.execute(), while text() works on both 1.4 and 2.0.
sqlite_err_conn.execute(text(sql_stmt)).fetchall()

In [None]:
# Load the entire cytoplasm table from the as-is database into a DataFrame,
# then display the first value of the column under inspection.
sql_stmt = "\nselect * from cytoplasm\n"
df = pd.read_sql(sql_stmt, con=sqlite_err_conn)

costes_col = df["Cytoplasm_Correlation_Costes_AGP_DNA"]
costes_col.iloc[0]

In [None]:
# Python type pandas produced for the first row's value in this column.
first_value = df["Cytoplasm_Correlation_Costes_AGP_DNA"].iloc[0]
type(first_value)

In [None]:
# Display the second row's value from the same column.
costes_col = df["Cytoplasm_Correlation_Costes_AGP_DNA"]
costes_col.iloc[1]

In [None]:
# Python type pandas produced for the second row's value in this column.
second_value = df["Cytoplasm_Correlation_Costes_AGP_DNA"].iloc[1]
type(second_value)

In [None]:
# dtype pandas assigned to the column as a whole when reading the as-is table.
column_name = "Cytoplasm_Correlation_Costes_AGP_DNA"
df[column_name].dtype

## Using Table with 'nan' Converted to SQLite NULL

In [None]:
# Same two-row check as for the as-is database, now against the "fixed"
# database: the column value plus typeof(), which returns SQLite's storage
# class for each value.
sql_stmt = """
select 
    Cytoplasm_Correlation_Costes_AGP_DNA, 
    typeof(Cytoplasm_Correlation_Costes_AGP_DNA)
from cytoplasm
limit 2 
"""
# Wrap the raw SQL in text(): SQLAlchemy 2.0 no longer accepts a plain string
# in Connection.execute(), while text() works on both 1.4 and 2.0.
sqlite_err_fixed_conn.execute(text(sql_stmt)).fetchall()

In [None]:
# Reload the entire cytoplasm table, this time from the "fixed" database.
# This rebinds `df`, so the cells below inspect the fixed data.
sql_stmt = "\nselect * from cytoplasm\n"
df = pd.read_sql(sql_stmt, con=sqlite_err_fixed_conn)

fixed_costes_col = df["Cytoplasm_Correlation_Costes_AGP_DNA"]
fixed_costes_col.iloc[0]

In [None]:
# Python type pandas produced for the first row's value in this column.
first_value = df["Cytoplasm_Correlation_Costes_AGP_DNA"].iloc[0]
type(first_value)

In [None]:
# Display the second row's value from the same column.
fixed_costes_col = df["Cytoplasm_Correlation_Costes_AGP_DNA"]
fixed_costes_col.iloc[1]

In [None]:
# Python type pandas produced for the second row's value in this column.
second_value = df["Cytoplasm_Correlation_Costes_AGP_DNA"].iloc[1]
type(second_value)

In [None]:
# dtype pandas assigned to the column as a whole when reading the fixed table.
column_name = "Cytoplasm_Correlation_Costes_AGP_DNA"
df[column_name].dtype