#### Clarusway Python

* [Instructor Landing Page](landing_page.ipynb)
* <a href="https://colab.research.google.com/github/4dsolutions/clarusway_data_analysis/blob/main/Kirby%20Notebooks/DAwPy_sandbox.ipynb"><img align="left" src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open in Colab" title="Open and Execute in Google Colaboratory"></a>
* [![nbviewer](https://raw.githubusercontent.com/jupyter/design/master/logos/Badges/nbviewer_badge.svg)](https://nbviewer.org/github/4dsolutions/clarusway_data_analysis/blob/main/Kirby%20Notebooks/DAwPy_sandbox.ipynb)

<a id="toc"></a>

## <p style="background-color:#0D8D99; font-family:'Times New Roman', serif; color:#FFF9ED; font-size:175%; text-align:center; border-radius:10px 10px;">Looking Back: The pandas DataFrame<br>Looking Ahead: to SQL</p>

In [None]:
import pandas as pd
import numpy as np
from os import path

In [None]:
import sqlite3 as sql  # part of Python Standard Library

In [None]:
class Connector:
    """
    Context manager wrapping a sqlite3 connection and a cursor on it.

    Usage:
        with Connector("some.db") as db:
            db.list_tables()

    Inside the block, the raw connection is available as .conn and
    the cursor as .curs.  The connection is closed when the block
    exits, whether or not an exception occurred.
    """

    def __init__(self, conn_name):
        """
        conn_name: the database path handed to sqlite3.connect()
        when the with-block is entered.  Nothing is opened here.
        """
        self.cn_name = conn_name

    def __enter__(self):
        """
        Open the connection, create a cursor, and return this
        Connector so callers can reach .conn, .curs and the helper
        methods.  Any failure is reported and then re-raised.
        """
        try:
            self.conn = sql.connect(self.cn_name)
            print("Connection: ", self.conn)
            self.curs = self.conn.cursor()
            # self.list_tables() # optional
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt
            # and SystemExit are not reported as connection failures.
            print("No connection")
            raise

        return self

    def lookup(self, table, column, code):
        """
        Return the first row of table where column = code, as a
        tuple, or None when no row matches.

        code is bound as a query parameter.  table and column are
        interpolated into the SQL text as-is (identifiers cannot be
        bound as parameters), so they must come from trusted code,
        never from user input.
        """
        self.curs.execute(f"SELECT * FROM {table} WHERE {column} = ?", (code, ))
        return self.curs.fetchone() # could be None, could be a tuple

    def list_tables(self):
        """
        Print a listing of all the user tables in this db, one
        1-tuple per table name.
        https://www.sqlitetutorial.net/sqlite-show-tables/
        """
        # sqlite_master is the schema table's original name and works
        # with every SQLite version; the sqlite_schema alias only
        # exists in SQLite 3.33 and later.
        self.curs.execute("""SELECT name FROM sqlite_master
                            WHERE type ='table' AND name
                            NOT LIKE 'sqlite_%';
                            """)
        # loop through whatever table names were found
        # and filtered and print them out.
        for nm in self.curs.fetchall():
            print(nm)

    def __exit__(self, *oops):
        """
        Close the connection on the way out of the with-block.

        *oops is the (exc_type, exc_value, traceback) 3-tuple, all
        Nones if the block finished without an exception.  Returning
        False lets a pending exception propagate to the caller;
        returning True would suppress it.  When there was no
        exception the return value has no effect.
        """
        self.conn.close()
        if oops[0] is not None:
            print("An error occurred")
            return False  # let the exception propagate
        return True       # all good

In [None]:
# Confirm the database file is present in the working directory first:
# sqlite3.connect() would otherwise create a new, empty database file.
path.isfile("airports.db")

In [None]:
# Open the database and print the names of its tables; the connection
# is closed automatically when the with-block ends.
with Connector("airports.db") as conn:
    conn.list_tables()

In [None]:
# Read the whole Airports table into a DataFrame through the raw sqlite3
# connection (DB.conn), then spot-check two rows by their IATA code.
with Connector("./airports.db") as DB:
    df = pd.read_sql("SELECT * FROM Airports", con = DB.conn)
    print(DB.lookup("Airports", "iata", "SFO"))
    print(DB.lookup("Airports", "iata", "PDX"))

In [None]:
# Display the DataFrame read from the Airports table.
df

In [None]:
# Column names, dtypes and non-null counts: a quick way to spot missing data.
df.info()

In [None]:
# Summary statistics (describe() covers the numeric columns by default),
# transposed so that each described column becomes a row.
df.describe().T

In [None]:
# Number of distinct values in the "type" column (missing values are not counted).
df.type.nunique()

In [None]:
# The distinct "type" values themselves.
df.type.unique()

In [None]:
# Per-type count of non-null entries in every other column.
df.groupby("type").count()

In [None]:
# Number of distinct values in the "status" column (missing values are not counted).
df.status.nunique()

In [None]:
# The distinct "status" values themselves.
df.status.unique()

In [None]:
# Bracket access is required for this column: df.size is the DataFrame
# attribute holding the total element count, not the "size" column.
df["size"].nunique()

In [None]:
# The distinct "size" values, including any missing-value marker.
df["size"].unique()

In [None]:
df["size"].value_counts(dropna=False) # dropna=False also counts the missing (None) values

In [None]:
# Preview df without any row that contains a missing value.  The defaults
# (axis=0, how="any", inplace=False) apply, so df itself is left unchanged.
df.dropna()

In [None]:
# Keep only the fully populated rows (defaults: axis=0, how="any"); the
# result is a new DataFrame and df is not modified.
df2 = df.dropna()

In [None]:
# After dropna every column should report the same non-null count.
df2.info()

In [None]:
# Large airports only.  The mask is built from df2 itself, so its index
# matches the frame being filtered; a mask built from df has extra rows,
# which forces pandas to reindex it and emit a UserWarning.
big = df2[(df2["type"] == "airport") & (df2["size"] == "large")].reset_index(drop=True)

In [None]:
# Medium airports only.  The mask is built from df2 itself, so its index
# matches the frame being filtered; a mask built from df has extra rows,
# which forces pandas to reindex it and emit a UserWarning.
medium = df2[(df2["type"] == "airport") & (df2["size"] == "medium")].reset_index(drop=True)

In [None]:
# Small airports only.  The mask is built from df2 itself, so its index
# matches the frame being filtered; a mask built from df has extra rows,
# which forces pandas to reindex it and emit a UserWarning.
small = df2[(df2["type"] == "airport") & (df2["size"] == "small")].reset_index(drop=True)

In [None]:
# Preview just the identifying columns of the cleaned table.
df2[["iata", "iso", "name"]]

In [None]:
# Trim each size-specific table to its identifying columns ...
big, medium, small = (
    frame.loc[:, ["iata", "iso", "name"]] for frame in (big, medium, small)
)
# ... and keep the geographic columns in a separate table keyed by iata.
latlong = df2.loc[:, ["iata", "continent", "lat", "lon"]]

In [None]:
# Row count and dtypes of the large-airport table.
big.info()

In [None]:
# Row count and dtypes of the medium-airport table.
medium.info()

In [None]:
# Row count and dtypes of the small-airport table.
small.info()

In [None]:
# Row count and dtypes of the geographic table.
latlong.info()

In [None]:
# Attach continent/lat/lon to each large airport.  DataFrame.join matches
# big's "iata" column (on="iata") against the other frame's index, which is
# why latlong is re-indexed by iata first.
big.join(latlong.set_index("iata"), on="iata", how="inner", sort=True) # right index set to iata

In [None]:
# The same lookup with pd.merge on the shared iata column; how='left'
# keeps every row of big, and sort=True orders the result by the join key.
pd.merge(big, latlong, how='left', on='iata', sort=True)

In [None]:
# Rows whose iata repeats an earlier row (the first occurrence is not flagged).
big[big.duplicated('iata')]

In [None]:
# Show every row carrying the iata code HYD (presumably the duplicate found above).
big[big.iata == "HYD"]

In [None]:
# Row count before removing a row, for comparison afterwards.
big.info()

In [None]:
# NOTE(review): 421 is a hard-coded row label, presumably the duplicate HYD
# row inspected above; confirm it still matches if the data changes.
# Re-running this cell once the row is gone raises KeyError.
big = big.drop(index=421)

In [None]:
# Row count after the drop: expect one row fewer than before.
big.info()

In [None]:
# .size is rows x columns (the total number of cells), not the row count.
latlong.size

In [None]:
# Boolean mask: True for rows whose iata has not appeared in an earlier row.
~latlong.duplicated('iata')

In [None]:
# Drop rows that exactly repeat an earlier row, keeping the first occurrence.
# NOTE(review): all columns are compared here, unlike the iata-only check in
# the previous cell, so rows sharing an iata but differing elsewhere remain;
# confirm that is intended.
df3 = latlong.drop_duplicates()

In [None]:
# Display latlong with exact duplicate rows removed.
df3

In [None]:
# Sanity check: no fully duplicated rows should remain, so expect only False.
df3.duplicated().value_counts()

In [None]:
# Show every remaining row for the iata code YAX.
df3[df3.iata == 'YAX']

In [None]:
# Repeat the left merge, this time against df3 (latlong without exact
# duplicate rows); every row of big is kept and the result is sorted by iata.
pd.merge(big, df3, how='left', on='iata', sort=True)

In [None]:
# Display the large-airport table; pd.merge returned a new frame, so big is unchanged.
big

In [None]:
# Display df3 again; it is likewise unchanged by the merge.
df3