# MAPC Team 1 Notebook

In [1]:
# code block for imports 
import pandas as pd 
import numpy as np
import sqlite3
from sqlalchemy import create_engine
import os

In [2]:
# functions for reading data as a df and inserting into sql db
def readData(filename):
    """Read the CSV at ``filename`` and return its contents as a pandas DataFrame."""
    return pd.read_csv(filename)

def insertDataToDB(tablename, df, conn):
    """Write ``df`` into the table ``tablename`` through the connection ``conn``.

    A table that already exists under that name is replaced. The DataFrame
    index is written as well (pandas' default for ``to_sql``).
    """
    df.to_sql(name=tablename, con=conn, if_exists="replace")


In [3]:
# Open the project's SQLite database file, cs506MAPC.db (sqlite3 creates the file if it does not exist yet).
conn = sqlite3.connect('cs506MAPC.db')

# Load the FCC June 2019 data into the fcc_data_t table.
# The two lines at the bottom of this cell are commented out; uncomment them to (re)build the table.
# If they raise an error, first download the data and unzip it into your local
# copy of the repository. (The CSV is too large to commit to git.)
# Download the zip file from: https://github.com/MAPC/broadband-data-bu/tree/main/FCC%20data
# NOTE: Make sure the .csv file is in the 'MAPC Broadband Equity - Team 1' folder and that you rename it
# to "fcc_data_june2019.csv".

# df = readData("fcc_data_june2019.csv")
# insertDataToDB("fcc_data_t",df,conn)

## Display the first 10 provider records, ordered by tract (no filter on upload/download)

In [None]:
# Every distinct (ProviderName, tractNum, MaxAdDown, MaxAdUp) combination in
# FCC_DATA_T, ordered by tractNum. No filter on MaxAdDown/MaxAdUp is applied here.
# substr(BlockCode,0,12) yields the first 11 characters of BlockCode: SQLite
# string positions are 1-based, so a start of 0 uses up one slot of the length.
qrystr = (
    "SELECT DISTINCT ProviderName,substr(BlockCode,0,12) as tractNum,MaxAdDown,MaxAdUp "
    "FROM FCC_DATA_T "
    "ORDER BY substr(BlockCode,0,12) ASC"
)

# Convert the query results into a DataFrame.
df_filtered_providers = pd.read_sql_query(qrystr, conn)

# Preview the first 10 rows. As the last expression of the cell the frame is
# rendered as a rich table instead of the plain-text dump print() produces.
df_filtered_providers.head(10)




**Display the first 10 provider records with service (nonzero MaxAdDown and MaxAdUp), ordered by tract**

In [None]:
# Distinct provider/tract rows where both MaxAdDown and MaxAdUp are nonzero,
# ordered by tractNum (the first 11 characters of BlockCode).
qryStr2 = (
    "SELECT DISTINCT ProviderName, substr(BlockCode,0,12) as tractNum, MaxAdDown, MaxAdUp "
    "FROM FCC_DATA_T "
    "WHERE MaxAdDown != 0.0 AND MaxAdUp != 0.0 "
    "ORDER BY substr(BlockCode,0,12) ASC"
)
df_filtered_non_zero_providers = pd.read_sql_query(qryStr2, conn)

# Preview the first 10 rows; a bare last expression gets the rich table display.
df_filtered_non_zero_providers.head(10)


In [None]:
# Distinct provider/tract rows where both MaxAdDown and MaxAdUp are exactly 0,
# ordered by tractNum (the first 11 characters of BlockCode).
qryStr3 = (
    "SELECT DISTINCT ProviderName, substr(BlockCode,0,12) as tractNum, MaxAdDown, MaxAdUp "
    "FROM FCC_DATA_T "
    "WHERE MaxAdDown = 0.0 AND MaxAdUp = 0.0 "
    "ORDER BY substr(BlockCode,0,12) ASC"
)
df_filtered_zero_providers = pd.read_sql_query(qryStr3, conn)

# Preview the first 10 rows; a bare last expression gets the rich table display.
df_filtered_zero_providers.head(10)

In [None]:
# Load the median-income CSV and preview its first 10 rows.
df = readData("median_income.csv")
print(df.head(10))

# Keep only the ct10_id key and the mhi* columns, then store them as median_income_t.
income_columns = ['ct10_id','mhi','mhi_me','o_mhi','o_mhi_me','r_mhi','r_mhi_me']
df_filtered = df[income_columns]
insertDataToDB(tablename="median_income_t", df=df_filtered, conn=conn)


# TODO: filter out the FCC rows whose max_ad_up and max_ad_down are both 0;
# a zero in both means the company doesn't supply that area.

**Import cities with their longitude and latitude into the database**

In [None]:
# Load the city longitude/latitude CSV and store it as the CityLongLat_t table
# (any existing table of that name is replaced by insertDataToDB).
df = readData("MACityLongLat.csv")
insertDataToDB(tablename="CityLongLat_t", df=df, conn=conn)

In [4]:
# Build the censusblocks_t lookup table (town, blockcode) from the
# semicolon-delimited files in the census_blocks/ folder. The town name is taken
# from each file's name (the part before the first "."), upper-cased.
qryStr = "CREATE TABLE IF NOT EXISTS censusblocks_t (town TEXT, blockcode TEXT PRIMARY KEY NOT NULL)"
cursor = conn.cursor()
cursor.execute(qryStr)

# Maps each block id to the first town it was seen under. Deliberately not named
# `dict`: that would shadow the builtin for every cell that runs afterwards.
block_to_city = {}

# Sorted so that the "first town wins" rule below does not depend on the
# arbitrary order in which os.listdir returns entries.
for filename in sorted(os.listdir('census_blocks')):
    readFile = os.path.join('census_blocks', filename)

    # Skip hidden entries and sub-directories (e.g. .DS_Store, .ipynb_checkpoints);
    # they are not town files and would fail to parse.
    if filename.startswith(".") or not os.path.isfile(readFile):
        continue

    cityName = filename.split(".")[0].upper()

    # `with` guarantees the file handle is closed, even if a line fails to parse.
    with open(readFile, "r") as f:
        next(f, None)  # skip the first line (presumably the column header)

        for line in f:
            # Drop the line terminator so it can never end up inside the id,
            # and ignore blank lines (typically a trailing one at end of file).
            line = line.rstrip("\r\n")
            if not line:
                continue

            entries = line.split(";")
            # Field 3 is written with a "." in it; the id is fields 1 and 2
            # followed by field 3 with that dot removed.
            tracts = entries[3].split(".")
            censusblock = entries[1] + entries[2] + tracts[0] + tracts[1]

            # Only the first town seen for a given id is recorded.
            if censusblock in block_to_city:
                continue
            block_to_city[censusblock] = cityName

            # blockcode is the PRIMARY KEY and the table persists in the database
            # file, so a plain INSERT raises IntegrityError when this cell is
            # re-run. OR IGNORE leaves rows from an earlier run untouched.
            cursor.execute(
                "INSERT OR IGNORE INTO censusblocks_t VALUES(?,?)",
                (cityName, censusblock),
            )

# One commit for the whole load instead of one per inserted row.
conn.commit()