In [1]:
# Import modules needed
import pandas as pd
from PyPDF2 import PdfFileReader
import slate3k as slate

# Florida EV Registrations

In [2]:
# Read csv file into dataframe
evreg_path = "static/data/fl_ev_registrations_public.csv"
evreg_df = pd.read_csv(evreg_path, encoding="utf-8")

# Remove unnecessary columns
evreg_df = evreg_df[['Registration Valid Date','County','Vehicle Name']]

#Rename columns
evreg_df = evreg_df.rename(columns={'Registration Valid Date':'report_date'
                        ,'County':'county'
                        ,'Vehicle Name':'vehicle_name'
                        })
evreg_df.head()

Unnamed: 0,report_date,county,vehicle_name
0,6/30/2018,Dade,Tesla Model X
1,6/30/2018,Dade,Tesla Model X
2,6/30/2018,Dade,Tesla Model X
3,6/30/2018,Dade,Tesla Model X
4,6/30/2018,Dade,Tesla Model X


In [3]:
# ----- Seperate Vehicle Make and Model ----- #
# Create list of unique vehicle makes & models
vehicles_ls = evreg_df['vehicle_name'].unique()

# Create placeholder dataframe
vehicles_df = pd.DataFrame({})

# Loop through list, seperating make and model
for vehicle in vehicles_ls:
    
    # Find Make of vehicle
    split_results = vehicle.split(' ')
    make = split_results[0]
    
    # fix makes with multimple words in the name
    if make == 'Land':
        make += ' '+split_results[1]
        
    # Find model of vehicle
    model = vehicle[len(make)+1:]
    
    # Append to vehicle dataframe
    vehicles_df = vehicles_df.append({'vehicle_name':vehicle,'make':make,'model':model}, ignore_index=True)

# Reset index and rename column
vehicles_df = vehicles_df.reset_index()
vehicles_df = vehicles_df.rename(columns={'index':'vehicle_id'})


vehicles_df.head()

Unnamed: 0,vehicle_id,make,model,vehicle_name
0,0,Tesla,Model X,Tesla Model X
1,1,BMW,X5,BMW X5
2,2,Volvo,XC90 Plug In,Volvo XC90 Plug In
3,3,Volvo,XC60 Plug In,Volvo XC60 Plug In
4,4,Chevrolet,Volt,Chevrolet Volt


In [4]:
# Update evreg_df with vehicle_id   
evreg_clean_df = evreg_df.merge(vehicles_df,left_on='vehicle_name', right_on='vehicle_name')
evreg_clean_df = evreg_clean_df[['report_date','county','vehicle_id']]
# Reset index and rename column
evreg_clean_df = evreg_clean_df.reset_index()
evreg_clean_df = evreg_clean_df.rename(columns={'index':'vehicle_id'})
evreg_clean_df.head()

Unnamed: 0,vehicle_id,report_date,county,vehicle_id.1
0,0,6/30/2018,Dade,0
1,1,6/30/2018,Dade,0
2,2,6/30/2018,Dade,0
3,3,6/30/2018,Dade,0
4,4,6/30/2018,Dade,0


# All Florida regitrations

In [5]:
file_ls = ['fl_reg_2019.pdf','fl_reg_2020.pdf','fl_reg_2021.pdf']

path = 'static/data/'+file_ls[0]

# Initalize PDF reader to get page count
reader = PdfFileReader(open(path,'rb'))
page_ct = reader.getNumPages()

# Read PDF text
with open(path,'rb') as f:
    pdf = slate.PDF(f, just_text=1)

page_num = 0

# ------------------------------------------
# Test code before putting into loop
# ------------------------------------------

pdf[page_num].split('\n')




['County',
 'Name',
 '',
 'Vehicle Type Desc',
 '',
 'AUTOS &',
 'PICKUPS',
 '',
 'HEAVY',
 'TRUCKS',
 '',
 'MOTORCYCLE',
 '',
 'BUS',
 '',
 'TOOLS',
 '',
 'VESSEL',
 '',
 'VEHICLE',
 'TRAILER',
 '',
 'TRAVEL',
 'TRAILER',
 '',
 'MOBILE',
 'HOME',
 '',
 'Grand Total',
 '',
 'Grand Total',
 '',
 'ALACHUA',
 '',
 'BAKER',
 '',
 'BAY',
 '',
 'BRADFORD',
 '',
 'BREVARD',
 '',
 'BROWARD',
 '',
 'CALHOUN',
 '',
 'CHARLOTTE',
 '',
 'CITRUS',
 '',
 'CLAY',
 '',
 'COLLIER',
 '',
 'COLUMBIA',
 '',
 'DESOTO',
 '',
 'DIXIE',
 '',
 'DUVAL',
 '',
 'ESCAMBIA',
 '',
 'FLAGLER',
 '',
 'FRANKLIN',
 '',
 'GADSDEN',
 '',
 'GILCHRIST',
 '',
 'GLADES',
 '',
 'GULF',
 '',
 'HAMILTON',
 '',
 'HARDEE',
 '',
 'HENDRY',
 '',
 'HERNANDO',
 '',
 'HIGHLANDS',
 '',
 'HILLSBORO..',
 '',
 'HOLMES',
 '',
 'INDIAN RIV..',
 '',
 'JACKSON',
 '',
 'JEFFERSON',
 '',
 'LAFAYETTE',
 '',
 'LAKE',
 '',
 'LEE',
 '',
 'LEON',
 '',
 'LEVY',
 '',
 'LIBERTY',
 '',
 'MADISON',
 '',
 'MANATEE',
 '',
 'MARION',
 '',
 'MARTIN',
 '',
 'M

In [6]:
# file_ls = ['fl_reg_2019.pdf','fl_reg_2020.pdf','fl_reg_2021.pdf']

# for file in file_ls:
#     path = 'static/data/'+file
    
#     # Initalize PDF reader to get page count
#     reader = PdfFileReader(open(path,'rb'))
#     page_ct = reader.getNumPages()
    
#     # Read PDF text
#     with open(path,'rb') as f:
#         pdf = slate.PDF(f, just_text=1)

#     print('===================================================================')
#     print(f'{file} has {page_ct} pages')
#     print('===================================================================')
    
#     page_num = 0

#     while page_num+1 <= page_ct:
#         print('--------------------------------------------------')
#         print(f'Page #: {page_num+1}')
#         print('--------------------------------------------------')
#         print(pdf[page_num].split('\n'))
#         page_num += 1




