## ANAHEIM'S WEBSITE PROVIDES NO WAY TO SCRAPE THE DATA; THEREFORE, ONCE A WEEK WE NEED TO GATHER THE RECENT DATA MANUALLY.

In [2]:
import sys
# Put the project root on the import path; presumably this is what lets the
# `src.*` imports used later in this notebook resolve (assumes the repository
# lives at this absolute path — TODO confirm).
sys.path.append('/Projects/regionintelligenceai/')


In [7]:
import os
import requests
from datetime import datetime
import time
import numpy as np
import pandas as pd

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.service import Service

from src.driver_config import get_chrome_driver
from src.const import ANAHEIM_PLANNING_URL, ANAHEIM_ANDYS_MAP_URL
from src.paths import RAW_DATA_DIR

def get_most_recent_file(path, file_startswith):
    """
    Gets the most recent file from the given path that starts with the provided prefix.

    Args:
        path: Directory to search (str or os.PathLike).
        file_startswith: Prefix an entry name must start with to be considered.

    Returns:
        The full path (as str) of the matching entry with the greatest
        ``os.path.getctime`` value, or None when the directory does not exist
        or holds no entry with that prefix.
    """
    try:
        entries = os.listdir(path)
    except (FileNotFoundError, NotADirectoryError):
        # A missing data directory means "nothing to load", exactly like an
        # empty one, so callers only ever have to handle the None case.
        return None

    candidates = [
        os.path.join(path, name)
        for name in entries
        if name.startswith(file_startswith)
    ]

    # Ensure there is something to compare
    if not candidates:
        return None

    # NOTE: getctime is the creation time on Windows, but the time of the last
    # metadata change on Unix-like systems.
    return max(candidates, key=os.path.getctime)

def read_anaheim_csv():
    '''
    Loads the newest Anaheim CSV exports found in RAW DATA DIR.

    Looks in the 'anaheim' sub-directory for the latest file starting with
    'AndysMap' and the latest file starting with 'dev_apps'.

    Returns a tuple (current projects dataframe, current applications dataframe).
    Raises Exception when either of the two files cannot be found.
    '''
    latest_paths = [
        get_most_recent_file(RAW_DATA_DIR / 'anaheim', prefix)
        for prefix in ('AndysMap', 'dev_apps')
    ]

    # Both exports are required; bail out if either lookup came back empty.
    if not all(latest_paths):
        raise Exception("Couldn't find the required files!")

    projects_csv, applications_csv = latest_paths
    return pd.read_csv(projects_csv), pd.read_csv(applications_csv)

# Load the latest exports: df1 comes from the 'AndysMap*' file (current projects),
# df2 from the 'dev_apps*' file (current applications).
df1, df2 = read_anaheim_csv()



In [8]:
from src.const import anaheim_planner_emails, anaheim_planner_phones, anaheim_planner_names, ANAHEIM_PLANNING_OFFICE_EMAIL, ANAHEIM_PLANNING_OFFICE_PHONE
import re

def process_the_dataframe(df):
    """
    Processes the dataframe to get the desired columns. Also adds email and phone columns.

    The caller's dataframe is left unmodified; a new dataframe is returned.

    Args:
        df: Raw Anaheim dataframe. After renaming it must provide 'address',
            'description', 'projectName', 'typeOfUse', 'status', 'owner' and
            'recentUpdate' (source names: 'Address', 'Description',
            'Application Name', 'Type of Use', 'Case Status', 'Applicant',
            'Opened Date'). 'Staff Name' is optional.

    Returns:
        A new dataframe restricted to the columns address, description,
        projectName, typeOfUse, status, owner, recentUpdate, email, phone and
        city, with any "[...]" tag stripped from projectName.

    Raises:
        KeyError: If a required column is missing after renaming.
    """
    if 'Staff Name' in df.columns:
        # Map names to emails and phone numbers using the lookup tables
        # (presumably dicts keyed by staff name). Staff without a matching
        # entry fall back to the planning-office defaults. fillna is applied to
        # the mapped Series and the result assigned, because a chained
        # `df[col].fillna(..., inplace=True)` does not reliably write back
        # into the dataframe.
        staff = df['Staff Name']
        email = staff.map(anaheim_planner_emails).fillna(ANAHEIM_PLANNING_OFFICE_EMAIL)
        phone = staff.map(anaheim_planner_phones).fillna(ANAHEIM_PLANNING_OFFICE_PHONE)
    else:
        # No staff information at all: use the office contact for every row.
        email = ANAHEIM_PLANNING_OFFICE_EMAIL
        phone = ANAHEIM_PLANNING_OFFICE_PHONE

    # rename/assign return new frames, so the input is never mutated.
    df = df.rename(columns={
        'Address': 'address',
        'Description': 'description',
        'Application Name': 'projectName',
        'Type of Use': 'typeOfUse',
        'Case Status': 'status',
        'Applicant': 'owner',
        'Opened Date': 'recentUpdate'
    }).assign(email=email, phone=phone, city='Anaheim')

    # Choose desired columns; the explicit copy makes the result an independent
    # frame so the assignment below cannot hit a view of the wider frame.
    df = df[['address', 'description', 'projectName', 'typeOfUse', 'status',
             'owner', 'recentUpdate', 'email', 'phone', 'city']].copy()

    bracket_tag = re.compile(r'\[.*?\]\s*')

    def remove_brackets(projectName):
        """
        Removes bracketed tags (and trailing whitespace) from a project name.

        Non-string values such as NaN are returned unchanged instead of
        raising TypeError.
        """
        if isinstance(projectName, str):
            return bracket_tag.sub('', projectName)
        return projectName

    df['projectName'] = df['projectName'].apply(remove_brackets)

    return df

# Normalize the projects dataframe, then show it as the cell output.
df1 = process_the_dataframe(df1)
df1
    

Unnamed: 0,address,description,projectName,typeOfUse,status,owner,recentUpdate,email,phone,city
0,321 W Katella Ave,The Applicant Proposes Adjustment No. 14 To Th...,[DEV2010-00166J] ANAHEIM GARDENWALK - STC,Other,Withdrawn,MELODY YANG,12/10/2019,EThien@anaheim.net,(714) 765-4568,Anaheim
1,500 W Disney Way,Amend The Development Agreement Between The Ci...,[DEV2010-0166I] ANAHEIM GARDENWALK - WESTGATE,Other,In Review,DAVID SIEGEL,2/14/2019,Planning@anaheim.net,(714) 765-5139,Anaheim
2,8163 E Kaiser Blvd,The Applicant Proposes To Amend An Existing Co...,[DEV2013-00024C] INFLUENCE CHURCH,Commercial,Approved,PHIL HOTENPILLER,7/29/2020,Planning@anaheim.net,(714) 765-5139,Anaheim
3,1700 S Harbor Blvd,A Request To Amend The Development Agreement B...,[DEV2015-00120A] 1700 S. HARBOR HOTEL,Other,Approved,PAUL SANFORD,10/2/2019,EThien@anaheim.net,(714) 765-4568,Anaheim
4,3111 W Orange Ave,Request To Amend A Conditional Use Permit To P...,[DEV2018-00130A] DAYBREAK UNIVERSITY,Commercial,In Review,,9/15/2021,TGorham@Anaheim.net,(714) 765-4947,Anaheim
...,...,...,...,...,...,...,...,...,...,...
388,425 S Cooks Corner,A Request For A Variance To Allow A Building H...,Nguyen Custom Home,Residential,In Review,,7/31/2023,iorozco@anaheim.net,(714) 765-4948,Anaheim
389,200 W Midway Dr,A Request For Conceptual Development Review Of...,Legacy Anaheim,Residential,In Review,,8/1/2023,JMBarriga@Anaheim.net,(714) 765-5380,Anaheim
390,275 E Santa Ana St,"A Request For A General Plan Amendment, Reclas...",The Mill,Residential,In Review,,8/3/2023,TGorham@Anaheim.net,(714) 765-4947,Anaheim
391,2564 W Woodland Dr,A Request For A Minor Conditional Use Permit A...,Medical Office,Commercial,In Review,,8/15/2023,iorozco@anaheim.net,(714) 765-4948,Anaheim


In [5]:
# Bare expression: shows the current contents of df1 as the notebook cell output.
df1