## Jurupa Valley Current Planning Project Scraper

In [90]:
import sys
sys.path.append('/Projects/regionintelligenceai/map/')

In [91]:
# Import libraries
import pandas as pd
import numpy as np
import requests
import json
import time
import os
import re

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)

# Grab Current-Planning-Developments
link = "https://www.jurupavalley.org/336/Current-Planning-Development-Review"
df = pd.read_html(link)[0]


In [92]:
def set_header(df):
    """
    This function takes a dataframe as input and sets the first row as the header of the dataframe.
    """
    if df.empty:
        raise ValueError('Dataframe is empty')
    
    new_header = df.iloc[0] #grab the first row for the header
    df = df[1:] #take the data less the header row
    df.columns = new_header #set the header row as the df header
    df.dropna(inplace=True, thresh=2)
    return df

In [93]:
df = set_header(df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True, thresh=2)


In [95]:
# Rename columns
df.rename(columns={'Project Name': 'projectName', 'Address': 'address', 'Applicant': 'applicantName', 'Status': 'status'}, inplace=True)


In [96]:
df

Unnamed: 0,projectName,address,applicantName,status
1,Agua Mansa Road Development Project,Agua Mansa Road/Hall Avenue (APNS: 175210032; 175210034; 175210059),Carson-VA Industries,Entitled
2,Rock Rose & Bayberry Place (Appaloosa Springs),6501 Clay St.,"I.H.C. Jurupa, LLC",Under Construction
3,BRE Space Center,Iberia Street and Space Center Court (APN: 156150069),"BRE Space Center Mira Loma, LLC",Entitled
5,Troy Court,"4725, 4790, and 4795 Troy Court","Davis JCR, Troy Court Industrial Owner, LLC",Entitled
6,Shops at Jurupa Valley,"NE corner of Pyrite and Mission (APN:171020001, 171020002, 171020025)","Panorama Development, LLC",Under Construction
7,Vernola Marketplace Apartments- Phase A,SW corner of Van Buren Boulevard and Rutile Street (APNS: 167330006; 167330010; 167330015; & 167110039),,Entitled
8,Wineville Marketplace,SE corner of Wineville & Limonite (APNS:157250011 & 157250013),,Entitled


In [71]:
# Grab Planning Staff
link_2 = "https://www.jurupavalley.org/Directory.aspx?did=9"
df_2 = pd.read_html(link_2)[2]
df_2 

Unnamed: 0_level_0,Staff,Staff,Staff,Staff,Staff
Unnamed: 0_level_1,Name,Title,Email,Phone,Unnamed: 4_level_1
0,"General Housing Inquires, .",,"<!-- var w = ""housinginfo""; var x = ""jurupavalley.org""; var y = ""housinginfo"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",,
1,"General Planning Inquiries, .",,"<!-- var w = ""planninginfo""; var x = ""jurupavalley.org ""; var y = ""planninginfo"" + '@' + ""jurupavalley.org ""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",,
2,"Perez , Joe",Community Development Director,"<!-- var w = ""jperez""; var x = ""jurupavalley.org""; var y = ""jperez"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 Ext. 207,
3,"Guevara, Dianne",Deputy Community Development Director,"<!-- var w = ""dguevara""; var x = ""jurupavalley.org ""; var y = ""dguevara"" + '@' + ""jurupavalley.org ""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 Ext. 203,
4,"Tam, Annette",Planning Manager,"<!-- var w = ""atam""; var x = ""jurupavalley.org""; var y = ""atam"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 Ext. 216,
5,"Aquino, Reynaldo",Senior Planner,"<!-- var w = ""raquino""; var x = ""jurupavalley.org""; var y = ""raquino"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 Ext. 217,
6,"Del Rio, Miguel",Associate Planner,"<!-- var w = ""mdelrio""; var x = ""jurupavalley.org""; var y = ""mdelrio"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 Ext. 222,
7,"Estrada, Oscar",Planning Technician,"<!-- var w = ""osestrada""; var x = ""jurupavalley.org""; var y = ""osestrada"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 Ext. 150,
8,"Gonzalez, Roberto",Senior Planner,"<!-- var w = ""rgonzalez""; var x = ""jurupavalley.org""; var y = ""rgonzalez"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 Ext. 149,
9,"Huerta, Jesus",Associate Planner,"<!-- var w = ""jhuerta""; var x = ""jurupavalley.org""; var y = ""jhuerta"" + '@' + ""jurupavalley.org""; var z = document.write(""<a href=\""mailto:"" + w + '@' + x + '\"">' + y +'</a>'); //-->",(951) 332-6464 x128,


In [72]:
import pandas as pd

def clean_columns(df):
    # Dropping the 'Staff' level if it is level 0 in the MultiIndex
    df.columns = df.columns.droplevel(0)

    # Removing any columns that have 'Unnamed' in the column name
    df = df.loc[:, ~df.columns.str.contains('Unnamed')]
    df.dropna(inplace=True)

    return df

# Example usage:
# Assuming df_2 is your DataFrame with the MultiIndex as shown in your image
df= clean_columns(df)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(inplace=True)


In [97]:
def extract_email(js_code):
    # This pattern assumes that the variable 'y' contains the email
    pattern = re.compile(r'var y = "(.*?)" \+ \'@\' \+ "(.*?)";')
    matches = pattern.search(js_code)
    if matches:
        return matches.group(1) + '@' + matches.group(2)
    return None  # Return None or appropriate value if pattern not found

# Apply the function to the 'Email' column
df_2['Email'] = df_2['Email'].apply(extract_email)
## Rename columns
df_2.rename(columns={'Name': 'planner', 'Phone': 'phone', 'Title': 'title', 'Email': 'email'}, inplace=True)

print(df_2['Email'])

2        jperez@jurupavalley.org
3     dguevara@jurupavalley.org 
4          atam@jurupavalley.org
5       raquino@jurupavalley.org
6       mdelrio@jurupavalley.org
7     osestrada@jurupavalley.org
8     rgonzalez@jurupavalley.org
9       jhuerta@jurupavalley.org
10         clua@jurupavalley.org
11        Kraza@jurupavalley.org
12     esoriano@jurupavalley.org
13      jtorres@jurupavalley.org
14       avidal@jurupavalley.org
Name: Extracted_Email, dtype: object


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_2['Email'] = df_2['Email'].apply(extract_email)
