## Imports

In [None]:
import pandas as pd
import numpy as np
from psaw import PushshiftAPI
import datetime 
from bs4 import BeautifulSoup 
import requests
import re
import time
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix
from sklearn import svm, datasets
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

## Enter City (Emphasis on city) where natural disaster occurred

In [None]:
# City to search job postings for. The value is interpolated into the API's
# `where=` query parameter and used as the prefix of every output file name.
searchcity = 'dallas'

## Creates a list of URLs to search through

In [None]:
# Build one Adzuna search URL per result page (pages 1..5999) for `searchcity`.
# NOTE(review): the app_id/app_key credentials are hard-coded in the URL;
# consider loading them from the environment instead of committing them.
from urllib.parse import quote

# Percent-encode the city so that names containing spaces, '&' or '#' cannot
# corrupt the query string. Plain names such as 'dallas' are left unchanged.
encoded_city = quote(str(searchcity), safe='')

url_list = [
    "https://api.adzuna.com:443/v1/api/jobs/us/search/" + str(page)
    + "?app_id=d3330ea8&app_key=cbcbe79274381fae5af07fb6828067ce&results_per_page=100&where="
    + encoded_city
    + "&sort_direction=up&sort_by=date&max_days_old=365"
    for page in range(1, 6000)
]

## Starts pulling job data from the Adzuna API

In [None]:
# Download the result pages one at a time, stopping at the first page that
# fails or contains no results. Each successful page's parsed JSON is appended
# to `pulleddata`.
spot = 1          # 1-based number of the next page to be stored
total_ads = 0     # running count of job ads actually received
pulleddata = []
for url in url_list:
    try:
        # Fetch each page exactly once and reuse the parsed payload; the
        # timeout keeps a stalled connection from hanging the notebook.
        page_data = requests.get(url, timeout=30).json()
        page_results = page_data['results']
    except (requests.RequestException, ValueError, KeyError, TypeError) as err:
        # Network failure, non-JSON body, or a payload without 'results'.
        print('Stopping download: ' + repr(err))
        break
    if not page_results:
        print('This is the end of data')
        break
    pulleddata.append(page_data)
    total_ads += len(page_results)
    # Report the real number of ads pulled so far rather than a guess
    # derived from the page counter.
    print(str(total_ads) + ' job ads pulled')
    spot += 1
    time.sleep(1)  # pause between requests to stay gentle on the API

## Structures data 

In [None]:
# Flatten the per-page 'results' lists into one list of job-ad records,
# keeping the pages (and the ads within each page) in download order.
structureddata = [
    ad
    for page_payload in pulleddata
    for ad in page_payload['results']
]

## Creates Dataframe and creates a saved file (in case of "oopsies")

In [None]:
# Build the job-ad DataFrame with one row per record in `structureddata`.
# From each record this reads company.display_name, the first 7 characters of
# 'created', 'title' and category.label.
#
# The rows are assembled in a single pass. A record that lacks one of the
# fields gets None in that column instead of aborting the whole build, so
# incomplete ads show up in the null check rather than emptying the table.
rows = []
for ad in structureddata:
    company_info = ad.get('company') or {}
    category_info = ad.get('category') or {}
    created = ad.get('created')
    rows.append({
        'company': company_info.get('display_name'),
        'date': created[:7] if isinstance(created, str) else None,
        'title': ad.get('title'),
        'All_Sectors': category_info.get('label'),
    })

# Explicit column list keeps the column order stable even when there are no rows.
df = pd.DataFrame(rows, columns=['company', 'date', 'title', 'All_Sectors'])

# Remove exact duplicate rows (same company, month, title and sector).
df.drop_duplicates(inplace=True)

In [None]:
# Display the assembled job-ad DataFrame.
df

## Changing date to datetime data type

In [None]:
# Convert the 'date' column to datetime values.
# errors='coerce' turns any unparseable entry into NaT instead of silently
# leaving the whole column as strings, so bad values become visible in the
# null check while the remaining rows are still converted.
df['date'] = pd.to_datetime(df['date'], errors='coerce')

## Null counts per column (all should be 0)

In [None]:
# Number of missing values in each column.
df.isna().sum()

## Number of Data Points

In [None]:
# Number of rows (job ads) in the DataFrame.
df.shape[0]

## Frequency of posting by dates

In [None]:
# Number of postings per date value, listed in chronological (index) order.
df['date'].value_counts(sort=False).sort_index()

## Most common Sectors posted

In [None]:
# Number of postings per sector label, most frequent first.
df.All_Sectors.value_counts()

## Industry Dummy-Variable DataFrame

In [None]:
# One-hot encode the sector label and append the indicator columns to `df`.
industrygraphs = pd.get_dummies(df['All_Sectors'])
industry = pd.concat([df, industrygraphs], axis=1)

# List every sector column. Iterating the dummy frame's own columns avoids
# relying on a fixed positional offset into `industry`, which skips sectors
# whenever `df` has fewer leading columns than the offset assumes.
for sector in industrygraphs.columns:
    print(sector)

## Saves DataFrame (with dummies) to `<searchcity>data.csv`

In [None]:
# Write the DataFrame with its sector indicator columns to '<searchcity>data.csv'.
industry.to_csv('{}data.csv'.format(searchcity))

## Creating the graphing data

In [None]:
# Empty month-by-sector table: one row per month-end of 2018 and one column
# per sector indicator.
daterange = pd.date_range(start="2018-01", end='2019-01', freq='M')

# Take the sector names from the dummy-variable frame itself rather than from
# a positional slice of `industry`, so that no sector is dropped when the
# number of leading non-sector columns differs from the assumed offset.
graphme = pd.DataFrame(columns=list(industrygraphs.columns), index=daterange)
graphme

In [None]:
# Show the row(s) for February 2018. Plain [] on a DataFrame looks up a
# column, so selecting rows by partial date string has to go through .loc.
graphme.loc['2018-02']

In [None]:
# Replace every missing cell with 0 so the table can be used for counting.
graphme = graphme.fillna(0)

In [None]:
# For every sector column of `graphme`, count the postings flagged with that
# sector in each month of the table's index and add the counts to the table,
# then save the result to '<searchcity>graphdata.csv'.
#
# Months are matched on the 'YYYY-MM' prefix of their string form. The counts
# are written with a direct column assignment: incrementing through chained
# indexing (graphme[month][column] += 1) depends on row lookup by string and
# may update a temporary copy instead of the table.
month_keys = [str(stamp)[:7] for stamp in graphme.index]
for sector in graphme.columns:
    flagged_dates = industry.loc[industry[sector] == 1, 'date']
    per_month = flagged_dates.map(lambda posted: str(posted)[:7]).value_counts()
    # Align the counts to the table's month order; months with no postings get 0.
    additions = per_month.reindex(month_keys, fill_value=0).to_numpy()
    graphme[sector] = graphme[sector] + additions
graphme.to_csv(str(searchcity)+"graphdata.csv")

In [None]:
# Display the month-by-sector count table.
graphme

## Plotly graph

In [None]:
# Draw one line per sector column of `graphme` with Plotly: write a standalone
# HTML file named '<searchcity>PLOTLY.html' and render the same figure inline.
try:
    # plotly.plotly was split out of the plotly package in version 4; the name
    # is kept available for older installs but nothing in this cell needs it.
    import plotly.plotly as py
except ImportError:
    py = None
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot

# One trace per column. Iterating the columns (not the number of rows)
# guarantees every sector is drawn regardless of how many months there are.
traces = {}
for num, column in enumerate(graphme.columns):
    traces['trace'+str(num)] = dict(
        x=graphme.index,
        y=graphme[column],
        mode='lines',
        name=column)
    print(column)

data = [traces['trace'+str(x)] for x in range(0, len(traces))]

plot({'data':data,
       'layout' : {'title': str(searchcity) +" Job searches for 2018"},
     }, filename=str(searchcity)+"PLOTLY.html")


iplot({'data':data,
       'layout' : {'title': str(searchcity) +" Job searches for 2018"}})

## Matplotlib

In [None]:
# Draw every sector column of `graphme` as a line on one figure and save it
# as '<searchcity>Matplotlib.jpg'.
plt.figure(figsize=(25,15))
# A single call plots all columns of the wide-form table; repeating it once
# per column only redraws the same lines on top of each other.
sns.lineplot(data=graphme)
plt.legend(labels=graphme.columns)
plt.title(str(searchcity) + " Job searches for 2018")
plt.grid()
plt.tight_layout()
plt.savefig(str(searchcity)+"Matplotlib.jpg")