In [2]:
# Import everything we are gonna need

import pandas as pd
import matplotlib.pyplot as plt
import json
import scipy
import numpy as np
import ijson
from timeit import default_timer as timer
import datetime
from sklearn import preprocessing

In [5]:
# Reading the business.json dataset

businessDf = pd.read_json("yelp_dataset/business.json", lines=True)
print(len(businessDf))
businessDf.head()

192609


Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state
0,2818 E Camino Acequia Drive,{'GoodForKids': 'False'},1SWheh84yJXfytovILXOAQ,"Golf, Active Life",Phoenix,,0,33.522143,-112.018481,Arizona Biltmore Golf Club,85016,5,3.0,AZ
1,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1,43.605499,-79.652289,Emerald Chinese Restaurant,L5R 3E7,128,2.5,ON
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1,35.092564,-80.859132,Musashi Japanese Restaurant,28210,170,4.0,NC
3,"15655 W Roosevelt St, Ste 237",,xvX2CttrVhyG2z1dFg_0xw,"Insurance, Financial Services",Goodyear,"{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",1,33.455613,-112.395596,Farmers Insurance - Paul Lorenz,85338,3,5.0,AZ
4,"4209 Stuart Andrew Blvd, Ste F","{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...",HhyxOkGAM07SRYtlQ4wMFQ,"Plumbing, Shopping, Local Services, Home Servi...",Charlotte,"{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...",1,35.190012,-80.887223,Queen City Plumbing,28217,4,4.0,NC


In [7]:
# Method for creating the KPI column of a dataset. Instead of measuring all our metrics against the Stars rating of a business,
# we are going to create an Index based both on the Stars and the number of reviews. A rating will be more reliable
# if it is backed by a large number of reviews.

def getKPIList(df, reviewCountColumn, starsColumn, mean, std):
    score = []

    for review in reviewCountColumn:
        # Until mean receive score 0.6
        if review <= mean:
            score.append(0.6)
        # Until mean + std dev receive score 0.7
        elif review <= (mean + std):
            score.append(0.7)
        # Until mean + 2*std dev receive score 0.8
        elif review <= (mean + (2 * std)):
            score.append(0.8)
        # Until mean + 3*std dev receive score 0.9
        elif review <= (mean + (3 * std)):
            score.append(0.9)
        # Above mean + 3*std dev receive score 1.0
        else:
            score.append(1.0)

    # Standardize stars column
        # Create the Scaler object
    scaler = preprocessing.StandardScaler()

        # Apply scaler to stars column
    standard_stars = scaler.fit_transform(np.array(starsColumn).reshape(-1, 1))

        # Insert standardized column to dataframe
    df['Standardized Stars'] = standard_stars


    # Insert KPI column to dataframe
    df['KPI'] = df['Standardized Stars']*score

In [8]:
# Creating the KPI column for the business dataset

getKPIList(businessDf, businessDf["review_count"], businessDf.stars, 34, 110)
businessDf.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state,Standardized Stars,KPI
0,2818 E Camino Acequia Drive,{'GoodForKids': 'False'},1SWheh84yJXfytovILXOAQ,"Golf, Active Life",Phoenix,,0,33.522143,-112.018481,Arizona Biltmore Golf Club,85016,5,3.0,AZ,-0.575015,-0.345009
1,30 Eglinton Avenue W,"{'RestaurantsReservations': 'True', 'GoodForMe...",QXAEGFB4oINsVuTFxEYKFQ,"Specialty Food, Restaurants, Dim Sum, Imported...",Mississauga,"{'Monday': '9:0-0:0', 'Tuesday': '9:0-0:0', 'W...",1,43.605499,-79.652289,Emerald Chinese Restaurant,L5R 3E7,128,2.5,ON,-1.065954,-0.746168
2,"10110 Johnston Rd, Ste 15","{'GoodForKids': 'True', 'NoiseLevel': 'u'avera...",gnKjwL_1w79qoiV3IC_xQQ,"Sushi Bars, Restaurants, Japanese",Charlotte,"{'Monday': '17:30-21:30', 'Wednesday': '17:30-...",1,35.092564,-80.859132,Musashi Japanese Restaurant,28210,170,4.0,NC,0.406864,0.325491
3,"15655 W Roosevelt St, Ste 237",,xvX2CttrVhyG2z1dFg_0xw,"Insurance, Financial Services",Goodyear,"{'Monday': '8:0-17:0', 'Tuesday': '8:0-17:0', ...",1,33.455613,-112.395596,Farmers Insurance - Paul Lorenz,85338,3,5.0,AZ,1.388743,0.833246
4,"4209 Stuart Andrew Blvd, Ste F","{'BusinessAcceptsBitcoin': 'False', 'ByAppoint...",HhyxOkGAM07SRYtlQ4wMFQ,"Plumbing, Shopping, Local Services, Home Servi...",Charlotte,"{'Monday': '7:0-23:0', 'Tuesday': '7:0-23:0', ...",1,35.190012,-80.887223,Queen City Plumbing,28217,4,4.0,NC,0.406864,0.244119


In [2]:
# Creating the checkIn dataset. The number of times users checked in (visited) a business.

checkinDf = pd.read_json("yelp_dataset/checkin.json", lines=True)
checkinDf.head()

Unnamed: 0,business_id,date
0,--1UhMGODdWsrMastO9DZw,"2016-04-26 19:49:16, 2016-08-30 18:36:57, 2016..."
1,--6MefnULPED_I942VcFNA,"2011-06-04 18:22:23, 2011-07-23 23:51:33, 2012..."
2,--7zmmkVg-IMGaXbuVd0SQ,"2014-12-29 19:25:50, 2015-01-17 01:49:14, 2015..."
3,--8LPVSo5i0Oo61X01sV9A,2016-07-08 16:43:30
4,--9QQLMTbFzLJ_oT-ON3Xw,"2010-06-26 17:39:07, 2010-08-01 20:06:21, 2010..."


In [3]:
# Creating the Tip dataset. A tip is an advice users give to businesses.

tipDf = pd.read_json("yelp_dataset/tip.json", lines=True)
tipDf.head()

Unnamed: 0,business_id,compliment_count,date,text,user_id
0,VaKXUpmWTTWDKbpJ3aQdMw,0,2014-03-27 03:51:24,"Great for watching games, ufc, and whatever el...",UPw5DWs_b-e2JRBS-t37Ag
1,OPiPeoJiv92rENwbq76orA,0,2013-05-25 06:00:56,Happy Hour 2-4 daily with 1/2 price drinks and...,Ocha4kZBHb4JK0lOWvE0sg
2,5KheTjYPu1HcQzQFtm4_vw,0,2011-12-26 01:46:17,Good chips and salsa. Loud at times. Good serv...,jRyO2V1pA4CdVVqCIOPc1Q
3,TkoyGi8J7YFjA6SbaRzrxg,0,2014-03-23 21:32:49,The setting and decoration here is amazing. Co...,FuTJWFYm4UKqewaosss1KA
4,AkL6Ous6A1atZejfZXn1Bg,0,2012-10-06 00:19:27,Molly is definately taking a picture with Sant...,LUlKtaM3nXd-E4N4uOk_fQ


In [4]:
# Creating the photo dataset.

photoDf = pd.read_json("yelp_dataset/photo.json", lines=True)
photoDf.head()

Unnamed: 0,business_id,caption,label,photo_id
0,rcaPajgKOJC2vo_l3xa42A,,inside,MllA1nNpcp1kDteVg6OGUw
1,Kn23LDd740SBVJ7mum0fwg,,inside,YjxBE88Bf6CmTEF2LP1UNA
2,ZkGDCVKSdf8m76cnnalL-A,,food,1f7izSjM0WjkDRIVbPy1yw
3,bF8gv7k_rwZtiDLP2ZB04w,,inside,NcSlcDTEEeOaixotOPk-rA
4,50Anorn0DJXFhBr9a9_gHQ,,inside,5IiIo5UKEW0lWqZ6sWrY_A


In [33]:
# Method to prepare the User dataset to be read. We need to add square brackets at the start and end of the dataset, and
# a coma between every element.

import ijson

def cleanDataSet(file):
    startTime = timer()
    
    fileName = file + ".json"
    lastline = None
    with open(fileName,"r", encoding="utf-8") as f:
        lineList = f.readlines()
        lastline=lineList[-1]

    with open(fileName,"r", encoding="utf-8") as f, open(file+"Clean.json","w", encoding="utf-8") as g:
        for i,line in enumerate(f,0):
            if i == 0:
                line = "["+str(line)+","
                g.write(line)
            elif line == lastline:            
                g.write(line)
                g.write("]")
            else:
                line = str(line)+","
                g.write(line)
                
    print(f"**** FINISHED CLEANING {fileName} ****")

In [None]:
# Calling the cleaning method

cleanDataSet("yelp_dataset/user")

In [40]:
# The User dataset file is extremely large, and Pandas cannot read it via read_json. Therefore, we have to perform
# a different reading operation. This method uses the "ijson" library in order to open the file as a stream,
# allowing us to read line by line and creating a User csv file which is considerably shorter, as we only write into
# the file the data that we are going to need.

userUserIds = []
userNames = []
reviewCount = []
yelpingSince = []
usefulUser = []
funnyUser = []
coolUser = []
elite = []
friendCount = []
fans = []
averageStars = []

start = timer()
with open('yelp_dataset/userClean.json', encoding="utf-8") as file:
    parser = ijson.parse(file)
    for prefix, event, value in parser:
        if prefix == "item.user_id":
            userUserIds.append(value)
        elif prefix == "item.name":
            userNames.append(value)
        elif prefix == "item.review_count":
            reviewCount.append(value)
        elif prefix == "item.yelping_since":
            yelpingSince.append(value)
        elif prefix == "item.useful":
            usefulUser.append(value)
        elif prefix == "item.funny":
            funnyUser.append(value)
        elif prefix == "item.cool":
            coolUser.append(value)
        elif prefix == "item.elite":
            elite.append(value)
        elif prefix == "item.friends":
            friendCount.append(len(value.split(", ")))
        elif prefix == "item.fans":
            fans.append(value)
        elif prefix == "item.average_stars":
            averageStars.append(value)

end = timer()
print(f"*** FINISHED READING DATASET IN {end - start} SECONDS ***")

*** FINISHED READING DATASET IN 557.0803704152495 SECONDS ***


In [41]:
# Creating the User dataframe using the data previously read from the json file.

userDf = {
    "User Id" : userUserIds,
    "Name" : userNames,
    "Review Count" : reviewCount,
    "Yelping Since" : yelpingSince,
    "Useful" : usefulUser,
    "Funny" : funnyUser,
    "Cool" : coolUser,
    "Elite" : elite,
    "Friend Count" : friendCount,
    "Fans" : fans,
    "Avg Stars" : averageStars
}

userDf = pd.DataFrame(userDf)
userDf.head()

Unnamed: 0,User Id,Name,Review Count,Yelping Since,Useful,Funny,Cool,Elite,Friend Count,Fans,Avg Stars
0,l6BmjZMeQD3rDxWUbiAiow,Rashmi,95,2013-10-08 23:11:33,84,17,25,201520162017.0,99,5,4.03
1,4XChL029mKr5hydo79Ljxg,Jenna,33,2013-02-21 22:29:06,48,22,16,,1152,4,3.63
2,bc8C_eETBWL0olvFSJJd0w,David,16,2013-10-04 00:16:10,28,8,10,,15,0,3.71
3,dD0gZpBctWGdWo9WlGuhlA,Angela,17,2014-05-22 15:57:30,30,4,14,,525,5,4.85
4,MM4RJAeH6yuaN8oZDSt0RA,Nancy,361,2013-10-23 07:02:50,1114,279,665,2015201620172018.0,231,39,4.08


In [49]:
# Storing a csv file from the dataframe. This csv will now be able to be opened with Pandas

userDf.to_csv("yelp_dataset/userDataFrame.csv")

In [32]:
# Using the same cleaning and preparing method that we used for the User dataset, we now clean the Review json file.

cleanDataSet("yelp_dataset/review")

**** FINISHED CLEANING yelp_dataset/review.json in 185.80961149975735 SECONDS ****


In [47]:
# We perform the same stream opening method as before, now creating data for the Review dataset.

reviewIds = []
userIds = []
businessIds = []
stars = []
useful = []
funny = []
cool = []
date = []

start = timer()
with open('yelp_dataset/reviewClean.json', encoding="utf-8") as file:
    parser = ijson.parse(file)
    for prefix, event, value in parser:
        if prefix == "item.review_id":
            reviewIds.append(value)
        elif prefix == "item.user_id":
            userIds.append(value)
        elif prefix == "item.business_id":
            businessIds.append(value)
        elif prefix == "item.stars":
            stars.append(value)
        elif prefix == "item.useful":
            useful.append(value)
        elif prefix == "item.funny":
            funny.append(value)
        elif prefix == "item.cool":
            cool.append(value)
        elif prefix == "item.date":
            d = value
            date.append(datetime.datetime.strptime(value, "%Y-%m-%d %H:%M:%S"))

end = timer()
print(f"*** FINISHED READING DATASET IN {end - start} SECONDS ***")

*** FINISHED READING DATASET IN 1007.8703000493224 SECONDS ***


In [50]:
# Creating the Review dataframe

reviewDf = {
    "Review Id" : reviewIds,
    "User Id" : userIds,
    "Business Id" : businessIds,
    "Stars" : stars,
    "Useful" : useful,
    "Funny" : funny,
    "Cool" : cool,
    "Date" : date
}

reviewDf = pd.DataFrame(reviewDf)
reviewDf.head()

Unnamed: 0,Review Id,User Id,Business Id,Stars,Useful,Funny,Cool,Date
0,Q1sbwvVQXV2734tPgoKj4Q,hG7b0MtEbXx5QzbzE6C_VA,ujmEBvifdJM6h6RLv4wQIg,1.0,6,1,0,2013-05-07 04:34:36
1,GJXCdrto3ASJOqKeVWPi6Q,yXQM5uF2jS6es16SJzNHfg,NZnhc2sEQy3RmzKTZnqtwQ,5.0,0,0,0,2017-01-14 21:30:33
2,2TzJjDVDEuAW6MR5Vuc1ug,n6-Gk65cPZL6Uz8qRm3NYw,WTqjgwHlXbSFevF32_DJVw,5.0,3,0,0,2016-11-09 20:09:03
3,yi0R0Ugj_xUx_Nek0-_Qig,dacAIZ6fTM6mqwW5uxkskg,ikCg8xy5JIg_NGPx-MSIDA,5.0,0,0,0,2018-01-09 20:56:38
4,11a8sVPMUFtaC7_ABRkmtw,ssoyf2_x0EQMed6fgHeMyQ,b1b1eb3uo-w561D0ZfCEiQ,1.0,7,0,0,2018-01-30 23:07:38


In [35]:
# Storing the dataframe in a csv. We now can read this file with Pandas, instead of dealing with the raw json file

reviewDf.to_csv("yelp_dataset/reviewDataFrame.csv")

NameError: name 'reviewDf' is not defined

START WORKING ON OUR HYPOTHESIS: Did the ratings for mexican businesses go down after Trump took office?

In [16]:
# Filter the business dataframe by Mexican-related businesses.

mexicanRestaurants = businessDf.loc[businessDf.categories.str.contains(".exican") == True]
print(len(mexicanRestaurants))
mexicanRestaurants.head()

4628


Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,postal_code,review_count,stars,state,Standardized Stars,KPI
11,2450 E Indian School Rd,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...",1Dfx3zM-rW4n-31KeC8sJg,"Restaurants, Breakfast & Brunch, Mexican, Taco...",Phoenix,"{'Monday': '7:0-0:0', 'Tuesday': '7:0-0:0', 'W...",1,33.495194,-112.028588,Taco Bell,85016,18,3.0,AZ,-0.575015,-0.345009
25,6055 E Lake Mead Blvd,"{'BikeParking': 'True', 'BusinessParking': '{'...",tstimHoMcYbkSC4eBA1wEg,"Mexican, Restaurants, Patisserie/Cake Shop, Fo...",Las Vegas,"{'Monday': '11:0-21:0', 'Tuesday': '10:0-21:0'...",1,36.195615,-115.040529,Maria's Mexican Restaurant & Bakery,89156,184,4.5,NV,0.897804,0.718243
96,1614 Queen Street E,"{'RestaurantsGoodForGroups': 'True', 'Restaura...",NPHZkn1e-tSJAbo8Zm9rYw,"Tex-Mex, Mexican, Restaurants",Toronto,"{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",1,43.666181,-79.316468,Burrito Bandidos,M4L 1G4,43,3.5,ON,-0.084075,-0.058853
146,"3140 S Gilbert Rd, Ste 3","{'GoodForKids': 'True', 'RestaurantsReservatio...",voZnDQs6Hs3YpNcS-9TALg,"Restaurants, Mexican",Chandler,,0,33.261325,-111.790815,New Mexican Grill,85286,84,4.0,AZ,0.406864,0.284805
156,"1770 W Horizon Ridge, Ste 100","{'DriveThru': 'False', 'RestaurantsAttire': ''...",jScBTQtdAt-8RshaiBEHgw,"Ethnic Food, American (New), Burgers, Food, Re...",Henderson,"{'Monday': '0:0-0:0', 'Tuesday': '9:0-15:0', '...",1,36.010745,-115.064803,Served,89012,664,4.5,NV,0.897804,0.897804


In [42]:
# Creating the Review dataframe from the previously created csv file.

reviewDf = pd.read_csv("yelp_dataset/reviewDataFrame.csv")
reviewDf.head()

Unnamed: 0.1,Unnamed: 0,Review Id,User Id,Business Id,Stars,Useful,Funny,Cool,Date
0,0,Q1sbwvVQXV2734tPgoKj4Q,hG7b0MtEbXx5QzbzE6C_VA,ujmEBvifdJM6h6RLv4wQIg,1.0,6,1,0,2013-05-07 04:34:36
1,1,GJXCdrto3ASJOqKeVWPi6Q,yXQM5uF2jS6es16SJzNHfg,NZnhc2sEQy3RmzKTZnqtwQ,5.0,0,0,0,2017-01-14 21:30:33
2,2,2TzJjDVDEuAW6MR5Vuc1ug,n6-Gk65cPZL6Uz8qRm3NYw,WTqjgwHlXbSFevF32_DJVw,5.0,3,0,0,2016-11-09 20:09:03
3,3,yi0R0Ugj_xUx_Nek0-_Qig,dacAIZ6fTM6mqwW5uxkskg,ikCg8xy5JIg_NGPx-MSIDA,5.0,0,0,0,2018-01-09 20:56:38
4,4,11a8sVPMUFtaC7_ABRkmtw,ssoyf2_x0EQMed6fgHeMyQ,b1b1eb3uo-w561D0ZfCEiQ,1.0,7,0,0,2018-01-30 23:07:38


In [51]:
# Separating the Reviews in 2 parts: before Donald Trump became President, and after

dateOfTrumpInauguration = datetime.datetime(2017, 1, 20)
reviewDf.Date = pd.to_datetime(reviewDf.Date)
reviewsBeforeInauguration = reviewDf.loc[reviewDf.Date < dateOfTrumpInauguration]
reviewsAfterInauguration = reviewDf.loc[reviewDf.Date > dateOfTrumpInauguration]
print(len(reviewsBeforeInauguration))
print(len(reviewsAfterInauguration))

4348163
2337737


In [52]:
# Start procesing of the "Before Trump" dataset

groupByBusinessBefore2016 = reviewsBeforeInauguration.groupby("Business Id")
avgStarsBefore2016 = groupByBusinessBefore2016[["Stars"]].mean()
avgStarsBefore2016.Stars = round(avgStarsBefore2016.Stars, 2)
avgStarsBefore2016["Review Count"] = groupByBusinessBefore2016[["Review Id"]].count()
avgStarsBefore2016 = avgStarsBefore2016.reset_index()

avgStarsBefore2016.describe()

Unnamed: 0,Stars,Review Count
count,172712.0,172712.0
mean,3.578155,25.175801
std,1.076889,87.489724
min,1.0,1.0
25%,3.0,3.0
50%,3.67,7.0
75%,4.44,18.0
max,5.0,6661.0


In [53]:
# Creating the KPI column for the "Before Trump" dataset

getKPIList(avgStarsBefore2016, avgStarsBefore2016["Review Count"], avgStarsBefore2016.Stars, 25, 87)
avgStarsBefore2016.columns = ["business_id", "Stars Pre-Trump", "Review Count Pre-Trump", "Std Stars Pre-Trump", "KPI Pre-Trump"]
avgStarsBefore2016.head()

Unnamed: 0,business_id,Stars Pre-Trump,Review Count Pre-Trump,Std Stars Pre-Trump,KPI Pre-Trump
0,--1UhMGODdWsrMastO9DZw,3.82,11,0.224579,0.134747
1,--6MefnULPED_I942VcFNA,3.15,26,-0.397586,-0.27831
2,--7zmmkVg-IMGaXbuVd0SQ,4.03,31,0.419585,0.29371
3,--8LPVSo5i0Oo61X01sV9A,4.0,2,0.391727,0.235036
4,--9QQLMTbFzLJ_oT-ON3Xw,3.44,9,-0.128291,-0.076974


In [54]:
# Start processing of the "After Trump" dataset

groupByBusinessAfter2016 = reviewsAfterInauguration.groupby("Business Id")
avgStarsAfter2016 = groupByBusinessAfter2016[["Stars"]].mean()
avgStarsAfter2016.Stars = round(avgStarsAfter2016.Stars)
avgStarsAfter2016["Count"] = groupByBusinessAfter2016[["Review Id"]].count()
avgStarsAfter2016 = avgStarsAfter2016.reset_index()

avgStarsAfter2016.describe()

Unnamed: 0,Stars,Count
count,150282.0,150282.0
mean,3.562636,15.555669
std,1.269986,43.468826
min,1.0,1.0
25%,3.0,2.0
50%,4.0,5.0
75%,5.0,12.0
max,5.0,2749.0


In [55]:
# creating the KPI column for the "After Trump" dataset

getKPIList(avgStarsAfter2016, avgStarsAfter2016["Count"], avgStarsAfter2016.Stars, 15, 43)
avgStarsAfter2016.columns = ["business_id", "Stars Post-Trump", "Review Count Post-Trump", "Std Stars Post-Trump", "KPI Post-Trump"]
avgStarsAfter2016.head()

Unnamed: 0,business_id,Stars Post-Trump,Review Count Post-Trump,Std Stars Post-Trump,KPI Post-Trump
0,--1UhMGODdWsrMastO9DZw,4.0,15,0.344386,0.206632
1,--6MefnULPED_I942VcFNA,3.0,20,-0.443026,-0.310119
2,--7zmmkVg-IMGaXbuVd0SQ,4.0,28,0.344386,0.24107
3,--8LPVSo5i0Oo61X01sV9A,3.0,2,-0.443026,-0.265816
4,--9QQLMTbFzLJ_oT-ON3Xw,3.0,4,-0.443026,-0.265816


In [56]:
# Merging both datasets

mexicanRestaurantsWithStars = pd.merge(mexicanRestaurants, avgStarsBefore2016, on="business_id", how="left")
mexicanRestaurantsWithStars = pd.merge(mexicanRestaurantsWithStars, avgStarsAfter2016, on="business_id", how="left")
mexicanRestaurantsWithStars.head()

Unnamed: 0,address,attributes,business_id,categories,city,hours,is_open,latitude,longitude,name,...,Standardized Stars,KPI,Stars Pre-Trump,Review Count Pre-Trump,Std Stars Pre-Trump,KPI Pre-Trump,Stars Post-Trump,Review Count Post-Trump,Std Stars Post-Trump,KPI Post-Trump
0,2450 E Indian School Rd,"{'RestaurantsTakeOut': 'True', 'BusinessParkin...",1Dfx3zM-rW4n-31KeC8sJg,"Restaurants, Breakfast & Brunch, Mexican, Taco...",Phoenix,"{'Monday': '7:0-0:0', 'Tuesday': '7:0-0:0', 'W...",1,33.495194,-112.028588,Taco Bell,...,-0.575015,-0.345009,3.0,13.0,-0.536876,-0.322126,3.0,6.0,-0.443026,-0.265816
1,6055 E Lake Mead Blvd,"{'BikeParking': 'True', 'BusinessParking': '{'...",tstimHoMcYbkSC4eBA1wEg,"Mexican, Restaurants, Patisserie/Cake Shop, Fo...",Las Vegas,"{'Monday': '11:0-21:0', 'Tuesday': '10:0-21:0'...",1,36.195615,-115.040529,Maria's Mexican Restaurant & Bakery,...,0.897804,0.718243,4.35,128.0,0.716738,0.573391,4.0,61.0,0.344386,0.275509
2,1614 Queen Street E,"{'RestaurantsGoodForGroups': 'True', 'Restaura...",NPHZkn1e-tSJAbo8Zm9rYw,"Tex-Mex, Mexican, Restaurants",Toronto,"{'Monday': '11:0-23:0', 'Tuesday': '11:0-23:0'...",1,43.666181,-79.316468,Burrito Bandidos,...,-0.084075,-0.058853,3.38,40.0,-0.184007,-0.128805,4.0,3.0,0.344386,0.206632
3,"3140 S Gilbert Rd, Ste 3","{'GoodForKids': 'True', 'RestaurantsReservatio...",voZnDQs6Hs3YpNcS-9TALg,"Restaurants, Mexican",Chandler,,0,33.261325,-111.790815,New Mexican Grill,...,0.406864,0.284805,4.14,86.0,0.521732,0.365212,,,,
4,"1770 W Horizon Ridge, Ste 100","{'DriveThru': 'False', 'RestaurantsAttire': ''...",jScBTQtdAt-8RshaiBEHgw,"Ethnic Food, American (New), Burgers, Food, Re...",Henderson,"{'Monday': '0:0-0:0', 'Tuesday': '9:0-15:0', '...",1,36.010745,-115.064803,Served,...,0.897804,0.897804,4.47,198.0,0.828171,0.662537,4.0,486.0,0.344386,0.344386


In [57]:
# Remove restaurants that don't have reviews either before or after Inauguration

mexicanRestaurantsWithStars = mexicanRestaurantsWithStars.dropna(how="any")
print(len(mexicanRestaurantsWithStars))

2547


In [58]:
mexicanRestaurantsWithStars.describe()

Unnamed: 0,is_open,latitude,longitude,review_count,stars,Standardized Stars,KPI,Stars Pre-Trump,Review Count Pre-Trump,Std Stars Pre-Trump,KPI Pre-Trump,Stars Post-Trump,Review Count Post-Trump,Std Stars Post-Trump,KPI Post-Trump
count,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0,2547.0
mean,0.897919,37.144105,-100.625186,125.998822,3.411857,-0.170621,-0.095008,3.480546,85.524146,-0.09064,-0.047684,3.307028,44.637613,-0.201269,-0.114258
std,0.302814,4.266141,15.865262,224.124025,0.752571,0.738934,0.502419,0.759326,158.606864,0.705112,0.466306,0.973309,90.772725,0.766396,0.503972
min,0.0,33.224009,-115.351816,3.0,1.0,-2.538772,-1.777141,1.0,1.0,-2.394083,-1.519853,1.0,1.0,-2.017852,-1.412496
25%,1.0,33.508812,-112.286782,23.0,3.0,-0.575015,-0.40251,3.0,13.0,-0.536876,-0.353184,3.0,8.0,-0.443026,-0.354421
50%,1.0,36.031958,-111.921998,55.0,3.5,-0.084075,-0.058853,3.56,36.0,-0.016858,-0.011801,3.0,18.0,-0.443026,-0.265816
75%,1.0,41.119244,-80.996268,133.0,4.0,0.406864,0.284805,4.02,91.0,0.410299,0.29371,4.0,45.0,0.344386,0.24107
max,1.0,51.294184,-73.443186,3268.0,5.0,1.388743,1.110994,5.0,2817.0,1.320331,1.041749,5.0,2142.0,1.131799,1.131799


PLOTS AND CONCLUSIONS

In [59]:
# Did the ratings for mexican businesses go down after Trump took office?

meanKpiPreTrump = round(mexicanRestaurantsWithStars["KPI Pre-Trump"].mean(), 2)
meanKpiPostTrump = round(mexicanRestaurantsWithStars["KPI Post-Trump"].mean(), 2)

print(f"KPI Pre-Trump = {meanKpiPreTrump}")
print(f"KPI Post-Trump = {meanKpiPostTrump}")
print()

if meanKpiPreTrump > meanKpiPostTrump:
    print(f"The KPI for Mexican-related businesses was higher before Trump became President")
else:
    print(f"The KPI for Mexican-related businesses was higher after Trump became President")

KPI Pre-Trump = -0.05
KPI Post-Trump = -0.11

The KPI for Mexican-related businesses was higher before Trump became President
