## Making Recommendations Based on Correlation

In [1]:
# Import Dependencies
import pandas as pd
import numpy as np

These datasets are hosted on: https://archive.ics.uci.edu/ml/datasets/Restaurant+%26+consumer+data

They were originally published by: Blanca Vargas-Govea, Juan Gabriel González-Serna, Rafael Ponce-Medellín. Effects of relevant contextual features in the performance of a restaurant recommender system. In RecSys’11: Workshop on Context Aware Recommender Systems (CARS-2011), Chicago, IL, USA, October 23, 2011.

In [None]:
# Load the three source tables from the local ./data directory
rating_df = pd.read_csv('./data/rating_final.csv')
cuisine_df = pd.read_csv('./data/chefmozcuisine.csv')
# The keyword is `encoding` (it was misspelled `endoding`, which makes
# read_csv raise a TypeError). The 'mbcs' codec exists only on Windows, so a
# portable single-byte codec is used instead.
# NOTE(review): confirm 'latin-1' matches the file's actual encoding.
geodata = pd.read_csv('./data/geoplaces.csv', encoding='latin-1')

In [None]:
# Preview the first five rows of the ratings table
rating_df.head(n=5)

In [None]:
# Preview the first five rows of the cuisine table
cuisine_df.head(n=5)

In [None]:
# Preview the first five rows of the geodata table
geodata.head(n=5)

In [None]:
# Keep only the identifier and name columns of the geodata table
places = geodata.loc[:, ['placeID', 'name']]
places.head(n=5)

### Grouping and Ranking Data

In [None]:
# Average rating per placeID, kept as a one-column data frame indexed by placeID
rating = rating_df.groupby('placeID')['rating'].mean().to_frame()
rating.head(n=5)

In [None]:
# Count how many ratings each placeID received and store it next to the mean.
# The count has to come from the raw `rating_df`: `rating` already holds one
# row per placeID, so grouping it again would give a count of 1 for every place.
# The resulting Series is indexed by placeID and aligns with `rating`'s index.
rating['rating_count'] = rating_df.groupby('placeID')['rating'].count()
rating.head()

In [None]:
# Show descriptive statistics for the columns of the per-place rating frame
rating.describe()

In [None]:
# Order places from the highest to the lowest rating count and show the top five
rating.sort_values(by='rating_count', ascending=False).head(n=5)

In [None]:
# Look up the name of the business with placeID 135085
places.loc[places['placeID'] == 135085]

In [None]:
# Check the cuisine of the business with placeID 135085.
# The cuisine table is loaded under the name `cuisine_df`; the bare name
# `cuisine` is not assigned anywhere and would raise a NameError.
cuisine_df[cuisine_df['placeID'] == 135085]

### Preparing Data for Analysis

In [None]:
# Build a user-by-place matrix: one row per userID, one column per placeID,
# each cell aggregated from `rating` with pivot_table's default (the mean)
places_crosstab = rating_df.pivot_table(values='rating', index='userID',
                                        columns='placeID')
places_crosstab.head(n=5)

In [None]:
# Pull the column of user ratings for placeID 135085 out of the crosstab
tortas_ratings = places_crosstab[135085]

# Show only the entries that are >= 0 (missing ratings compare as False)
tortas_ratings.loc[tortas_ratings.ge(0)]

### Evaluating Similarity Based on Correlation

In [None]:
# Correlate every place's rating column with the Tortas rating column
# (corrwith uses Pearson correlation by default)
similar_to_tortas = places_crosstab.corrwith(tortas_ratings)

# Store the scores in a one-column frame and discard places with no score
corr_tortas = pd.DataFrame(similar_to_tortas, columns=['PearsonR']).dropna()
corr_tortas.head(n=5)

In [None]:
# Attach each place's rating count to its correlation score
tortas_corr_summary = corr_tortas.join(rating['rating_count'])

# Keep places with at least 10 ratings and list the 10 highest correlations
(
    tortas_corr_summary
    .loc[lambda df: df['rating_count'] >= 10]
    .sort_values(by='PearsonR', ascending=False)
    .head(n=10)
)

In [None]:
# Check the top seven placeIDs that are similar to Tortas.
# NOTE(review): these IDs are hard-coded, presumably copied from the ranking
# produced earlier in the notebook -- re-check them if the data or the
# rating-count threshold changes.
places_corr_tortas = pd.DataFrame(
    {'placeID': [135085, 132754, 135045, 135062, 135028, 135042, 135046]})

# Attach cuisine information to each of those places. The cuisine table is
# loaded as `cuisine_df`; the bare name `cuisine` is not assigned anywhere
# and would raise a NameError.
summary = pd.merge(places_corr_tortas, cuisine_df, on='placeID')

# Display the merged result as the cell output
summary

In [None]:
# Check the name of the similar restaurant with placeID 135046.
# The lookup frame is named `places`; `place` is not assigned anywhere and
# would raise a NameError.
places[places['placeID'] == 135046]

In [None]:
# Describe the Rcuisine column of the cuisine table.
# Uses the loaded frame `cuisine_df` and the pandas method `describe`
# (`cuisine` is not assigned anywhere and `described` is not a Series method).
cuisine_df['Rcuisine'].describe()