<a href="https://colab.research.google.com/github/AlaknandaA/Airbnb-data-analysis-visualization/blob/gh-pages/ContrastMapping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [35]:
# Authenticating the Colab user and authorising the gspread client
# (this does not mount Google Drive; it only obtains the credentials gspread uses to open spreadsheets)

from google.colab import auth
auth.authenticate_user()
import gspread
from oauth2client.client import GoogleCredentials
gc = gspread.authorize(GoogleCredentials.get_application_default())

In [36]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Lifting the display limits so that every column and every row of a dataframe is shown
# (display.max_colwidth is left at its default)
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, None)

In [37]:
# Opening the flavour mapping spreadsheet by URL with the authorised gspread client
# (FlavorMapping holds the spreadsheet handle here; a later cell rebinds the name to a dataframe)

FlavorMapping = gc.open_by_url('https://docs.google.com/spreadsheets/d/1xOCY7QyvyQsM7hoMZwOXY6Hni75KcNDCagWwda9RVBw/edit?usp=sharing')

In [38]:
# Fetching every cell value of the 'Contrast Flavour Mapping' worksheet in a single chained call

sheet1 = FlavorMapping.worksheet('Contrast Flavour Mapping').get_all_values()

In [39]:
# Converting google sheet data into a dataframe
# (rows and columns keep their default integer labels here; the indexes are set in later cells)
FlavorMapping = pd.DataFrame(sheet1)

In [40]:
# Normalising every string cell in one pass: lower-casing it and removing all whitespace
# (leading, trailing and internal), so that a tag and its score end up joined, e.g. 'Sweet 3' -> 'sweet3'
FlavorMapping = FlavorMapping.applymap(
    lambda cell: ''.join(cell.lower().split()) if type(cell) is str else cell
)

# Replacing the string 'na' with nulls and the descriptive flavour names with their tag+score codes
FlavorMapping = FlavorMapping.replace({
    'na': np.nan,
    'crisp': 'sour2',
    'acidic': 'sour4',
    'vinegary': 'sour5',
    'meaty': 'umami4',
    'gamey': 'umami5',
})

In [41]:
# Discarding the first four sheet rows, then promoting the first two columns of what is left
# to row indexes, which gives a multiindex dataframe
FlavorMapping = FlavorMapping.iloc[4:, :].set_index(FlavorMapping.columns[[0, 1]].tolist())

# Naming the two levels of the multiindex
FlavorMapping.index.names = ['sourcegroup', 'source']

In [42]:
# Getting names of all source flavours (the second level of the row multiindex)
indexes = FlavorMapping.index.get_level_values(1)

# Creating a two-level column header as the cartesian product of every source flavour
# with ('contrast', 'result'), i.e. two columns per flavour
header = pd.MultiIndex.from_product([indexes,
                                     ['contrast','result']],
                                    names=['target','contrast/result'])

# Setting columns of dataframe to header
# NOTE(review): assumes the sheet holds exactly two columns per source flavour, in the same
# order as the rows - confirm against the sheet layout
FlavorMapping.columns = header

In [43]:
# Discarding every 'contrast' column, then flattening the header so that each remaining
# 'result' column is labelled by its target flavour only
FlavorMapping = FlavorMapping.drop('contrast', axis=1, level=1)
FlavorMapping.columns = FlavorMapping.columns.get_level_values('target')

In [44]:
# Opening the recipes spreadsheet by URL with the authorised gspread client

Final1000Recipes = gc.open_by_url('https://docs.google.com/spreadsheets/d/1fkiB20JhqSwOFxpXTTpIgykasRwMw3duhFF5_rrBm60/edit?usp=sharing')

In [45]:
# Fetching every cell value of the 'complexity scores test' worksheet in a single chained call

sheet2 = Final1000Recipes.worksheet('complexity scores test').get_all_values()

In [46]:
# Converting google sheet data into a dataframe, using the first sheet row as the column
# names and then dropping that row from the data

French = pd.DataFrame(sheet2)
French.columns = French.iloc[0]
French = French.iloc[1:]

# Creating an independent copy of French; the Taste_Tag clean-up in the following cells is done on the copy
Frenchcopy = French.copy()

In [47]:
# Using the dish name as the row index of French, then displaying the resulting index
French = French.set_index('dish')
French.index

Index(['Lobio Satsivi', 'Lobio Satsivi', 'Lobio Satsivi', 'Lobio Satsivi',
       'Lobio Satsivi', 'Lobio Satsivi', 'Lobio Satsivi', 'Lobio Satsivi',
       'Lobio Satsivi', 'Lobio Satsivi',
       ...
       'Banana Flower Fry', 'Banana Flower Fry', 'Banana Flower Fry',
       'Banana Flower Fry', 'Banana Flower Fry', 'Banana Flower Fry',
       'Banana Flower Fry', 'Banana Flower Fry', 'Banana Flower Fry',
       'Banana Flower Fry'],
      dtype='object', name='dish', length=25093)

In [48]:
# Previewing the first two rows of the working copy
Frenchcopy.head(2)

Unnamed: 0,dish,Aroma_Tag,Aroma_score,Mouthfeel_Tag,Mouthfeel_score,Taste_Tag,Taste_score,Color_Tag,Texture_Tag
1,Lobio Satsivi,grassy,1.0,toothsome,1.0,sweet,3,"{'green': {'score': 5}, 'cream': {'score': 2},...","{'meaty': {'score': 1}, 'firm': {'score': 2}, ..."
2,Lobio Satsivi,,,,,bitter,1,,


In [49]:
# Lower-casing every Taste_Tag value and trimming its surrounding whitespace in a single pass
Frenchcopy['Taste_Tag'] = Frenchcopy['Taste_Tag'].apply(lambda tag: tag.lower().strip())

In [50]:
# Blanking Taste_score on every row whose Taste_Tag appears in CrispList

CrispList = [
    'crisp', 'bland/neutral', 'caramelized', 'acidic', 'vinegary',
    'fatty', 'meaty', 'gamey', 'neutral', 'bland',
]
unscored_rows = Frenchcopy['Taste_Tag'].isin(CrispList)
Frenchcopy.loc[unscored_rows, 'Taste_score'] = ''

In [None]:
# Listing the distinct Taste_Tag values as a one-column dataframe
pd.DataFrame(Frenchcopy['Taste_Tag'].unique())

In [52]:
# Replacing all bad values in French Taste_Tag column with values that are present in flavormapping sheet
# (blank strings become nulls).
# The result is assigned back to the column: calling replace(..., inplace=True) on
# Frenchcopy.Taste_Tag is a chained in-place update, which pandas warns about and which
# leaves Frenchcopy unchanged when Copy-on-Write is enabled.

Frenchcopy['Taste_Tag'] = Frenchcopy['Taste_Tag'].replace({'neutral':'bland/neutral',
                                                           'bland':'bland/neutral',
                                                           'crisp':'sour2',
                                                           'acidic':'sour4',
                                                           'vinegary':'sour5',
                                                           'meaty':'umami4',
                                                           'gamey':'umami5',
                                                           '':np.nan})

In [53]:
# Previewing the first two rows after the Taste_Tag replacements
Frenchcopy.head(2)

Unnamed: 0,dish,Aroma_Tag,Aroma_score,Mouthfeel_Tag,Mouthfeel_score,Taste_Tag,Taste_score,Color_Tag,Texture_Tag
1,Lobio Satsivi,grassy,1.0,toothsome,1.0,sweet,3,"{'green': {'score': 5}, 'cream': {'score': 2},...","{'meaty': {'score': 1}, 'firm': {'score': 2}, ..."
2,Lobio Satsivi,,,,,bitter,1,,


In [54]:
# Building Full_Taste_Tag as Taste_Tag followed by Taste_score (e.g. 'sweet' + '3' -> 'sweet3') so that
# its values can correspond with the Flavor Mapping sheet; string results are stripped, nulls pass through
Frenchcopy['Full_Taste_Tag'] = (
    Frenchcopy['Taste_Tag'] + Frenchcopy['Taste_score']
).apply(lambda tag: tag if not isinstance(tag, str) else tag.strip())

In [55]:
# Grouping the working copy by dish
French2 = Frenchcopy.groupby(['dish'])

# One row per dish holding the array of distinct Full_Taste_Tag values of that dish,
# with the resulting column renamed from 0 to DishFlavorTags
French3 = pd.DataFrame(
    French2.apply(lambda dish_rows: dish_rows['Full_Taste_Tag'].unique())
).rename(columns={0: 'DishFlavorTags'})

French3.head(3)

Unnamed: 0_level_0,DishFlavorTags
dish,Unnamed: 1_level_1
3 Bean Israeli Couscous Salad,"[bitter3, umami3, sour5, sour4, sour2, salty2,..."
AROMATIC RICE,"[bland/neutral, spicy5, sweet2, bitter1, sour1..."
Acar,"[sweet3, salty1, sour5, sour4, sour2, nan]"


In [56]:
# Turning each numpy array in DishFlavorTags into a plain list and keeping only its string
# entries, which removes the nulls
French3['DishFlavorTags'] = French3['DishFlavorTags'].apply(
    lambda tags: [tag for tag in tags.tolist() if isinstance(tag, str)]
)

In [57]:
# Taking just the first 20 rows of full dataframe for testing
# firstdish = French3.iloc[0:20, :].copy()

In [58]:
# Main code for contrast mapping

def MappingTheFlavors(flavorList):
  """Return the contrast-mapped flavour tags for the flavours of one dish.

  Flavours that are not present in the 'source' level of the global FlavorMapping
  index are carried over unchanged. When exactly one flavour is present in the
  mapping it is carried over unchanged too. Otherwise, for every ordered pair of
  distinct positions (source, target) among the mappable flavours, the cell of
  FlavorMapping at row `source` and column `target` is collected.

  Parameters
  ----------
  flavorList : list of str
      Flavour tags of one dish. The list passed in is not modified.

  Returns
  -------
  list
      The collected tags with nulls and duplicates removed. Tags are kept in
      first-seen order, so the result is the same on every run.
  """
  # Computed once per call instead of once per flavour / per pair
  sources = FlavorMapping.index.get_level_values('source')
  knownSources = set(sources)

  # Flavours missing from the Flavor Mapping sheet go to the final list as is
  FinalFlavList = [flavor for flavor in flavorList if flavor not in knownSources]
  mappable = [flavor for flavor in flavorList if flavor in knownSources]

  if len(mappable) == 1:
    FinalFlavList.extend(mappable)
  else:
    for position, Sflavor in enumerate(mappable):
      # Rows of this source flavour are selected once and reused for every target
      sourceRows = FlavorMapping.loc[sources == Sflavor]
      # Targets are visited as: flavours after the source, then flavours before it
      for Tflavor in mappable[position + 1:] + mappable[:position]:
        FinalFlavList.append(sourceRows[Tflavor].values[0])

  # Removing nulls, then de-duplicating while keeping first-seen order. A plain set would
  # return the tags in a different order on each run because string hashing is randomised.
  return list(dict.fromkeys(x for x in FinalFlavList if not pd.isnull(x)))

In [59]:
# Applying MappingTheFlavors to each dish's DishFlavorTags list and storing the result as NewFlavorTags
French3['NewFlavorTags'] = French3['DishFlavorTags'].apply(MappingTheFlavors)

In [60]:
# Keeping only the highest intensity of each flavor within the same dish flavor tags. Eg- if both sweet1 and sweet2 are present in the same tag list, only sweet2 will be kept

def split(items):
    """Keep the highest score of each flavour and restore the descriptive flavour names.

    A tag that ends in digits is read as '<flavour><score>' (e.g. 'sweet2', 'sweet10').
    For each flavour only the highest score is kept. Tags without a trailing score,
    and items that are not strings, are passed through unchanged.

    Parameters
    ----------
    items : iterable
        Flavour tags of one dish.

    Returns
    -------
    list
        Scored tags first (one per flavour, in order of first appearance), followed by
        the unscored items. The codes 'sour2', 'sour4', 'sour5', 'umami4' and 'umami5'
        are replaced by 'crisp', 'acidic', 'vinegary', 'meaty' and 'gamey'.
    """
    import re  # imported here so the function does not rely on another cell's imports

    biggest = dict()
    newlist = []
    for i in items:
        # The whole trailing run of digits is the score, so 'sweet10' is sweet with
        # score 10 rather than 'sweet1' with score 0
        parsed = re.fullmatch(r'(.*?)(\d+)', i, flags=re.DOTALL) if isinstance(i, str) else None
        if parsed is None:
            newlist.append(i)
            continue
        string, number = parsed.group(1), int(parsed.group(2))
        if string not in biggest or biggest[string] < number:
            biggest[string] = number

    mynewlist = [k + str(v) for k, v in biggest.items()]
    mynewlist.extend(newlist)

    # Replacing the flavor names
    replacements = {'sour2':'crisp','sour4':'acidic','sour5':'vinegary','umami4':'meaty','umami5':'gamey'}
    return [replacements.get(x, x) for x in mynewlist]

In [61]:
# Applying split to each dish's NewFlavorTags list and storing the result as Contrast_Flavor_Tag
French3['Contrast_Flavor_Tag'] = French3['NewFlavorTags'].apply(split)

In [62]:
# Removing the two intermediate tag columns and previewing what is left
French3 = French3.drop(columns=['DishFlavorTags', 'NewFlavorTags'])
French3.head()

Unnamed: 0_level_0,Contrast_Flavor_Tag
dish,Unnamed: 1_level_1
3 Bean Israeli Couscous Salad,"[salty2, nutty1, bitter3, sweet&spicy2, tart4,..."
AROMATIC RICE,"[spicy5, bittersweet1, sour1, bitter1, sweet2]"
Acar,"[tangy3, vinegary, sweet3, salty1, tart4, swee..."
Achcharu,"[vinegary, fruity3, nutty3, sweet6, salty5, sp..."
Acorn Squash Bhaji with Panch Phoron,"[tangy3, vinegary, sweet3, spicy5, nutty1, bit..."


In [63]:
# Exploding the column of lists so that each list element gets its own row (the dish index is repeated)
ContrastMapped = French3.explode('Contrast_Flavor_Tag')

In [64]:
# Previewing the exploded dataframe: one row per dish and contrast flavor tag
ContrastMapped.head()

Unnamed: 0_level_0,Contrast_Flavor_Tag
dish,Unnamed: 1_level_1
3 Bean Israeli Couscous Salad,salty2
3 Bean Israeli Couscous Salad,nutty1
3 Bean Israeli Couscous Salad,bitter3
3 Bean Israeli Couscous Salad,sweet&spicy2
3 Bean Israeli Couscous Salad,tart4


In [65]:
# Left-joining the contrast tags onto the original dataframe by dish, then resetting the index
FinalDF = French.merge(ContrastMapped, on='dish', how='left').reset_index()

In [66]:
# Within each dish, keeping the first occurrence of every tag value and replacing the repeats with null
for tag_column in ['Aroma_Tag', 'Taste_Tag', 'Mouthfeel_Tag', 'Contrast_Flavor_Tag',
                   'Color_Tag', 'Texture_Tag']:
    repeated_in_dish = FinalDF.duplicated([tag_column, 'dish'])
    FinalDF.loc[repeated_in_dish, tag_column] = np.nan

# Replacing each score column with nulls where its tag column is null
for tag_column, score_column in [('Aroma_Tag', 'Aroma_score'),
                                 ('Mouthfeel_Tag', 'Mouthfeel_score'),
                                 ('Taste_Tag', 'Taste_score')]:
    FinalDF.loc[FinalDF[tag_column].isnull(), score_column] = np.nan

# With dish as the index, dropping the rows in which every remaining column is null,
# then turning dish back into a column
FinalDF = FinalDF.set_index('dish').dropna(how='all').reset_index()

In [67]:
# Making two columns out of one column-one for actual tag and another for tag score.

# The score is the first run of digits in the tag; it has to be extracted before the digits are removed
FinalDF['Contrast_Flavor_Score'] = FinalDF.Contrast_Flavor_Tag.str.extract(r'([\d]+)',expand=False)
# regex=True is required: since pandas 2.0 str.replace treats the pattern as a literal string by
# default, which would leave the digits in the tag. The raw string avoids the invalid '\d' escape.
FinalDF['Contrast_Flavor_Tag'] = FinalDF.Contrast_Flavor_Tag.str.replace(r'\d+', '', regex=True)
# Tags listed in CrispList are given a fixed score of 3
FinalDF.loc[FinalDF['Contrast_Flavor_Tag'].isin(CrispList), 'Contrast_Flavor_Score'] = 3

In [68]:
# Saving to drive
# Google Drive is mounted explicitly here so that the export also works on a fresh runtime;
# without a mount the /content/drive/MyDrive directory does not exist and to_csv fails.
from google.colab import drive
drive.mount('/content/drive')
FinalDF.to_csv(r'/content/drive/MyDrive/Contrast_Mapped.csv', index=False)