In [124]:
import pandas as pd
import numpy as np
import json
import string
import textacy.preprocessing as tprep
from datetime import datetime, timezone
from app_store_scraper import AppStore
from pandas import json_normalize

from sklearn.feature_extraction.text import TfidfVectorizer

import spacy
nlp = spacy.load('en_core_web_sm')
from spacy.tokenizer import Tokenizer
from spacy.lang.en.stop_words import STOP_WORDS
nlp.Defaults.stop_words |= {"find","found", "like", "feel"}

In [125]:
def return_apple_reviews(app_name, app_id, max_reviews, country='us'):
    '''
    Return dataframe with reviews.
    '''
    app_reviews = AppStore(country='us', app_name=app_name, app_id = app_id)
    app_reviews.review(how_many=max_reviews)
    app_df = pd.DataFrame.from_dict(app_reviews.reviews)
    # Breaks up column developer reponses from one json string into multiple columns
    app_df[['developer_id', 
            'developer_response', 
            'developer_response_modified']] = json_normalize(app_df['developerResponse'])
    
    #Cleaning Date Formats
    app_df['developer_response_date'] = pd.to_datetime(app_df['developer_response_modified'])
    app_df['developer_response_date'] = app_df['developer_response_date'].apply(lambda d: d.replace(tzinfo=None))
    app_df['customer_rating_date'] = pd.to_datetime(app_df['date'])
    
    #Drop unneeded columns
    #app_df = app_df.drop(['developerResponse', 'developer_response_modified', 'date'], axis=1)
    
    #Metric Creation
    app_df['days_until_responce'] = (app_df['developer_response_date']- app_df['customer_rating_date']).dt.days
    
    return app_df

In [126]:
df = return_apple_reviews('found', 1581179653, 2000)

2023-03-06 13:33:06,128 [INFO] Base - Initialised: AppStore('us', 'found', 1581179653)
2023-03-06 13:33:06,130 [INFO] Base - Ready to fetch reviews from: https://apps.apple.com/us/app/found/id1581179653
2023-03-06 13:33:06,523 [INFO] Base - [id:1581179653] Fetched 82 reviews (82 fetched in total)


In [127]:
def clean_ratings(text):
    #instantiate spacy class
    doc = nlp(text)
    
    cleaned_doc = []
    
    #remove stop words & punctuation, lemmatize text, lowercase text, remove any extra spaces before or after
    for token in doc:
        if token.is_stop == False and token.pos_ not in ("PUNCT", "NUM"):
            cleaned_doc.append(token.lemma_.lower().strip())
            
    return " ".join(cleaned_doc)

In [128]:
df['customer_review_cleaned'] = df['review'].apply(clean_ratings)

In [153]:
vectorizer = TfidfVectorizer(
                                max_features=10,
                                max_df=1,
                                min_df=1,
                                ngram_range = (2,5),
                                stop_words = "english"
                            )

In [155]:
high_ratings_result = vectorizer.fit_transform(df[df['rating']>=4]['customer_review_cleaned']).toarray()
high_rate = pd.DataFrame(high_ratings_result, columns = vectorizer.get_feature_names_out())
high_rate.columns = ["word_" + str(x) for x in high_rate.columns]
high_rate.index = df[df['rating']>=4].index
high_reviews_df = pd.concat([df[df['rating']>=4], high_rate], axis=1)

In [156]:
high_rate.columns

Index(['word_app easy', 'word_app help', 'word_coach awesome', 'word_easy use',
       'word_highly recommend', 'word_log routine', 'word_lose weight',
       'word_love app', 'word_user friendly', 'word_weight loss'],
      dtype='object')

In [157]:
high_reviews_df

Unnamed: 0,date,developerResponse,review,rating,isEdited,title,userName,developer_id,developer_response,developer_response_modified,...,word_app easy,word_app help,word_coach awesome,word_easy use,word_highly recommend,word_log routine,word_lose weight,word_love app,word_user friendly,word_weight loss
0,2022-10-04 22:43:00,"{'id': 32461912, 'body': 'Thank you for your 5...",I’ve been on the found program for approximate...,5,False,I love this! Im gaining confidence in myself🤍,Dannieg3212,32461912.0,Thank you for your 5-star rating! We are so ex...,2022-10-09T01:39:19Z,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2022-03-11 19:33:23,,Personalized program with the ongoing support ...,5,False,Great program and Useful App,TiMarie83,,,,...,0.0,0.0,0.0,0.0,0.435909,0.0,0.814175,0.0,0.0,0.383539
5,2022-03-25 15:22:57,,The program is fine but I’m not a fan of the a...,4,False,The program is good not a fan of the app,lsduncan8,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.727845,0.0,0.685741,0.0
8,2022-09-29 16:59:33,"{'id': 32328657, 'body': 'You made our day! Th...","I started Found in July 2022, by September I w...",4,False,Finally got my appetite under control!!!,candlegal,32328657.0,You made our day! Thank you for reviewing the ...,2022-09-30T23:27:47Z,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13,2022-03-24 19:43:15,,Speaking in regard to the program holistically...,5,False,360 Weight + Health + Well-being - how refresh...,LadyLuluBelle,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
15,2021-12-02 00:50:21,,I love that you can easily log your routines a...,5,False,User friendly app!,Bettywhitesrus,,,,...,0.0,0.0,0.0,0.0,0.0,0.727845,0.0,0.0,0.685741,0.0
16,2022-03-16 16:19:13,,This review is solely of the app. It’s a simpl...,4,False,"Fairly good app, great program!",Tx9999999,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
19,2022-09-30 16:48:36,"{'id': 32410684, 'body': 'This is what we love...",Found has been a game changer for me. I’ve tri...,5,False,Game changer,Shananigans07,32410684.0,This is what we love to hear! We are cheering ...,2022-10-05T20:12:07Z,...,0.0,0.0,0.0,0.574081,0.614726,0.0,0.0,0.0,0.0,0.540872
20,2022-04-13 20:53:43,"{'id': 29194132, 'body': 'Thank you so much fo...",This app is a fantastic tool to help you with ...,5,False,Awesome app!,i write reviews.com,29194132.0,Thank you so much for your thoughtful feedback...,2022-04-15T13:57:52Z,...,0.0,0.0,0.0,0.0,0.730855,0.682532,0.0,0.0,0.0,0.0
22,2021-12-13 22:48:09,,This new app works well with the key component...,5,False,Good companion app to the program,Avid_ebook_reader,,,,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [158]:
low_ratings_result = vectorizer.fit_transform(df[df['rating']<4]['customer_review_cleaned']).toarray()
low_rate = pd.DataFrame(low_ratings_result, columns = vectorizer.get_feature_names_out())

In [159]:
low_rate.columns

Index(['community want', 'doctor listen', 'doctor locate', 'fitness pal',
       'highly recommend', 'lose weight', 'program app', 'response support',
       'want internet know', 'weight loss'],
      dtype='object')

In [None]:
low_rate.columns

In [116]:
reviews_df[reviews_df['rating']==5]

Unnamed: 0,title,rating,word_answer question,word_app easy,word_app help,word_app need,word_app super,word_app super easy,word_calorie count,word_coach awesome,...,word_super easy use,word_support coach,word_support program,word_track weight,word_try log,word_user friendly,word_want internet know,word_weight feel,word_weight loss,word_weight loss program
0,I love this! Im gaining confidence in myself🤍,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Great program and Useful App,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.291511,0.0
13,360 Weight + Health + Well-being - how refresh...,5,0.0,0.0,0.0,0.0,0.0,0.0,0.434694,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.590415,0.405748
15,User friendly app!,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.673717,0.0,0.0,0.0,0.0
19,Game changer,5,0.0,0.0,0.0,0.0,0.339304,0.339304,0.0,0.0,...,0.339304,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.230426,0.31671
20,Awesome app!,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22,Good companion app to the program,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.590152,0.0,0.0,0.0,0.0,0.0,0.0
23,Love the app,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
24,Great improvements in the app,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
25,Amazing!,5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [93]:
reviews_df

Unnamed: 0,title,rating,word_activity,word_add,word_answer,word_app,word_ask,word_awesome,word_cancel,word_change,...,word_try,word_use,word_user,word_user friendly,word_want,word_way,word_week,word_weight,word_weight loss,word_work
0,I love this! Im gaining confidence in myself🤍,5,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.271189,0.000000,0.000000,0.000000
1,Preying on people needing support,1,0.000000,0.0,0.0,0.065465,0.000000,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.125450,0.0,0.114248,0.000000,0.000000,0.000000
2,Do not recommend,1,0.000000,0.0,0.0,0.127661,0.000000,0.0,0.253579,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
3,Great program and Useful App,5,0.183654,0.0,0.0,0.088903,0.000000,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.331034,0.155151,0.143351
4,"A dangerous, negligent scam",1,0.000000,0.0,0.0,0.000000,0.297919,0.0,0.137145,0.0,...,0.0,0.264615,0.0,0.0,0.000000,0.0,0.000000,0.171391,0.240985,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,App does not work for me,1,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
78,Lack of response and support,1,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
79,Loss of trust,1,0.112671,0.0,0.0,0.218166,0.000000,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.104518,0.0,0.000000,0.135392,0.000000,0.175890
80,Scam,1,0.000000,0.0,0.0,0.000000,0.000000,0.0,0.000000,0.0,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000


In [91]:
reviews_df[[rating]]

Unnamed: 0,date,developerResponse,review,rating,isEdited,title,userName,developer_id,developer_response,developer_response_modified,...,word_try,word_use,word_user,word_user friendly,word_want,word_way,word_week,word_weight,word_weight loss,word_work
0,2022-10-04 22:43:00,"{'id': 32461912, 'body': 'Thank you for your 5...",I’ve been on the found program for approximate...,5,False,I love this! Im gaining confidence in myself🤍,Dannieg3212,32461912.0,Thank you for your 5-star rating! We are so ex...,2022-10-09T01:39:19Z,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.271189,0.000000,0.000000,0.000000
1,2022-09-11 11:54:54,"{'id': 31975629, 'body': 'We apologize for the...",I’m so discouraged by found. The so-called hea...,1,False,Preying on people needing support,E0619,31975629.0,We apologize for the less-than-ideal experienc...,2022-09-12T17:59:35Z,...,0.0,0.000000,0.0,0.0,0.125450,0.0,0.114248,0.000000,0.000000,0.000000
2,2023-02-15 17:39:12,"{'id': 34935885, 'body': 'Thank you for taking...",Absolutely do not recommend this company. Firs...,1,False,Do not recommend,lizshagil,34935885.0,Thank you for taking the time to leave a revie...,2023-02-17T22:50:34Z,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
3,2022-03-11 19:33:23,,Personalized program with the ongoing support ...,5,False,Great program and Useful App,TiMarie83,,,,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.331034,0.155151,0.143351
4,2022-07-18 21:39:43,"{'id': 31022317, 'body': 'We're so sorry to he...",The doctors on this service are never consiste...,1,False,"A dangerous, negligent scam",mis2008,31022317.0,We're so sorry to hear about your experience. ...,2022-07-21T16:24:40Z,...,0.0,0.264615,0.0,0.0,0.000000,0.0,0.000000,0.171391,0.240985,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
77,2023-02-10 21:49:55,"{'id': 34892949, 'body': 'Thank you for taking...",It say cannot connect to server,1,False,App does not work for me,Ecjrmom,34892949.0,Thank you for taking the time to leave a revie...,2023-02-15T22:17:51Z,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
78,2022-06-27 22:46:20,"{'id': 30647255, 'body': 'We apologize for the...","Lack of response, no support, false expectations.",1,False,Lack of response and support,rd32123987,30647255.0,We apologize for the less-than-ideal experienc...,2022-06-30T17:08:14Z,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000
79,2022-08-30 01:15:49,"{'id': 31975575, 'body': 'We are grateful that...",I want Found to succeed. The app is startup so...,1,False,Loss of trust,CBrodi,31975575.0,We are grateful that you selected us to assist...,2022-09-12T17:57:02Z,...,0.0,0.000000,0.0,0.0,0.104518,0.0,0.000000,0.135392,0.000000,0.175890
80,2022-11-21 08:44:08,"{'id': 33316797, 'body': 'Hi there! We are sor...",Never received a consultation or medication.,1,False,Scam,bronze Jessie,33316797.0,Hi there! We are sorry you were not happy with...,2022-11-23T01:42:37Z,...,0.0,0.000000,0.0,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000


In [60]:
dense = vectors.todense()
denselist = dense.tolist()